# FLAML Classifier

In [1]:
pip install flaml

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# FLAML
import flaml
from flaml import AutoML

In [3]:
deals= pd.read_csv('Sample_Data_Deals2.csv')
#Change column type from Object to Category for columns 3,4,5
for col in ['Country', 'Industry', 'Deal Status']:
    deals[col] = deals[col].astype('category')

In [4]:
deals.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   OrderID        100 non-null    object  
 1   OrderQuantity  100 non-null    int64   
 2   OrderValue     100 non-null    int64   
 3   Country        100 non-null    category
 4   Industry       100 non-null    category
 5   Deal Status    100 non-null    category
dtypes: category(3), int64(2), object(1)
memory usage: 3.4+ KB


In [5]:
#Drop Order ID
deals1 = pd.DataFrame(deals.iloc[:,1:])
deals1.head()

Unnamed: 0,OrderQuantity,OrderValue,Country,Industry,Deal Status
0,371,383,Canada,Technology,Won
1,163,121,Canada,Finance,Won
2,191,117,Australia,Manufacturing,Lost
3,150,143,Australia,Manufacturing,Lost
4,165,148,Australia,Manufacturing,Lost


In [6]:
#Encoding categorical data, 
Country = {'Australia': 1, 'Canada':2, 'China':3, 'France':4, 'Germany':5}
deals1['Country'] = deals1['Country'].map(Country)

Industry = {'Energy':1, 'Finance':2, 'Government':3, 'Healthcare':4, 'Manufacturing':5, 'Retail':6, 'Technology':7}
deals1['Industry'] = deals1['Industry'].map(Industry)

dealstat= {'Won':1, 'Lost':0}
deals1['Deal Status'] = deals1['Deal Status'].map(dealstat)

print(deals1.head())

   OrderQuantity  OrderValue Country Industry Deal Status
0            371         383       2        7           1
1            163         121       2        2           1
2            191         117       1        5           0
3            150         143       1        5           0
4            165         148       1        5           0


In [18]:
#Create x and y datasets
X = deals1.drop('Deal Status',axis=1).values
y = deals1['Deal Status'].values
X= np.array(X)
y=np.array(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=101)
type(y_train)
y_train.shape

(75,)

In [19]:
settings = {
    "time_budget": 600,  # total running time in seconds
    "metric": 'accuracy', 
                        # check the documentation for options of metrics (https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#optimization-metric)
    "task": 'classification',  # task type
    "log_file_name": 'deals_experiment.log',  # flaml log file
    "seed": 101,    # random seed
}
automl = AutoML()
'''The main flaml automl API'''
automl.fit(X_train=X_train, y_train=y_train, **settings)


[flaml.automl.logger: 12-26 13:48:06] {1728} INFO - task = classification
[flaml.automl.logger: 12-26 13:48:06] {1739} INFO - Evaluation method: cv
[flaml.automl.logger: 12-26 13:48:06] {1838} INFO - Minimizing error metric: 1-accuracy
[flaml.automl.logger: 12-26 13:48:07] {1955} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'sgd', 'catboost', 'lrl1']
[flaml.automl.logger: 12-26 13:48:07] {2258} INFO - iteration 0, current learner lgbm
[flaml.automl.logger: 12-26 13:48:07] {2393} INFO - Estimated sufficient time budget=1453s. Estimated necessary time budget=36s.
[flaml.automl.logger: 12-26 13:48:07] {2442} INFO -  at 0.3s,	estimator lgbm's best error=0.3733,	best estimator lgbm's best error=0.3733
[flaml.automl.logger: 12-26 13:48:07] {2258} INFO - iteration 1, current learner lgbm
[flaml.automl.logger: 12-26 13:48:07] {2442} INFO -  at 0.5s,	estimator lgbm's best error=0.3733,	best estimator lgbm's best error=0.3733
[flaml.automl.l

In [22]:
#'''Retrieve best config and best learner'''
print('Best ML leaner:', automl.best_estimator)
print('Best hyperparmeter config:', automl.best_config)
print('Best accuracy: {0:.4g}'.format(1-automl.best_loss))
print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))

Best ML leaner: lgbm
Best hyperparmeter config: {'n_estimators': 10, 'num_leaves': 4, 'min_child_samples': 4, 'learning_rate': 0.048152910120225094, 'log_max_bin': 5, 'colsample_bytree': 0.49316779663872445, 'reg_alpha': 0.3603774679946051, 'reg_lambda': 1.084154324255824}
Best accuracy: 0.7067
Training duration of best run: 0.06514 s


In [21]:
automl.model.estimator