In [1]:
import pandas as pd
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
import seaborn as sn
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score,recall_score
from sklearn.model_selection import GridSearchCV

class Auto_model():
     
    def __init__(self,path,target):
        
        self.df = pd.read_csv(path)
        self.target = target
        
        self.models_param = {'REG_LOG' :{'penalty' : ['l1', 'l2']},
                            'XGBOOST':{'min_child_weight': [1, 5, 10],'gamma': [0.5, 1, 1.5, 2, 5],
                                       'subsample': [0.6, 0.8, 1.0],'colsample_bytree': [0.6, 0.8, 1.0],'max_depth': [3, 4, 5]},
                           
                            'GBOOST': {'n_estimators':  [10, 50, 100, 200], 'learning_rate': [0.01, 0.1, 1],
                                       'max_depth': [1, 2]},
                                       
                            'SVM_RBF': {'kernel': ['rbf'],'C': [0.01,0.1,1, 10, 50, 100, 500, 1000], 
                                    'gamma': ['auto',0.1, 1, 5, 10, 50,'auto_deprecated']},
                             
                            'RANDOM_FOREST': {'n_estimators':  [1,10], 'criterion': ['entropy', 'gini'], 
                                   'max_depth': [1, 2], 'max_features': ['auto']},
                             
                            'ADABOOST': {'n_estimators':  [10, 50, 100, 200], 'learning_rate': [0.1, 1, 10]}
}

     
               
        self.models_grid = {}
        self.models_grid['REG_LOG'] = GridSearchCV(LogisticRegression(random_state=0),self.models_param['REG_LOG'] ,
                                             scoring='accuracy', verbose=True)
        self.models_grid['XGBOOST'] = GridSearchCV(XGBClassifier(random_state=0),self.models_param['XGBOOST'], 
                                              scoring='accuracy', verbose=True)
        self.models_grid['GBOOST'] = GridSearchCV(GradientBoostingClassifier(random_state=0),self.models_param['GBOOST'],
                                             scoring='accuracy', verbose=True)
        self.models_grid['SVM_RBF'] = GridSearchCV(SVC(kernel='rbf',random_state=0),self.models_param['SVM_RBF'],
                                          scoring='accuracy', verbose=True)
        self.models_grid['RANDOM_FOREST'] = GridSearchCV(RandomForestClassifier(random_state=0),
                                                self.models_param['RANDOM_FOREST'],scoring='accuracy', verbose=True)
        
        self.models_grid['ADABOOST'] = GridSearchCV(AdaBoostClassifier(random_state=0),self.models_param['ADABOOST'],
                                               scoring='accuracy', verbose=True)
  
        self.models = {}
        self.models['REG_LOG'] = LogisticRegression(random_state=0)
        self.models['XGBOOST'] = XGBClassifier(random_state=0)
        self.models['GBOOST'] = GradientBoostingClassifier(random_state=0)
        self.models['SVM_RBF'] = SVC(kernel='rbf',random_state=0)
        self.models['RANDOM_FOREST'] = RandomForestClassifier(random_state=0)                                 
        self.models['ADABOOST'] = AdaBoostClassifier(random_state=0)
        
    def preprocess(self):

        taille = self.df.shape
        
        for column in self.df.columns:
            if self.df[column].isnull().sum()/taille[0] > 0.2:
                self.df.drop([column], inplace=True, axis=1)

        self.df.dropna(axis=0, inplace=True)

        y = self.df[self.target]
        X = self.df.drop([self.target], axis=1)

        for column in X.columns:
            try:
                pd.to_numeric(X[column])
            except ValueError:
                dummies = pd.get_dummies(X[column], drop_first=True)
                X = pd.concat([X, dummies], axis=1)
                X.drop([column], inplace=True, axis=1)
                
                
                
        return train_test_split(X, y, test_size=0.20, random_state=0)
   
    
        
    def fit(self,X,y,grid_search = True):
        if grid_search:
            
            for model in self.models_grid:
        
                self.models_grid[model].fit(X,y).best_estimator_
        else:
            
            for model in self.models:
                self.models[model].fit(X,y)
        self.grid_search = grid_search
             
    
    def predict(self,X):
        predict_dic = {}
        if self.grid_search:
            for model in self.models_grid:
                predict_dic[model] = self.models_grid[model].predict(X)
            return predict_dic
        else:
            for model in self.models:
                predict_dic[model] = self.models[model].predict(X)
            return predict_dic
                
            
            

    def accuracy_ml(self,y,y_pred):
        accuracy_dict = {}
        if self.grid_search:
            for model in self.models_grid:
                accuracy_dict[model] = accuracy_score(y_pred[model], y) 
            #self.accuracy_dict =accuracy_dict[model]
            return accuracy_dict
        else:
            for model in self.models:
                accuracy_dict[model] = accuracy_score(y_pred[model], y) 
            #self.accuracy_dict =accuracy_dict[model]
            return accuracy_dict
    
    def f1score_ml(self,y,y_pred):
        f1_score_dict = {}
        if self.grid_search:
            for model in self.models_grid:
                f1_score_dict[model] = f1_score(y_pred[model], y,average='weighted') 
            return f1_score_dict
        else:
            for model in self.models:
                f1_score_dict[model] = f1_score(y_pred[model], y,average='weighted') 
            return f1_score_dict

In [2]:
models = Auto_model(path = "Social_Network_Ads.csv", target = 'Purchased', )
X_train, X_test, y_train, y_test = models.preprocess()
models.fit(X_train,y_train,grid_search=False)
accuracy_test = models.accuracy_ml(y_test, models.predict(X_test))
fscore = models.f1score_ml(y_test, models.predict(X_test))

In [3]:
accuracy_test

{'REG_LOG': 0.825,
 'XGBOOST': 0.9375,
 'GBOOST': 0.925,
 'SVM_RBF': 0.725,
 'RANDOM_FOREST': 0.9125,
 'ADABOOST': 0.925}

In [4]:
fscore

{'REG_LOG': 0.8437908496732026,
 'XGBOOST': 0.9370772946859903,
 'GBOOST': 0.9240274599542335,
 'SVM_RBF': 0.8405797101449275,
 'RANDOM_FOREST': 0.9131435102365334,
 'ADABOOST': 0.9261501210653755}