# Importing librairies

In [1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore", "is_categorical_dtype")
warnings.filterwarnings("ignore", "use_inf_as_na")

In [3]:
df = pd.read_csv("./../PROJET/MLS/MP-4MLSP.csv", index_col=0)

In [11]:
def transform_neutral_to_dissatisfied(df):
    # Replace 'Neutral' with 'Dissatisfied'
    df['satisfaction'] = df['satisfaction'].replace('neutral', 'dissatisfied')
    return df

two_cat_df = transform_neutral_to_dissatisfied(df.copy())
# Verification
print(binary_df["satisfaction"].value_counts())

Series([], Name: count, dtype: int64)


In [12]:
def categorical_to_binary(df, column_name):
    # Define mapping dictionary
    mapping = {'satisfied': 1, 'dissatisfied': 0}
    
    # Map categorical values to binary
    df[column_name] = df[column_name].map(mapping)
    
    return df
binary_df = categorical_to_binary(two_cat_df.copy(),"satisfaction")
binary_df["satisfaction"].value_counts()

satisfaction
0    73452
1    56428
Name: count, dtype: int64

In [37]:
from sklearn.svm import LinearSVC, SVC
from sklearn.naive_bayes import GaussianNB, CategoricalNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import  RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    recall_score,
   
    classification_report,
)
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, OrdinalEncoder, OneHotEncoder
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.compose import ColumnTransformer

In [24]:
def getCharacteristic(df):
    df_columns = []
    for col in df.columns:
        if col not in ["Price" ,"satisfaction","id"]:
            df_columns.append(col)
    return df_columns
df_columns = getCharacteristic(df.copy())
def sort_feature_for_each_categorie(df,columns):
    num_columns = []
    cat_columns = []
    for col in columns:
        if df[col].dtype in ['int64', 'float64']:
            num_columns.append(col)
        else:
            cat_columns.append(col)
    return num_columns,cat_columns
num_columns, cat_columns = sort_feature_for_each_categorie(df,df_columns)

## Preprocessor

In [26]:
num_pipeline_sip = Pipeline(steps = [
    ("imputer", SimpleImputer()),
    ("normalization", MinMaxScaler())
])

In [18]:
num_pipeline_knn = Pipeline(steps = [
    ("imputer", KNNImputer()),
    ("normalization", MinMaxScaler())
])

In [27]:
cat_pipeline_ohe = Pipeline(
    [
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("encoder", OneHotEncoder(drop="first"))
    ]
)

In [28]:
preprocessor_knn = ColumnTransformer(transformers=[
    ("numeric",num_pipeline_knn,num_columns),
    ("categorical",cat_pipeline_ohe,cat_columns)
])

In [None]:
preprocessor_sip = ColumnTransformer(transformers=[
    ("numeric",num_pipeline_sip,num_columns),
    ("categorical",cat_pipeline_ohe,cat_columns)
])

## Pipeline

### SVC

In [32]:

svc_params = {
    'SVC__C': [0.1, 1, 10],
    'SVC__kernel': ['linear', 'rbf'],
    'SVC__gamma': ['scale', 'auto']
}
pipeline_bin_SVC1 = Pipeline(steps =[
    ("preprocessor", preprocessor_knn),
    ("SVC", SVC())
])
pipeline_bin_SVC2 = Pipeline(steps =[
    ("preprocessor", preprocessor_sip),
    ("SVC", SVC())
])

### LSVC

In [35]:
linear_vector_support = {
    'LSVC__penalty': ['l1', 'l2'],
    'LSVC__C': [0.1, 1, 10],
}
pipeline_bin_LSVC1 = Pipeline(steps =[
    ("preprocessor", preprocessor_knn),
    ("LSVC", LinearSVC())
])
pipeline_bin_LSVC2 = Pipeline(steps =[
    ("preprocessor", preprocessor_sip),
    ("LSVC", LinearSVC())
])

### LRG

In [45]:
logistic_reg_params = {
    'LRG__penalty': ['l1', 'l2'],
    'LRG__C': [0.1, 1, 10],
    'LRG__solver': ['liblinear']
}
pipeline_bin_LRG1 = Pipeline(steps =[
    ("preprocessor", preprocessor_knn),
    ("LRG", LogisticRegression())
])
pipeline_bin_LRG2 = Pipeline(steps =[
    ("preprocessor", preprocessor_sip),
    ("LRG", LogisticRegression())
])

### RF

In [42]:


random_forest_params = {
    'RF__n_estimators': [50, 100, 200],
    'RF__max_depth': [None, 10, 20],
    'RF__min_samples_split': [2, 5, 10],
    'RF__min_samples_leaf': [1, 2, 4]
}
pipeline_bin_RF1 = Pipeline(steps =[
    ("preprocessor", preprocessor_knn),
    ("LSVC", RandomForestClassifier())
])
pipeline_bin_RF2 = Pipeline(steps =[
    ("preprocessor", preprocessor_sip),
    ("LSVC", RandomForestClassifier())
])



### DecisionTree

In [43]:



decision_tree_params = {
    'DT__criterion': ['gini', 'entropy'],
    'DT__max_depth': [None, 5, 10, 20],
    'DT__min_samples_split': [2, 5, 10],
    'DT__min_samples_leaf': [1, 2, 4]
}
pipeline_bin_DT1 = Pipeline(steps =[
    ("preprocessor", preprocessor_knn),
    ("DT", DecisionTreeClassifier())
])
pipeline_bin_DT2 = Pipeline(steps =[
    ("preprocessor", preprocessor_sip),
    ("DT", DecisionTreeClassifier())
])



### ADABOOST

In [38]:



gradient_boosting_params = {
    'ADA__n_estimators': [50, 100, 200],
    'ADA__learning_rate': [0.05, 0.1, 0.2],
    'ADA__max_depth': [3, 4, 5],
    'ADA__min_samples_split': [2, 5, 10]
}

pipeline_bin_ADA1 = Pipeline(steps =[
    ("preprocessor", preprocessor_knn),
    ("ADA", AdaBoostClassifier())
])
pipeline_bin_ADA2 = Pipeline(steps =[
    ("preprocessor", preprocessor_sip),
    ("ADA", AdaBoostClassifier())
])



## Dict of pipeline

In [46]:
pipeline_params = {
    'SVC1': {
        'pipeline': pipeline_bin_SVC1,
        'params': svc_params
    },
    'SVC2': {
        'pipeline': pipeline_bin_SVC2,
        'params': svc_params
    },
    'LSVC1': {
        'pipeline': pipeline_bin_LSVC1,
        'params': linear_vector_support
    },
    'LSVC2': {
        'pipeline': pipeline_bin_LSVC2,
        'params': linear_vector_support
    },
    
    'LRG1': {
        'pipeline': pipeline_bin_LRG1,
        'params': logistic_reg_params
    },
    'LRG2': {
        'pipeline': pipeline_bin_LRG2,
        'params': logistic_reg_params
    },
    'Random Forest': {
        'pipeline': pipeline_bin_RF1,
        'params': random_forest_params
    },
    'Random Forest1': {
        'pipeline': pipeline_bin_RF2,
        'params': random_forest_params
    },
    'DT1': {
        'pipeline': pipeline_bin_DT1,
        'params': decision_tree_params
    },
    'DT2': {
        'pipeline': pipeline_bin_DT2,
        'params': decision_tree_params
    },
    'ADA1': {
        'pipeline': pipeline_bin_ADA1,
        'params': gradient_boosting_params
    },
    'ADA2': {
        'pipeline': pipeline_bin_ADA2,
        'params': gradient_boosting_params
    }
}