# Experiment 3

- try to use stacking and voting approche using the tuned models from the experiment 2
- use `StandardScaler`
- compare the result


In [79]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier,VotingClassifier,StackingClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

from sklearn.metrics import accuracy_score,f1_score,confusion_matrix,roc_auc_score
import time
import json

In [2]:
def evaluate_model(model,model_name:str,train:tuple,test:tuple):
    start_train=time.time()
    model.fit(train[0],train[1])
    end_train=time.time()
    start_test=time.time()
    preds = model.predict(test[0])
    end_test=time.time()
    accuracy=accuracy_score(test[1],preds)
    f1_=f1_score(test[1],preds)
    auc=roc_auc_score(test[1],preds)
    cm=confusion_matrix(test[1],preds)
    return {"model":model_name,
           "accuracy":accuracy,
            "auc":auc,
            'f1_score':f1_,
            "cm":cm,
            'training_time(ms)':1000*(end_train-start_train),
            'testing_time(ms)':1000*(end_test-start_test)
           }

In [3]:
df=pd.read_csv('../data/cleaned_data.csv')
X_train,X_test,y_train,y_test=train_test_split(df.drop('target',axis=1),df.target,test_size=0.2,random_state=42)
scaler=StandardScaler()
train=(scaler.fit_transform(X_train),y_train)
test=(scaler.transform(X_test),y_test)

In [7]:
 
with open('../result/svc_best_hyperparameters.json') as f :
    svc_best_params=json.load(f)
with open('../result/rfc_best_hyperparameters.json') as f :
    rfc_best_params=json.load(f)
with open('../result/gbc_best_hyperparameters.json') as f :
    gbc_best_params=json.load(f)

## VotingClassifer


In [107]:
vc=VotingClassifier([
    ('rfc',RandomForestClassifier(**rfc_best_params)),
    ('gbc',GradientBoostingClassifier(**gbc_best_params)),
    ('svc',SVC(**svc_best_params))
],weights=[3,2,1],voting='hard')

In [108]:
rs_vc=evaluate_model(vc,"VotingClassifier",train,test)

In [110]:
rs_vc

{'model': 'VotingClassifier',
 'accuracy': 0.9021739130434783,
 'auc': 0.9013229760893312,
 'f1_score': 0.9150943396226415,
 'cm': array([[69,  8],
        [10, 97]], dtype=int64),
 'training_time(ms)': 1435.5239868164062,
 'testing_time(ms)': 29.400348663330078}

## StackingClassifier


In [None]:
stc=StackingClassifier(
    [
    ('rfc',RandomForestClassifier(**rfc_best_params)),
    ('gbc',GradientBoostingClassifier(**gbc_best_params)),
    ('svc',SVC(**svc_best_params,probability=True)) 
    ],
    cv=3,
    stack_method='auto'
)

In [112]:
rs_stc=evaluate_model(stc,"StackingClassifier",train,test)

In [113]:
rs_stc

{'model': 'StackingClassifier',
 'accuracy': 0.8804347826086957,
 'auc': 0.8771695594125501,
 'f1_score': 0.897196261682243,
 'cm': array([[66, 11],
        [11, 96]], dtype=int64),
 'training_time(ms)': 4981.812953948975,
 'testing_time(ms)': 40.83561897277832}

In [114]:
result =pd.DataFrame([rs_vc,rs_stc])

In [115]:
result

Unnamed: 0,model,accuracy,auc,f1_score,cm,training_time(ms),testing_time(ms)
0,VotingClassifier,0.902174,0.901323,0.915094,"[[69, 8], [10, 97]]",1435.523987,29.400349
1,StackingClassifier,0.880435,0.87717,0.897196,"[[66, 11], [11, 96]]",4981.812954,40.835619


In [116]:
result.to_csv("../result/exp3.csv",index=False)