In [2]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,f1_score,roc_auc_score
from sklearn.svm import SVC
import pandas as pd
import numpy as np


In [3]:
train_df = pd.read_csv('../data/preprocessed/train_df.csv')

In [4]:
train = train_df.drop('Personality',axis=1)
test = train_df['Personality']

In [5]:
X_train,X_val,y_train,y_val = train_test_split(train,test,random_state=42)

model = RandomForestClassifier(n_estimators=100,min_samples_split=2,max_depth=5,random_state=42)
model.fit(X_train,y_train)
y_pred = model.predict(X_val)
y_pred_proba = model.predict_proba(X_val)[:,1]

acc = accuracy_score(y_val,y_pred)
cm = confusion_matrix(y_val,y_pred)
roc_auc = roc_auc_score(y_val,y_pred_proba)

print(f'accuracy:{acc}')
print(f'confusion matrix:{cm}')
print(f'roc_auc_score:{roc_auc}')

accuracy:0.9671777153962428
confusion matrix:[[3356   63]
 [  89 1123]]
roc_auc_score:0.9653518437541326


In [6]:
model = LGBMClassifier(max_depth=5,n_estimators=200,min_samples_split=2,subsample=0.8,colsample_bytree=0.8,random_state=42,verbose=-1)
model.fit(X_train,y_train)
y_pred = model.predict(X_val)
y_pred_proba = model.predict_proba(X_val)[:,1]

acc = accuracy_score(y_val,y_pred)
cm = confusion_matrix(y_val,y_pred)
roc_auc = roc_auc_score(y_val,y_pred_proba)
f1 = f1_score(y_val,y_pred,average='macro')

print(f'accuracy:{acc}')
print(f'confusion matrix:{cm}')
print(f'roc_auc_score:{roc_auc}')
print(f'f1 score:{f1}')

accuracy:0.9671777153962428
confusion matrix:[[3356   63]
 [  89 1123]]
roc_auc_score:0.9598611477117293
f1 score:0.9572346613631018


In [7]:
model = SVC(kernel='rbf',random_state=42)
model.fit(X_train,y_train)
y_pred = model.predict(X_val)

acc = accuracy_score(y_val,y_pred)
cm = confusion_matrix(y_val,y_pred)
f1 = f1_score(y_val,y_pred)

print(f'accuracy:{acc}')
print(f'confusion matrix:{cm}')
print(f'f1 score:{f1}')


accuracy:0.7382854675016195
confusion matrix:[[3419    0]
 [1212    0]]
f1 score:0.0


In [8]:
model = XGBClassifier(random_state=42)
model.fit(X_train,y_train)
y_pred = model.predict(X_val)
y_pred_proba = model.predict_proba(X_val)[:,1]

acc = accuracy_score(y_val,y_pred)
cm = confusion_matrix(y_val,y_pred)
roc_auc = roc_auc_score(y_val,y_pred_proba)
f1 = f1_score(y_val,y_pred)

print(f'accuracy:{acc}')
print(f'confusion matrix:{cm}')
print(f'roc_auc_score:{roc_auc}')
print(f'f1 score:{f1}')

accuracy:0.9669617793133233
confusion matrix:[[3355   64]
 [  89 1123]]
roc_auc_score:0.9590399263676003
f1 score:0.9362234264276782
