## Introvert vs Extrovert Classification Model Training

### Import Required Libraries

In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Classification Models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB

# Evaluation Metrics
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import f1_score, precision_score, recall_score

### Load Cleaned and Encoded Dataset

In [4]:
# Load the cleaned, encoded dataset; the relative path is resolved against
# the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='data/clean_encoded_data.csv')

# Show the first five rows as a quick sanity check of the columns.
df.head(5)

Unnamed: 0,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency,Personality
0,4.0,0,4.0,6.0,0,13.0,5.0,0
1,9.0,1,0.0,0.0,1,0.0,3.0,1
2,9.0,1,1.0,2.0,1,5.0,2.0,1
3,0.0,0,6.0,7.0,0,14.0,8.0,0
4,3.0,0,9.0,4.0,0,8.0,5.0,0


### Data Splitting into Dependent and Independent Features

In [5]:
# Independent features: every column except the target label.
# `columns=` already identifies what to drop, so no separate `axis` argument
# is needed alongside it.
X = df.drop(columns=['Personality'])

# Dependent feature: the encoded personality label the models will predict.
y = df['Personality']

In [6]:
# Preview the first rows of the feature matrix to confirm the target column
# is no longer present.
X.head()

Unnamed: 0,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency
0,4.0,0,4.0,6.0,0,13.0,5.0
1,9.0,1,0.0,0.0,1,0.0,3.0
2,9.0,1,1.0,2.0,1,5.0,2.0
3,0.0,0,6.0,7.0,0,14.0,8.0
4,3.0,0,9.0,4.0,0,8.0,5.0


In [7]:
# Preview the first entries of the target series.
y.head()

0    0
1    1
2    1
3    0
4    0
Name: Personality, dtype: int64

### Splitting Data into Training and Testing Sets

In [8]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Display the shape of each partition as a sanity check.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((2244, 7), (562, 7), (2244,), (562,))

### Feature Scaling

In [9]:
# Learn the standardization parameters from the training split only, so the
# test split does not influence them.
scaler = StandardScaler().fit(X_train)

# Apply the same fitted transformation to both splits. Note that X_train and
# X_test are overwritten here with the scaled NumPy arrays returned by the
# scaler.
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
# Inspect the scaled training features.
X_train

array([[-0.70671084, -0.97534832,  0.6159719 , ..., -0.98320694,
         0.33620925,  1.43783553],
       [-0.70671084, -0.97534832,  1.67068535, ..., -0.98320694,
         1.74756635,  0.74283619],
       [ 0.74776155,  1.02527474, -1.14188385, ...,  1.01707989,
        -0.3694693 , -1.34216182],
       ...,
       [ 1.62044499,  1.02527474, -1.14188385, ...,  1.01707989,
        -1.07514785, -0.64716248],
       [-0.41581636, -0.97534832,  0.6159719 , ..., -0.98320694,
         1.51234017,  1.43783553],
       [ 1.03865603,  1.02527474, -1.493455  , ...,  1.01707989,
        -0.3694693 , -1.34216182]], shape=(2244, 7))

In [11]:
# Inspect the scaled test features (transformed with the scaler fitted on the
# training split).
X_test

array([[-1.16437295,  1.02527474,  0.90925824, ...,  1.01707989,
        -1.1832046 ,  0.78391183],
       [ 1.32597901,  1.02527474,  1.13588678, ...,  1.01707989,
         0.23557516, -0.30176756],
       [ 1.62044499,  1.02527474, -0.43874155, ...,  1.01707989,
        -0.07099952, -0.64716248],
       ...,
       [ 0.0222172 ,  1.02527474, -0.43874155, ...,  1.01707989,
        -0.60469549, -1.34216182],
       [ 0.1659726 ,  1.02527474, -0.7903127 , ...,  1.01707989,
        -0.60469549, -1.34216182],
       [-0.99760532, -0.97534832,  0.6159719 , ..., -0.98320694,
         0.57143543,  0.39533652]], shape=(562, 7))

### Model Training

**Create an evaluation function that returns all metrics after model training**

In [13]:
def evaluate_model(y_pred, y_test):
    """Score a set of predictions against the true labels.

    Note the argument order: predictions come first and the ground truth
    second. The body passes them to each metric function as
    ``(y_test, y_pred)``.

    Parameters
    ----------
    y_pred
        Labels predicted by a model.
    y_test
        True labels to compare the predictions against.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)``, each computed by the
        corresponding metric function with its default settings.
    """
    # Order matters: it defines the positions in the returned tuple.
    metric_fns = (accuracy_score, precision_score, recall_score, f1_score)
    return tuple(metric(y_test, y_pred) for metric in metric_fns)