## Importing the Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import warnings

warnings.filterwarnings('ignore')

## Load the dataset

In [2]:
df = pd.read_csv('data.csv')

## Displaying the dataset's first 5 records

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,Gender,Age,openness,neuroticism,conscientiousness,agreeableness,extraversion,Personality
0,0,Male,17,10,1,6,7,9,Responsible
1,1,Male,19,4,8,7,4,6,Extraveted
2,2,Female,18,3,5,7,7,7,Serious
3,3,Female,22,7,1,7,8,10,Responsible
4,4,Female,19,10,1,7,4,8,Dependable


## Displaying the dataset's last 5 records

In [4]:
df.tail()

Unnamed: 0.1,Unnamed: 0,Gender,Age,openness,neuroticism,conscientiousness,agreeableness,extraversion,Personality
1019,310,Female,19,9,3,9,5,9,Responsible
1020,311,Male,18,4,5,8,9,6,Serious
1021,312,Male,18,4,9,3,7,5,Extraveted
1022,313,Male,23,10,2,10,6,7,Responsible
1023,314,Female,18,8,3,6,7,9,Responsible


## Data Preprocessing (make sure the gender don't have 0 value)

In [5]:
df[df['Gender']==0]

Unnamed: 0.1,Unnamed: 0,Gender,Age,openness,neuroticism,conscientiousness,agreeableness,extraversion,Personality


## Using Lable encoding to change Gender and Personality to Numerical value

In [6]:
label_encoders = {}
# Encode 'Personality (Class label)'
le_personality = LabelEncoder()
df['Personality'] = le_personality.fit_transform(df['Personality'])
label_encoders['Personality'] = le_personality

# Encode 'Gender'
le_gender = LabelEncoder()
df['Gender'] = le_gender.fit_transform(df['Gender'])
label_encoders['Gender'] = le_gender

## Ensuring the values are changed

In [7]:
df.head()

Unnamed: 0.1,Unnamed: 0,Gender,Age,openness,neuroticism,conscientiousness,agreeableness,extraversion,Personality
0,0,1,17,10,1,6,7,9,3
1,1,1,19,4,8,7,4,6,1
2,2,0,18,3,5,7,7,7,4
3,3,0,22,7,1,7,8,10,3
4,4,0,19,10,1,7,4,8,0


## Serparating features and target variables

In [8]:
x=df.drop('Personality', axis="columns")
y=df['Personality']
print("shape of Independent variable x:", x.shape)
print("shape of Independent variable y:",y.shape)

shape of Independent variable x: (1024, 8)
shape of Independent variable y: (1024,)


## Train and Test Split

In [9]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2, random_state=42)

## Printing the shape of x_train(819 rows and 8 columns)

In [10]:
x_train.shape

(819, 8)

 ## Printing the shape of y_train(819 rows and 1 column)

In [11]:
y_train.shape

(819,)

## Printing the shape of x_test(205 rows and 8 columns)

In [12]:
x_test.shape

(205, 8)

## Printing the shape of x_train(205 rows and 1 column)

In [13]:
y_test.shape

(205,)

## Use Standard Scaler to Standrize the data

In [14]:
scaler = StandardScaler()
scaler.fit(x_train)

## Transforming Train data into Array format

In [15]:
scaler.transform(x_train)

array([[-0.76286382,  1.05395671,  0.83166037, ...,  1.55896135,
         0.94078198, -0.19032078],
       [ 0.44886091,  1.05395671, -1.11109446, ...,  1.04783671,
         0.38884757, -0.85360893],
       [ 0.5043983 , -0.94880557, -0.72254349, ...,  1.04783671,
         0.94078198,  1.79954368],
       ...,
       [-0.69217987,  1.05395671,  0.05455844, ...,  0.02558744,
        -0.71502126, -0.19032078],
       [ 0.74169439,  1.05395671, -1.11109446, ...,  0.02558744,
         0.38884757,  0.47296738],
       [-0.93957367,  1.05395671,  2.7744152 , ..., -0.99666184,
         2.0446508 , -1.51689708]])

## Transforming Test data into Array format

In [16]:
scaler.transform(x_test)

array([[ 1.19609116,  1.05395671,  0.83166037, ..., -0.4855372 ,
         2.0446508 , -0.19032078],
       [ 0.34788385, -0.94880557,  0.4431094 , ...,  1.04783671,
        -0.16308685, -0.19032078],
       [ 0.78713407, -0.94880557,  2.38586424, ...,  0.02558744,
        -0.16308685, -0.19032078],
       ...,
       [ 1.21123772,  1.05395671, -0.33399253, ...,  0.02558744,
        -1.81889009, -1.51689708],
       [ 0.02980611,  1.05395671, -0.72254349, ...,  1.04783671,
        -0.16308685, -0.19032078],
       [-1.27279797,  1.05395671, -0.33399253, ..., -1.50778647,
        -0.71502126, -0.19032078]])

## Initialize and train the RandomForestClassifier

In [17]:
rf = RandomForestClassifier(n_estimators = 10, criterion='entropy',random_state=42)
rf.fit(x_train,y_train)

In [18]:
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

# Make predictions with the trained RandomForest model
x_pred = rf.predict(x_test)

# Calculate and print accuracy
print("Accuracy: ({0:.2f})".format(accuracy_score(y_test, x_pred)))

Accuracy: (0.81)


# Initialize and train the LogisticRegression

In [19]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(random_state=42)
lr.fit(x_train, y_train)

# Make predictions with the trained RandomForest model
x_pred = lr.predict(x_test)

# Calculate and print accuracy
print("Accuracy: ({0:.2f})".format(accuracy_score(y_test, x_pred)))

Accuracy: (0.77)


In [20]:
from sklearn.svm import SVC
svc = SVC(kernel='linear', random_state=42)  # Use 'rbf' for non-linear classification
svc.fit(x_train, y_train)

# Make predictions with the trained RandomForest model
x_pred = svc.predict(x_test)

# Calculate and print accuracy
print("Accuracy: ({0:.2f})".format(accuracy_score(y_test, x_pred)))

Accuracy: (0.78)


## Initialize and train the KNeiphborsClassifier

In [21]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(x_train, y_train)

# Make predictions with the trained RandomForest model
x_pred = knn.predict(x_test)

# Calculate and print accuracy
print("Accuracy: ({0:.2f})".format(accuracy_score(y_test, x_pred)))

Accuracy: (0.51)


## Initialize and train the GradientBoostingClassifier

In [22]:
from sklearn.ensemble import GradientBoostingClassifier
gbc = GradientBoostingClassifier(n_estimators=100, random_state=42)

gbc.fit(x_train, y_train)

# Make predictions with the trained RandomForest model
x_pred = gbc.predict(x_test)

# Calculate and print accuracy
print("Accuracy: ({0:.2f})".format(accuracy_score(y_test, x_pred)))

Accuracy: (0.81)


## Initialize and train the GaussianNB

In [23]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(x_train, y_train)

# Make predictions with the trained RandomForest model
x_pred = gnb.predict(x_test)

# Calculate and print accuracy
print("Accuracy: ({0:.2f})".format(accuracy_score(y_test, x_pred)))

Accuracy: (0.79)
