In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _dirs, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(root, fname))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the posture / personality table shipped with the Kaggle dataset.
DATA_PATH = "/kaggle/input/correlation-between-posture-personality-trait/Myers Briggs Table_S1.csv"
df = pd.read_csv(DATA_PATH)

<hr>
<h1> First we'll start with EDA and some interesting facts about the dataset</h1> 


In [None]:
# Preview the first rows to check how the CSV was parsed.
df.head()

In [None]:
# (number of rows, number of columns) of the loaded table.
df.shape

In [None]:
# Use the "S No" column as the row index so it is not treated as a data column.
# The guard plus reassignment (instead of inplace=True) keeps the cell safe to
# re-run: once "S No" has moved into the index, a second unguarded set_index
# call raises KeyError.
if "S No" in df.columns:
    df = df.set_index("S No")


In [None]:
# Bar chart of the number of rows per MBTI type.
mbti_counts = df["MBTI"].value_counts()
mbti_counts.plot(kind="bar")

In [None]:
# Split rows by the larger of the E and I scores; ties (I == E) go to introverts.
extrovert_mask = df["E"].gt(df["I"])
introvert_mask = df["I"].ge(df["E"])
extroverts = df.loc[extrovert_mask]
introverts = df.loc[introvert_mask]


In [None]:
# Side-by-side posture counts: introverts on the left, extroverts on the right.
# The bar order is computed once from the whole dataset and shared by both
# panels, so the bars line up and a posture class that is missing from one
# group is not silently dropped from the other panel.
posture_order = df['POSTURE'].value_counts().index
fig, ax = plt.subplots(1, 2)
sns.countplot(x=introverts['POSTURE'], ax=ax[0], order=posture_order)
sns.countplot(x=extroverts['POSTURE'], ax=ax[1], order=posture_order)
ax[0].set_title("introvert")
ax[1].set_title("extrovert")

# plt.show() renders with the notebook's inline backend; Figure.show() only
# emits a "non-GUI backend" warning there.
plt.show()

It turns out that extroverts have better postures than introverts.

In [None]:
# Side-by-side activity-level counts: introverts on the left, extroverts on the right.
# The bar order is computed once from the whole dataset and shared by both
# panels, so the bars line up and an activity level that is missing from one
# group is not silently dropped from the other panel.
activity_order = df['ACTIVITY LEVEL'].value_counts().index
fig, ax = plt.subplots(1, 2)
sns.countplot(x=introverts['ACTIVITY LEVEL'], ax=ax[0], order=activity_order)
sns.countplot(x=extroverts['ACTIVITY LEVEL'], ax=ax[1], order=activity_order)
ax[0].set_title("introvert")
ax[1].set_title("extrovert")
# plt.show() renders with the notebook's inline backend; Figure.show() only
# emits a "non-GUI backend" warning there.
plt.show()

In [None]:
# Distribution of the AGE column as a horizontal box plot.
sns.boxplot(data=df, x="AGE")


In [None]:
# Number of rows per value of the SEX column.
sns.countplot(data=df, x="SEX")


In [None]:
# WEIGHT distribution for each activity level.
sns.boxplot(data=df, x="ACTIVITY LEVEL", y="WEIGHT")


In [None]:
# WEIGHT distribution for each MBTI type.
sns.boxplot(data=df, x="MBTI", y="WEIGHT")


In [None]:
# Mean E score per MBTI type, sorted ascending, drawn as a bar chart.
# Only the "E" column is aggregated: calling .mean() on the whole grouped frame
# raises TypeError on pandas >= 2.0 while string-valued columns are present
# (SEX, ACTIVITY LEVEL and POSTURE are only encoded to numbers further down).
mean_e_by_type = df.groupby("MBTI")["E"].mean().sort_values()
mean_e_by_type.plot.bar(title="Ranked by most extrovert")


In [None]:
# Rows whose POSTURE is "A", counted per MBTI type and drawn as a bar chart.
posture_a_rows = df[df["POSTURE"] == "A"]
posture_a_rows["MBTI"].value_counts().plot.bar(title="Ranked: MBTI with good posture")

In [None]:
# HEIGHT against WEIGHT, one point per row, coloured by POSTURE class.
sns.scatterplot(x="WEIGHT", y="HEIGHT", hue="POSTURE", data=df)


In [None]:
# WEIGHT against AGE, one point per row, coloured by POSTURE class.
sns.scatterplot(x="AGE", y="WEIGHT", hue="POSTURE", data=df)


In [None]:
# HEIGHT against AGE, one point per row, coloured by POSTURE class.
sns.scatterplot(x="AGE", y="HEIGHT", hue="POSTURE", data=df)


<hr>
<h1> Now that we are done exploring the dataset, it's time to transform the data </h1>


In [None]:
from sklearn.preprocessing import LabelEncoder
# Encode the SEX and MBTI columns as integer codes.
# Each column gets its own encoder so both label mappings stay available
# afterwards (classes_ / inverse_transform); re-fitting one shared encoder
# discarded the SEX mapping as soon as MBTI was fitted.
sex_encoder = LabelEncoder()
mbti_encoder = LabelEncoder()
df["SEX"] = sex_encoder.fit_transform(df["SEX"])
df["MBTI"] = mbti_encoder.fit_transform(df["MBTI"])
le = mbti_encoder  # backward-compatible alias: `le` previously ended up fitted on MBTI


In [None]:
# Map the activity levels and posture classes to integer codes.
activity_codes = {'Low': 0, 'Moderate': 1, 'High': 2}
posture_codes = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
df['ACTIVITY LEVEL'] = df['ACTIVITY LEVEL'].replace(activity_codes)
df['POSTURE'] = df['POSTURE'].replace(posture_codes)

In [None]:
# Preview the frame again after the encoding cells above.
df.head()


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix,ConfusionMatrixDisplay
from sklearn.neighbors import KNeighborsRegressor
import xgboost as xgb


<h2> XGBOOST </h2>

In [None]:
# Features are every column except POSTURE; the target is kept as a one-column frame.
x = df.drop(columns=["POSTURE"])
y = df[["POSTURE"]]

# Hold out 20% of the rows for evaluation, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)


In [None]:
# Fit an XGBoost classifier (multi:softprob objective, fixed seed) on the training split.
xgb_model = xgb.XGBClassifier(objective="multi:softprob", random_state=42)
xgb_model.fit(x_train, y_train)

# Predict the held-out rows and print the raw confusion matrix.
y_pred = xgb_model.predict(x_test)
xgb_confusion = confusion_matrix(y_test, y_pred)
print(xgb_confusion)

In [None]:
# Plot the confusion matrix with tick labels taken from the class values that
# actually occur in y_test / y_pred. Building the display from a bare matrix
# labels the axes 0..n-1 by position, which mislabels classes whenever one of
# them is absent from this split.
ConfusionMatrixDisplay.from_predictions(y_test.values.ravel(), y_pred)


In [None]:
# Fraction of held-out rows whose predicted class matches the true class.
accuracy_score(y_true=y_test, y_pred=y_pred)

<h2> Decision Tree </h2>

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Decision tree fitted on the current train/test split.
# random_state is fixed (42, as for the other seeded steps in this notebook):
# scikit-learn permutes the candidate features at each split, so an unseeded
# tree can give different results from one run to the next.
Dtree = DecisionTreeClassifier(random_state=42)
Dtree.fit(x_train, y_train)
y_pred = Dtree.predict(x_test)

print(confusion_matrix(y_test, y_pred))

In [None]:
# Plot the confusion matrix with tick labels taken from the class values that
# actually occur in y_test / y_pred. Building the display from a bare matrix
# labels the axes 0..n-1 by position, which mislabels classes whenever one of
# them is absent from this split.
ConfusionMatrixDisplay.from_predictions(y_test.values.ravel(), y_pred)

In [None]:
# Fraction of held-out rows whose predicted class matches the true class.
accuracy_score(y_true=y_test, y_pred=y_pred)

<H2> k-nearest neighbors </H2>

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Fit a StandardScaler on the training features and keep the original column names.
Scaler = StandardScaler()
scaled_train_values = Scaler.fit_transform(x_train)
train_scale = pd.DataFrame(scaled_train_values, columns=x_train.columns)


In [None]:
# Standardise the feature columns only. Scaling the whole frame put the
# (scaled) POSTURE target among the model inputs, leaking the answer to every
# model trained on `scaled_model`.
# NOTE(review): the scaler is still fitted on all rows before the train/test
# split; fitting it on the training rows only would also avoid test-set leakage.
feature_frame = df.drop(columns=["POSTURE"])
scaled_model = pd.DataFrame(
    Scaler.fit_transform(feature_frame),
    columns=feature_frame.columns,
)
scaled_model

In [None]:

# Model inputs come from the scaled frame; the target is the POSTURE column of df.
x = scaled_model
y = df[["POSTURE"]]

# Hold out 20% of the rows for evaluation, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [None]:
# Fit a k-NN classifier for every k from 1 to 24 and record its test score.
# NOTE: despite its name, `error` collects accuracy scores (higher is better).
error = []
train_labels = y_train.values.ravel()  # flatten the one-column target to 1-D
for n_neighbors in range(1, 25):
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(x_train, train_labels)
    y_pred = knn.predict(x_test)
    error.append(accuracy_score(y_test, y_pred))

In [None]:
# Plot the recorded scores against the k value that produced them.
# The sweep starts at k = 1, so plotting the bare list (x = 0, 1, 2, ...) shifts
# every point one step to the left; the explicit x values fix that.
k_values = range(1, len(error) + 1)
plt.plot(k_values, error)
plt.xlabel("n_neighbors (k)")
plt.ylabel("test accuracy")
plt.show()

In [None]:
# Raw scores collected by the k sweep (one accuracy_score value per k).
error

In [None]:

# Final k-NN model with k = 4.
# The labels are flattened to 1-D, as in the k sweep: passing the one-column
# (n, 1) array makes scikit-learn emit a DataConversionWarning.
knn = KNeighborsClassifier(n_neighbors=4)
knn.fit(x_train, y_train.values.ravel())
y_pred = knn.predict(x_test)

In [None]:
# Print the fraction of held-out rows the k-NN model classified correctly.
knn_accuracy = accuracy_score(y_test, y_pred)
print(knn_accuracy)


In [None]:
# Plot the confusion matrix with tick labels taken from the class values that
# actually occur in y_test / y_pred. Building the display from a bare matrix
# labels the axes 0..n-1 by position, which mislabels classes whenever one of
# them is absent from this split.
ConfusionMatrixDisplay.from_predictions(y_test.values.ravel(), y_pred)


<h2> SVC </h2>

In [None]:
from sklearn import svm

# Use the same 80/20 seeded split as the other models so the scores are
# comparable; test_size=0.7 trained the SVC on only 30% of the rows.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
clf = svm.SVC(kernel='linear')  # linear kernel
# y_train is a one-column DataFrame; flatten it to the 1-D label vector that
# scikit-learn expects (avoids a DataConversionWarning).
clf.fit(x_train, y_train.values.ravel())

y_pred = clf.predict(x_test)

In [None]:
# Print the fraction of held-out rows the SVC classified correctly.
svc_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", svc_accuracy)

In [None]:
# Plot the confusion matrix with tick labels taken from the class values that
# actually occur in y_test / y_pred. Building the display from a bare matrix
# labels the axes 0..n-1 by position, which mislabels classes whenever one of
# them is absent from this split.
ConfusionMatrixDisplay.from_predictions(y_test.values.ravel(), y_pred)


Of the models tried above, SVC gives the best results.