### 1.0 Importing Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### 1.1 Loading the datasets

In [None]:
# Remote location of the training split. It is defined for reference only:
# the read below uses the local file, not this URL.
archivo_url = 'https://raw.githubusercontent.com/NgoQuocBao1010/Exercise-Correction/main/core/plank_model/train.csv'

# Local CSV path that is actually loaded into the training DataFrame.
archivo = 'train.csv'
ds = pd.read_csv(filepath_or_buffer=archivo)

# Last expression of the cell: renders the training DataFrame.
ds

In [None]:
# Remote location of the test split. It is defined for reference only:
# the read below uses the local file, not this URL.
archivo_url1 = 'https://raw.githubusercontent.com/NgoQuocBao1010/Exercise-Correction/main/core/plank_model/test.csv'

# Local CSV path that is actually loaded (this rebinds `archivo`).
archivo = 'test.csv'
ds_test = pd.read_csv(filepath_or_buffer=archivo)

# Last expression of the cell: renders the test DataFrame.
ds_test

### 1.2 Dataset preprocessing
Encode the string labels as integers: high back (`H`) becomes 2, low back (`L`) becomes 1, and correct (`C`) becomes 0.

In [None]:
# Encode the string class labels of the test set as integers in one pass:
# 'H' (high back) -> 2, 'L' (low back) -> 1, 'C' (correct) -> 0.
#
# The explicit cast guarantees an integer column. pandas has deprecated the
# silent object -> int downcast that `replace` used to perform, and the
# classifiers and metrics below need a numeric target rather than an object
# column. The cast also fails fast if an unexpected label value is present.
# Re-running the cell is harmless: already-encoded values are left untouched.
ds_test['label'] = ds_test['label'].replace({'H': 2, 'L': 1, 'C': 0}).astype(int)

In [None]:
# Encode the string class labels of the training set as integers in one pass:
# 'H' (high back) -> 2, 'L' (low back) -> 1, 'C' (correct) -> 0.
#
# The explicit cast guarantees an integer column. pandas has deprecated the
# silent object -> int downcast that `replace` used to perform, and the
# classifiers fitted below need a numeric target rather than an object
# column. The cast also fails fast if an unexpected label value is present.
# Re-running the cell is harmless: already-encoded values are left untouched.
ds['label'] = ds['label'].replace({'H': 2, 'L': 1, 'C': 0}).astype(int)

### 1.3 Exploratory Data Analysis

In [None]:
# Show the column labels of the training DataFrame as the cell output.
ds.columns

In [None]:
# Correlation matrix of the training DataFrame's columns.
cm = ds.corr()

# Draw the matrix as a heatmap on its own 10x8 inch figure.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(data=cm, ax=ax)
plt.show()

In [None]:
# Count how many training rows carry each encoded class label.
ds["label"].value_counts()

In [None]:
# Histogram (with a KDE curve overlaid) of the encoded target column.
sns.histplot(x=ds["label"], kde=True)

In [None]:
# sns.pairplot(ds, hue="label")

In [None]:
# Split the training rows by encoded class so each class gets its own histogram.
ds_true = ds[ds["label"] == 1]   # low back
ds_false = ds[ds["label"] == 0]  # correct
ds_two = ds[ds["label"] == 2]    # high back

fig, ax = plt.subplots(figsize=(10, 6))

# Label every histogram so each legend entry is bound to the artist it
# describes. Passing only `labels=[...]` to `fig.legend` pairs the names with
# the first artists found on the axes (bars of the first histogram), which
# gives every entry the same colour.
sns.histplot(ds_true["left_wrist_y"], kde=True, color="red", label="Low back", ax=ax)
sns.histplot(ds_false["left_wrist_y"], kde=True, color="blue", label="Correct", ax=ax)
sns.histplot(ds_two["left_wrist_y"], kde=True, color="yellow", label="High back", ax=ax)

# With no arguments the legend is built from the labelled artists above.
fig.legend()

### 1.4 Data Modeling

In [None]:
from sklearn.linear_model import LogisticRegression

# Baseline model: logistic regression with default hyper-parameters, fitted
# on every column except the target.
log_model = LogisticRegression()
log_model.fit(X=ds.drop(columns="label"), y=ds["label"].to_numpy())

### 1.5 Model Evaluation

In [None]:
# Class predictions of the baseline model for every row of the test set
# (all columns except the target are used as features).
y_pred = log_model.predict(X=ds_test.drop(columns="label"))

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Fraction of test rows whose predicted class equals the true class.
accuracy = accuracy_score(ds_test['label'], y_pred)
print("Accuracy:", accuracy)

# Confusion matrix of true vs. predicted classes on the test set.
cm = confusion_matrix(ds_test['label'], y_pred)

fig, ax = plt.subplots(figsize=(10, 6))
# fmt="d" prints the cell counts as plain integers; seaborn's default
# annotation format (".2g") shows counts of 100 or more in scientific notation.
sns.heatmap(cm, annot=True, fmt="d", ax=ax)
_ = ax.set_xlabel("Predicted")
_ = ax.set_ylabel("Actual")

In [None]:
from sklearn.metrics import classification_report

# Text summary of the baseline model's per-class metrics on the test set.
report = classification_report(y_true=ds_test['label'], y_pred=y_pred)
print(report)

### 1.6 Feature Scaling

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Standardise the features, then fit the logistic regression on the scaled
# values. The pipeline applies the scaler fitted on the training data to the
# test data at prediction time.
pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("logistic", LogisticRegression())
])

pipe.fit(ds.loc[:, ds.columns != 'label'], ds['label'].values.ravel())

y_pred_scale = pipe.predict(ds_test.loc[:, ds_test.columns != 'label'])

accuracy = accuracy_score(ds_test['label'], y_pred_scale)
cm = confusion_matrix(ds_test['label'], y_pred_scale)

print("Accuracy:", accuracy)

fig, ax = plt.subplots(figsize=(10, 6))
# fmt="d" prints the cell counts as plain integers; seaborn's default
# annotation format (".2g") shows counts of 100 or more in scientific notation.
sns.heatmap(cm, annot=True, fmt="d", ax=ax)
_ = ax.set_xlabel("Predicted")
_ = ax.set_ylabel("Actual")

In [None]:
# Print the fitted parameters of `log_model` (the logistic regression fitted
# without the scaling pipeline).
for title, values in (
    ("Coefficients:", log_model.coef_),
    ("Intercept:", log_model.intercept_),
):
    print(title, values)

### 1.7 Feature Importance

In [None]:
import numpy as np

fig, ax = plt.subplots(figsize=(10, 8))

# `coef_` holds one row of coefficients per class when the model is fitted on
# more than two classes. Average the absolute values over all rows so the
# ranking reflects every class instead of only the first one; for a single
# row this equals the absolute value of that row.
logistic_coef = np.abs(pipe.named_steps["logistic"].coef_).mean(axis=0)

# Ascending sort followed by tail() keeps the five largest values.
feature_importances = pd.DataFrame(
    {"column": ds.loc[:, ds.columns != 'label'].columns, "coef": logistic_coef}
).sort_values(by="coef", ascending=True).tail()

# barh draws the first row at the bottom, so the largest value ends up on top.
ax.barh(feature_importances["column"], feature_importances["coef"])

### 1.8 Hyperparameter Tuning

In [None]:
from sklearn.model_selection import GridSearchCV

# The default "lbfgs" solver of LogisticRegression does not support the L1
# penalty, so every "l1" candidate would fail to fit and drop out of the
# search. "saga" supports both penalties and multi-class targets. It is set
# through the grid (as a single-value entry) so that `pipe` stays unchanged.
# saga usually needs more than the default 100 iterations, hence max_iter.
param_grid = {
    "logistic__solver": ["saga"],
    "logistic__max_iter": [1000],
    "logistic__C": [0.1, 1, 10, 100, 1000],
    "logistic__penalty": ["l1", "l2"],
}

# 5-fold cross-validated search over the grid, using the training set only.
grid = GridSearchCV(pipe, param_grid, cv=5)
grid.fit(ds.loc[:, ds.columns != 'label'], ds['label'].values.ravel())

print("Best cross-validation score: {:.2f}".format(grid.best_score_))
print("Best parameters: ", grid.best_params_)



### 2.0 Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree with default hyper-parameters, fitted on every column except
# the target.
tree = DecisionTreeClassifier()
tree.fit(ds.loc[:, ds.columns != 'label'], ds["label"].values.ravel())

y_pred_tree = tree.predict(ds_test.loc[:, ds_test.columns != 'label'])

accuracy = accuracy_score(ds_test['label'], y_pred_tree)
cm = confusion_matrix(ds_test['label'], y_pred_tree)

print("Accuracy:", accuracy)

# Plot the confusion matrix. fmt="d" prints the counts as plain integers;
# seaborn's default annotation format (".2g") shows counts of 100 or more in
# scientific notation.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(cm, annot=True, fmt="d", ax=ax)
_ = ax.set_xlabel("Predicted")
_ = ax.set_ylabel("Actual")

plt.show()



In [None]:
# Rank the features by the decision tree's importance scores and keep the
# five largest (ascending sort followed by tail()).
feature_columns = ds.columns[ds.columns != 'label']
feature_importances = (
    pd.DataFrame({"column": feature_columns, "coef": tree.feature_importances_})
    .sort_values(by="coef", ascending=True)
    .tail()
)

# Horizontal bar chart of the selected features.
fig, ax = plt.subplots(figsize=(10, 8))
ax.barh(feature_importances["column"], feature_importances["coef"])
plt.show()

In [None]:
from sklearn.tree import plot_tree

# Draw the fitted decision tree on a wide 20x10 inch figure; the return value
# is discarded.
fig, ax = plt.subplots(figsize=(20, 10))
_ = plot_tree(decision_tree=tree, filled=True, ax=ax)
plt.show()

### 2.1 Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with default hyper-parameters, fitted on every column except
# the target.
forest = RandomForestClassifier()
forest.fit(ds.loc[:, ds.columns != 'label'], ds["label"].values.ravel())

y_pred_forest = forest.predict(ds_test.loc[:, ds_test.columns != 'label'])

accuracy = accuracy_score(ds_test['label'], y_pred_forest)
cm = confusion_matrix(ds_test['label'], y_pred_forest)

print("Accuracy:", accuracy)

# Plot the confusion matrix. fmt="d" prints the counts as plain integers;
# seaborn's default annotation format (".2g") shows counts of 100 or more in
# scientific notation.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(cm, annot=True, fmt="d", ax=ax)
_ = ax.set_xlabel("Predicted")
_ = ax.set_ylabel("Actual")

plt.show()

### 2.2 Gradient Boosting Classifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting with default hyper-parameters, fitted on every column
# except the target.
gb = GradientBoostingClassifier()
gb.fit(ds.loc[:, ds.columns != 'label'], ds["label"].values.ravel())

y_pred_gb = gb.predict(ds_test.loc[:, ds_test.columns != 'label'])

accuracy = accuracy_score(ds_test['label'], y_pred_gb)

cm = confusion_matrix(ds_test['label'], y_pred_gb)

print("Accuracy:", accuracy)

# Plot the confusion matrix. fmt="d" prints the counts as plain integers;
# seaborn's default annotation format (".2g") shows counts of 100 or more in
# scientific notation.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(cm, annot=True, fmt="d", ax=ax)
_ = ax.set_xlabel("Predicted")
_ = ax.set_ylabel("Actual")

plt.show()

In [None]:
# Rank the features by the gradient-boosting model's importance scores and
# keep the five largest (ascending sort followed by tail()).
feature_columns = ds.columns[ds.columns != 'label']
feature_importances = (
    pd.DataFrame({"column": feature_columns, "coef": gb.feature_importances_})
    .sort_values(by="coef", ascending=True)
    .tail()
)

# Horizontal bar chart of the selected features.
fig, ax = plt.subplots(figsize=(10, 8))
ax.barh(feature_importances["column"], feature_importances["coef"])
plt.show()

In [None]:
# Draw a single tree of the boosted ensemble: the element at position [0][0]
# of `gb.estimators_`. `plot_tree` must already be imported from sklearn.tree.
first_tree = gb.estimators_[0][0]

fig, ax = plt.subplots(figsize=(20, 10))
_ = plot_tree(decision_tree=first_tree, filled=True, ax=ax)
plt.show()

### 2.3 Support Vector Machine

In [None]:
from sklearn.svm import SVC

# Support vector classifier with default hyper-parameters, fitted on every
# column except the target.
svc = SVC()
svc.fit(ds.loc[:, ds.columns != 'label'], ds["label"].values.ravel())

y_pred_svc = svc.predict(ds_test.loc[:, ds_test.columns != 'label'])

accuracy = accuracy_score(ds_test['label'], y_pred_svc)

cm = confusion_matrix(ds_test['label'], y_pred_svc)

print("Accuracy:", accuracy)

# Plot the confusion matrix. fmt="d" prints the counts as plain integers;
# seaborn's default annotation format (".2g") shows counts of 100 or more in
# scientific notation.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(cm, annot=True, fmt="d", ax=ax)
_ = ax.set_xlabel("Predicted")
_ = ax.set_ylabel("Actual")

plt.show()

### 2.4 K-Nearest Neighbors

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with default hyper-parameters, fitted on
# every column except the target.
knn = KNeighborsClassifier()
knn.fit(ds.loc[:, ds.columns != 'label'], ds["label"].values.ravel())

y_pred_knn = knn.predict(ds_test.loc[:, ds_test.columns != 'label'])

accuracy = accuracy_score(ds_test['label'], y_pred_knn)

cm = confusion_matrix(ds_test['label'], y_pred_knn)

print("Accuracy:", accuracy)

# Plot the confusion matrix. fmt="d" prints the counts as plain integers;
# seaborn's default annotation format (".2g") shows counts of 100 or more in
# scientific notation.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(cm, annot=True, fmt="d", ax=ax)
_ = ax.set_xlabel("Predicted")
_ = ax.set_ylabel("Actual")

plt.show()

### 2.5 Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with default settings, fitted on every column except
# the target.
nb = GaussianNB()
nb.fit(ds.loc[:, ds.columns != 'label'], ds["label"].values.ravel())

y_pred_nb = nb.predict(ds_test.loc[:, ds_test.columns != 'label'])

accuracy = accuracy_score(ds_test['label'], y_pred_nb)

cm = confusion_matrix(ds_test['label'], y_pred_nb)

print("Accuracy:", accuracy)

# Plot the confusion matrix. fmt="d" prints the counts as plain integers;
# seaborn's default annotation format (".2g") shows counts of 100 or more in
# scientific notation.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(cm, annot=True, fmt="d", ax=ax)
_ = ax.set_xlabel("Predicted")
_ = ax.set_ylabel("Actual")

plt.show()

### 2.6 Neural Network

In [None]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with default hyper-parameters, fitted on every
# column except the target.
nn = MLPClassifier()
nn.fit(ds.loc[:, ds.columns != 'label'], ds["label"].values.ravel())

y_pred_nn = nn.predict(ds_test.loc[:, ds_test.columns != 'label'])

accuracy = accuracy_score(ds_test['label'], y_pred_nn)

cm = confusion_matrix(ds_test['label'], y_pred_nn)

print("Accuracy:", accuracy)

# Plot the confusion matrix. fmt="d" prints the counts as plain integers;
# seaborn's default annotation format (".2g") shows counts of 100 or more in
# scientific notation.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(cm, annot=True, fmt="d", ax=ax)
_ = ax.set_xlabel("Predicted")
_ = ax.set_ylabel("Actual")

plt.show()