In [1]:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json

In [2]:
# globals
seed = 42

In [3]:
df = pd.read_csv('./data/our_features_cleaned_revised.csv')
display(df.head())
print(f"Shape of dataset: {df.shape}\n")
print(f"Unique target values: {df['X5GPAALL'].unique()}")

Unnamed: 0,STU_ID,X1TXMTSCOR,X1PAREDU,X1SES,X1MTHINT,X1SCIINT,X2TXMTSCOR,X2PAREDU,X2BEHAVEIN,X2MTHINT_R,...,A1MSPRGMS,A1HRTEACHERS,A1HRSTUDENT,A1TCHRABSENT,A1PRNTINV,A1UNPREP,A1HEALTH,A1RESOURCES,C1PLANPARENT,C1SIGNOFF
0,10001,59.371,5,1.5644,0.12,-0.23,68.6447,6,-2.17,-0.21,...,1,5,3,1,2,1,1,2,-7,-7
1,10002,47.6821,3,-0.3699,-9.0,-9.0,54.0863,4,0.61,-0.6,...,0,7,9,2,3,3,3,2,-9,-9
2,10003,64.2431,7,1.2741,0.86,0.93,55.6336,7,0.52,-1.39,...,1,15,5,2,2,2,2,2,1,0
3,10006,58.1268,3,1.0639,2.08,-7.0,53.9958,5,0.89,-7.0,...,0,3,6,-9,4,4,2,2,1,1
4,10007,49.496,2,-0.43,2.08,0.16,47.6403,2,0.39,-0.47,...,1,8,6,1,1,1,1,1,1,1


Shape of dataset: (12588, 54)

Unique target values: [3 1 0 2]


In [4]:
df = df.drop(columns=["STU_ID"])
df.X5GPAALL.value_counts()

X5GPAALL
1    4107
2    3164
3    2899
0    2418
Name: count, dtype: int64

In [5]:
df = df.replace([-1,-2,-3,-4,-5,-6,-7,-8,-9],np.nan)

In [6]:
X = df.drop(columns=["X5GPAALL"])
y = df.X5GPAALL

In [7]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.25, random_state = seed, stratify=y
)

In [8]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from imblearn.over_sampling import SMOTE
from collections import Counter

# --- STEP 1: DEFINE COLUMNS AND INSTANTIATE TOOLS ---
bool_cols = ["A1MSMENTOR", "A1MSSPEAKER", "A1MSFLDTRIP", "A1MSPRGMS"]
cat_cols = [
    "X2PAREDU", "X1PAREDU", "X3TGPAHIMTH", "X3TGPAHISCI", "X3TGPASTEM", "X3TGPAACAD",
    "A1TCHRABSENT", "A1PRNTINV", "A1UNPREP", "A1HEALTH", "A1RESOURCES", "A1SCHCONTROL",
    "S2LATESCH", "S2ABSENT", "S2WOHWDN", "S2WOPAPER", "S2WOBOOKS", "S2SKIPCLASS", 
    "S2INSCHSUSP", "S2OUTSCHSUSP", "S2DISCIPLINE", "S2EXPELLED", "S2ARRESTED", "S2JUVHOME"
]
# Define num_cols based on exclusion
num_cols = [c for c in X_train.columns if c not in bool_cols and c not in cat_cols]

# Define Imputers, Scaler, and Encoder
num_imputer = SimpleImputer(strategy="mean")
bool_imputer = SimpleImputer(strategy="most_frequent")
cat_imputer = SimpleImputer(strategy="most_frequent")
scaler = StandardScaler()
encoder = OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore')

# --- STEP 2: PREPROCESS TRAINING DATA ---
# A. Split by type
X_train_num = X_train[num_cols]
X_train_bool = X_train[bool_cols]
X_train_cat = X_train[cat_cols]

# B. Impute
X_train_num_imp = pd.DataFrame(num_imputer.fit_transform(X_train_num), columns=num_cols, index=X_train.index)
X_train_bool_imp = pd.DataFrame(bool_imputer.fit_transform(X_train_bool), columns=bool_cols, index=X_train.index)
X_train_cat_imp = pd.DataFrame(cat_imputer.fit_transform(X_train_cat), columns=cat_cols, index=X_train.index)

# C. Scale (Numeric Only) - ADDED HERE
X_train_scale = pd.DataFrame(scaler.fit_transform(X_train_num_imp), columns=num_cols, index=X_train.index)

# D. Encode (Categorical Only)
X_train_cat_encoded = encoder.fit_transform(X_train_cat_imp)
X_train_cat_encoded_df = pd.DataFrame(
    X_train_cat_encoded, 
    columns=encoder.get_feature_names_out(cat_cols), 
    index=X_train.index
)

# E. Combine (Using Scaled Num, Imputed Bool, and Encoded Cat)
X_train_final = pd.concat([X_train_scale, X_train_bool_imp, X_train_cat_encoded_df], axis=1)

# --- STEP 3: APPLY SMOTE (TRAINING ONLY) ---
print(f"Original Class Distribution: {Counter(y_train)}")

# Strategy: Upsample Class 2 to match the majority class
majority_count = y_train.value_counts().max()
strategy = {2: majority_count} 

smote = SMOTE(sampling_strategy=strategy, random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_final, y_train)

# Convert back to DataFrame
X_train_resampled = pd.DataFrame(X_train_resampled, columns=X_train_final.columns)

print(f"Resampled Class Distribution: {Counter(y_train_resampled)}")

# --- STEP 4: PREPROCESS TEST DATA (TRANSFORM ONLY) ---
# A. Split
X_test_num = X_test[num_cols]
X_test_bool = X_test[bool_cols]
X_test_cat = X_test[cat_cols]

# B. Impute
X_test_num_imp = pd.DataFrame(num_imputer.transform(X_test_num), columns=num_cols, index=X_test.index)
X_test_bool_imp = pd.DataFrame(bool_imputer.transform(X_test_bool), columns=bool_cols, index=X_test.index)
X_test_cat_imp = pd.DataFrame(cat_imputer.transform(X_test_cat), columns=cat_cols, index=X_test.index)

# C. Scale
X_test_scale = pd.DataFrame(scaler.transform(X_test_num_imp), columns=num_cols, index=X_test.index)

# D. Encode
X_test_cat_encoded = encoder.transform(X_test_cat_imp)
X_test_cat_encoded_df = pd.DataFrame(
    X_test_cat_encoded, 
    columns=encoder.get_feature_names_out(cat_cols), 
    index=X_test.index
)

# E. Combine
X_test_final = pd.concat([X_test_scale, X_test_bool_imp, X_test_cat_encoded_df], axis=1)

Original Class Distribution: Counter({1: 3080, 2: 2373, 3: 2174, 0: 1814})
Resampled Class Distribution: Counter({1: 3080, 2: 3080, 3: 2174, 0: 1814})


[WinError 2] The system cannot find the file specified
  File "c:\Users\abano\anaconda3\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
        "wmic CPU Get NumberOfCores /Format:csv".split(),
        capture_output=True,
        text=True,
    )
  File "c:\Users\abano\anaconda3\Lib\subprocess.py", line 554, in run
    with Popen(*popenargs, **kwargs) as process:
         ~~~~~^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\abano\anaconda3\Lib\subprocess.py", line 1039, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
    ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                        pass_fds, cwd, env,
                        ^^^^^^^^^^^^^^^^^^^
    ...<5 lines>...
                        gid, gids, uid, umask,
                        ^^^^^^^^^^^^^^^^^^^^^^
                        start_new_session, process_group)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^

In [None]:
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=1, include_bias=False)

X_train_poly = poly.fit_transform(X_train_resampled)

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report

mlr_model = LogisticRegression(max_iter=1000, random_state=seed, class_weight='balanced')
mlr_model.fit(X_train_poly, y_train_resampled)

mlr_y_hat1 = mlr_model.predict(X_test_final)

mlr_precision_1 = precision_score(y_test, mlr_y_hat1, average="macro", zero_division=0)
print("Precision:", mlr_precision_1)
mlr_recall_1 = recall_score(y_test, mlr_y_hat1, average="macro", zero_division=0)
print("Recall:", mlr_recall_1)
mlr_f1_1 = f1_score(y_test, mlr_y_hat1, average="macro", zero_division=0)
print("F1 Score:", mlr_f1_1)
mlr_accuracy_1 = accuracy_score(y_test, mlr_y_hat1)
print("Accuracy:", mlr_accuracy_1)
print(classification_report(y_test, mlr_y_hat1, zero_division=0))



ValueError: X has 119 features, but LogisticRegression is expecting 7259 features as input.

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report

model = SVC(kernel='poly').fit(X_train_poly, y_train_resampled)

svm_y_hat3 = model.predict(X_test_poly)

mlr_precision_3 = precision_score(y_test, svm_y_hat3, average="macro", zero_division=0)
print("Precision:", mlr_precision_3)
mlr_recall_3 = recall_score(y_test, svm_y_hat3, average="macro", zero_division=0)
print("Recall:", mlr_recall_3)
mlr_f1_3 = f1_score(y_test, svm_y_hat3, average="macro", zero_division=0)
print("F1 Score:", mlr_f1_3)
mlr_accuracy_3 = accuracy_score(y_test, svm_y_hat3)
print("Accuracy:", mlr_accuracy_3)
print(classification_report(y_test, svm_y_hat3, zero_division=0))

: 

: 

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report

model = SVC(kernel='rbf').fit(X_train_poly, y_train_resampled)

svm_y_hat3 = model.predict(X_test_poly)

mlr_precision_3 = precision_score(y_test, svm_y_hat3, average="macro", zero_division=0)
print("Precision:", mlr_precision_3)
mlr_recall_3 = recall_score(y_test, svm_y_hat3, average="macro", zero_division=0)
print("Recall:", mlr_recall_3)
mlr_f1_3 = f1_score(y_test, svm_y_hat3, average="macro", zero_division=0)
print("F1 Score:", mlr_f1_3)
mlr_accuracy_3 = accuracy_score(y_test, svm_y_hat3)
print("Accuracy:", mlr_accuracy_3)
print(classification_report(y_test, svm_y_hat3, zero_division=0))

In [None]:
import statsmodels.api as sm

def backward_elimination(X, y, sl=0.05, dropped=None):
    if dropped is None:
        dropped = []

    # Fit OLS with an intercept
    X_with_const = sm.add_constant(X)
    X_with_const = X_with_const.astype("float64")
    model = sm.MNLogit(y, X_with_const).fit(disp=False)

    # Extract p-values of x values
    pvalues = model.pvalues.iloc[1:]

    # Find the maximum p-value and corresponding feature
    pvalues = model.pvalues.iloc[1:]  # exclude intercept
    max_pvals = pvalues.max(axis=1)   # max p-value per feature

    # Find the feature with the highest p-value
    max_pval = max_pvals.max()
    if max_pval > sl:
        feature_to_drop = max_pvals.idxmax()
        dropped.append(feature_to_drop)

        # Recurse without that feature
        return backward_elimination(
            X.drop(columns=[feature_to_drop]),
            y,
            sl,
            dropped
        )
    else:
        # Stop recursion; all p-values are below threshold
        retained = list(X.columns)
        return retained, dropped, model

In [None]:
retained_1, dropped_1, final_ols_1 = backward_elimination(X_train_resampled, y_train_resampled, sl=0.10)

print("Retained features:")
print(retained_1)

print("\nDropped features:")
print(dropped_1)

Retained features:
['X1TXMTSCOR', 'X1SES', 'X1MTHINT', 'X1SCIINT', 'X2TXMTSCOR', 'X2BEHAVEIN', 'X2MTHINT_R', 'X2SCIINT', 'X4X2SES', 'S1GOODGRADES', 'S1HRMHOMEWK', 'S1HRSHOMEWK', 'S1HROTHHOMWK', 'S1HRACTIVITY', 'S1HRTV', 'S1HRVIDEO', 'S1HRONLINE', 'A1MTHSCIFAIR', 'A1MSSUMMER', 'A1MSAFTERSCH', 'A1HRTEACHERS', 'A1HRSTUDENT', 'C1PLANPARENT', 'C1SIGNOFF', 'A1MSMENTOR', 'A1MSSPEAKER', 'A1MSFLDTRIP', 'A1MSPRGMS', 'X2PAREDU_2.0', 'X2PAREDU_3.0', 'X2PAREDU_4.0', 'X2PAREDU_5.0', 'X2PAREDU_6.0', 'X2PAREDU_7.0', 'X1PAREDU_2.0', 'X1PAREDU_3.0', 'X1PAREDU_4.0', 'X1PAREDU_5.0', 'X1PAREDU_7.0', 'X3TGPAHIMTH_0.5', 'X3TGPAHIMTH_1.0', 'X3TGPAHIMTH_1.5', 'X3TGPAHIMTH_2.0', 'X3TGPAHIMTH_2.5', 'X3TGPAHIMTH_3.0', 'X3TGPAHIMTH_3.5', 'X3TGPAHIMTH_4.0', 'X3TGPAHISCI_0.5', 'X3TGPAHISCI_1.0', 'X3TGPAHISCI_1.5', 'X3TGPAHISCI_2.0', 'X3TGPAHISCI_2.5', 'X3TGPAHISCI_3.0', 'X3TGPAHISCI_3.5', 'X3TGPAHISCI_4.0', 'X3TGPASTEM_0.5', 'X3TGPASTEM_1.0', 'X3TGPASTEM_1.5', 'X3TGPASTEM_2.0', 'X3TGPASTEM_2.5', 'X3TGPASTEM_3.0', 'X

  eXB = np.column_stack((np.ones(len(X)), np.exp(X)))
  return eXB/eXB.sum(1)[:,None]


In [None]:
retained_5, dropped_5, final_ols_5 = backward_elimination(X_train_resampled, y_train_resampled, sl=0.05)

print("Retained features:")
print(retained_5)

print("\nDropped features:")
print(dropped_5)

Retained features:
['X1TXMTSCOR', 'X1SES', 'X1MTHINT', 'X1SCIINT', 'X2TXMTSCOR', 'X2BEHAVEIN', 'X2MTHINT_R', 'X2SCIINT', 'X4X2SES', 'S1GOODGRADES', 'S1HRMHOMEWK', 'S1HRSHOMEWK', 'S1HROTHHOMWK', 'S1HRACTIVITY', 'S1HRTV', 'S1HRVIDEO', 'S1HRONLINE', 'A1MTHSCIFAIR', 'A1MSSUMMER', 'A1MSAFTERSCH', 'A1HRTEACHERS', 'A1HRSTUDENT', 'C1PLANPARENT', 'C1SIGNOFF', 'A1MSMENTOR', 'A1MSSPEAKER', 'A1MSFLDTRIP', 'A1MSPRGMS', 'X2PAREDU_2.0', 'X2PAREDU_3.0', 'X2PAREDU_4.0', 'X2PAREDU_5.0', 'X2PAREDU_6.0', 'X2PAREDU_7.0', 'X1PAREDU_2.0', 'X1PAREDU_3.0', 'X1PAREDU_4.0', 'X1PAREDU_5.0', 'X1PAREDU_7.0', 'X3TGPAHIMTH_0.5', 'X3TGPAHIMTH_1.0', 'X3TGPAHIMTH_1.5', 'X3TGPAHIMTH_2.0', 'X3TGPAHIMTH_2.5', 'X3TGPAHIMTH_3.0', 'X3TGPAHIMTH_3.5', 'X3TGPAHIMTH_4.0', 'X3TGPAHISCI_0.5', 'X3TGPAHISCI_1.0', 'X3TGPAHISCI_1.5', 'X3TGPAHISCI_2.0', 'X3TGPAHISCI_2.5', 'X3TGPAHISCI_3.0', 'X3TGPAHISCI_3.5', 'X3TGPAHISCI_4.0', 'X3TGPASTEM_0.5', 'X3TGPASTEM_1.0', 'X3TGPASTEM_1.5', 'X3TGPASTEM_2.0', 'X3TGPASTEM_2.5', 'X3TGPASTEM_3.0', 'X

  eXB = np.column_stack((np.ones(len(X)), np.exp(X)))
  return eXB/eXB.sum(1)[:,None]


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report

X_train_reduced5 = X_train_resampled[retained_5]
X_test_reduced5 = X_test_final[retained_5]

mlr_model3 = LogisticRegression(max_iter=1000, random_state=seed)
mlr_model3.fit(X_train_reduced5, y_train_resampled)

mlr_y_hat3 = mlr_model3.predict(X_test_reduced5)

mlr_precision_3 = precision_score(y_test, mlr_y_hat3, average="macro", zero_division=0)
print("Precision:", mlr_precision_3)
mlr_recall_3 = recall_score(y_test, mlr_y_hat3, average="macro", zero_division=0)
print("Recall:", mlr_recall_3)
mlr_f1_3 = f1_score(y_test, mlr_y_hat3, average="macro", zero_division=0)
print("F1 Score:", mlr_f1_3)
mlr_accuracy_3 = accuracy_score(y_test, mlr_y_hat3)
print("Accuracy:", mlr_accuracy_3)
print(classification_report(y_test, mlr_y_hat3, zero_division=0))

Precision: 0.47829236801624475
Recall: 0.44289267735545723
F1 Score: 0.4519347647545321
Accuracy: 0.451858913250715
              precision    recall  f1-score   support

           0       0.50      0.39      0.43       604
           1       0.45      0.51      0.48      1027
           2       0.36      0.47      0.41       791
           3       0.61      0.41      0.49       725

    accuracy                           0.45      3147
   macro avg       0.48      0.44      0.45      3147
weighted avg       0.47      0.45      0.45      3147



In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report

mlr_model = LogisticRegression(max_iter=1000, random_state=seed)
mlr_model.fit(X_train_resampled, y_train_resampled)

mlr_y_hat1 = mlr_model.predict(X_test_final)

mlr_precision_1 = precision_score(y_test, mlr_y_hat1, average="macro", zero_division=0)
print("Precision:", mlr_precision_1)
mlr_recall_1 = recall_score(y_test, mlr_y_hat1, average="macro", zero_division=0)
print("Recall:", mlr_recall_1)
mlr_f1_1 = f1_score(y_test, mlr_y_hat1, average="macro", zero_division=0)
print("F1 Score:", mlr_f1_1)
mlr_accuracy_1 = accuracy_score(y_test, mlr_y_hat1)
print("Accuracy:", mlr_accuracy_1)
print(classification_report(y_test, mlr_y_hat1, zero_division=0))

Precision: 0.47829236801624475
Recall: 0.44289267735545723
F1 Score: 0.4519347647545321
Accuracy: 0.451858913250715
              precision    recall  f1-score   support

           0       0.50      0.39      0.43       604
           1       0.45      0.51      0.48      1027
           2       0.36      0.47      0.41       791
           3       0.61      0.41      0.49       725

    accuracy                           0.45      3147
   macro avg       0.48      0.44      0.45      3147
weighted avg       0.47      0.45      0.45      3147



In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

lda = LinearDiscriminantAnalysis()

X_train_lda = lda.fit_transform(X_train_resampled, y_train_resampled)
X_test_lda = lda.transform(X_test_final)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report

mlr_model_lda = LogisticRegression(max_iter=1000, random_state=seed, class_weight='balanced')
mlr_model_lda.fit(X_train_lda, y_train_resampled)

mlr_y_hat5 = mlr_model_lda.predict(X_test_lda)

mlr_precision_5 = precision_score(y_test, mlr_y_hat5, average="macro", zero_division=0)
print("Precision:", mlr_precision_5)
mlr_recall_5 = recall_score(y_test, mlr_y_hat5, average="macro", zero_division=0)
print("Recall:", mlr_recall_5)
mlr_f1_5 = f1_score(y_test, mlr_y_hat5, average="macro", zero_division=0)
print("F1 Score:", mlr_f1_5)
mlr_accuracy_5 = accuracy_score(y_test, mlr_y_hat5)
print("Accuracy:", mlr_accuracy_5)
print(classification_report(y_test, mlr_y_hat5, zero_division=0))

Precision: 0.45894614713430415
Recall: 0.4612463434859527
F1 Score: 0.45680863729565696
Accuracy: 0.4496345726088338
              precision    recall  f1-score   support

           0       0.46      0.57      0.51       604
           1       0.45      0.40      0.42      1027
           2       0.36      0.40      0.38       791
           3       0.57      0.48      0.52       725

    accuracy                           0.45      3147
   macro avg       0.46      0.46      0.46      3147
weighted avg       0.46      0.45      0.45      3147



In [None]:
from sklearn.decomposition import PCA

pca18 = PCA(n_components = 18)
X_train_pca18 = pca18.fit_transform(X_train_resampled)
X_test_pca18 = pca18.transform(X_test_final)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report

mlr_model_pca = LogisticRegression(max_iter=1000, random_state=seed)
mlr_model_pca.fit(X_train_pca18, y_train_resampled)

mlr_y_hat4 = mlr_model_pca.predict(X_test_pca18)

mlr_precision_4 = precision_score(y_test, mlr_y_hat4, average="macro", zero_division=0)
print("Precision:", mlr_precision_4)
mlr_recall_4 = recall_score(y_test, mlr_y_hat4, average="macro", zero_division=0)
print("Recall:", mlr_recall_4)
mlr_f1_4 = f1_score(y_test, mlr_y_hat4, average="macro", zero_division=0)
print("F1 Score:", mlr_f1_4)
mlr_accuracy_4 = accuracy_score(y_test, mlr_y_hat4)
print("Accuracy:", mlr_accuracy_4)
print(classification_report(y_test, mlr_y_hat4, zero_division=0))

Precision: 0.4284055507554796
Recall: 0.37967583991341225
F1 Score: 0.38313176389005865
Accuracy: 0.4003813155386082
              precision    recall  f1-score   support

           0       0.46      0.21      0.29       604
           1       0.41      0.51      0.46      1027
           2       0.32      0.47      0.38       791
           3       0.52      0.33      0.40       725

    accuracy                           0.40      3147
   macro avg       0.43      0.38      0.38      3147
weighted avg       0.42      0.40      0.39      3147



In [None]:
from sklearn.ensemble import AdaBoostClassifier

ada_model_lda = AdaBoostClassifier(estimator=mlr_model, n_estimators=100, random_state=seed).fit(X_train_resampled, y_train_resampled)

ada_y_hat = ada_model_lda.predict(X_test_final)

ada_precision = precision_score(y_test, ada_y_hat, average="macro", zero_division=0)
print("Precision:", ada_precision)
ada_recall = recall_score(y_test, ada_y_hat, average="macro", zero_division=0)
print("Recall:", ada_recall)
ada_f1 = f1_score(y_test, ada_y_hat, average="macro", zero_division=0)
print("F1 Score:", ada_f1)
ada_accuracy = accuracy_score(y_test, ada_y_hat)
print("Accuracy:", ada_accuracy)
print(classification_report(y_test, ada_y_hat, zero_division=0))

Precision: 0.4446011510182566
Recall: 0.42449580731071035
F1 Score: 0.4300596857274742
Accuracy: 0.42739116619002226
              precision    recall  f1-score   support

           0       0.46      0.40      0.43       604
           1       0.43      0.44      0.44      1027
           2       0.34      0.45      0.39       791
           3       0.54      0.41      0.47       725

    accuracy                           0.43      3147
   macro avg       0.44      0.42      0.43      3147
weighted avg       0.44      0.43      0.43      3147

