In [59]:
import joblib
import pandas as pd
import torch
import xgboost as xgb

from statistics import mode

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch import nn
from pycaret.classification import load_model, predict_model

In [4]:
# Read every column as a pandas categorical and keep empty or "NA"-like strings
# as literal values instead of converting them to NaN.
df = pd.read_csv(
    filepath_or_buffer='../LLCP2023_clean.csv',
    dtype='category',
    keep_default_na=False,
)

In [5]:
# One LabelEncoder per column, kept in a dict keyed by column name so that the
# fitted encoders stay available after the columns are overwritten.
label_encoders = {column: LabelEncoder() for column in df.columns}

# Fit each encoder on its column and replace the column with the integer codes.
for column, encoder in label_encoders.items():
    df[column] = encoder.fit_transform(df[column])

In [29]:
# '_MENT14D' is the prediction target; every other column is a feature.
y = df['_MENT14D']
X = df.drop('_MENT14D', axis='columns')

# Hold out 20% of the rows for testing. The split is stratified on the target
# and uses a fixed random_state so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [30]:
# Give the test features and labels a fresh 0..n-1 index (the old index is
# dropped, not kept as a column). Rebinding the names instead of using
# inplace=True avoids mutating the objects returned by train_test_split.
X_test = X_test.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

## Load pretrained models

### PyCaret

In [32]:
# Restore the saved PyCaret pipeline (preprocessing steps plus the model).
pycaret_model = load_model(
    model_name='../model-weights/PyCaret_model_num_cat',
)

Transformation Pipeline and Model Successfully Loaded


### Random forest

In [43]:
# Restore the random forest saved with joblib. joblib.load unpickles the file,
# so only load artefacts from a trusted source.
rf_model: RandomForestClassifier = joblib.load(
    filename='../model-weights/compressed_random_forest_model.pkl',
)

### XGBoost model

In [44]:
# Start from an unfitted classifier, then restore the trained model from its
# JSON file.
xgb_model: xgb.XGBClassifier = xgb.XGBClassifier()
xgb_model.load_model(fname='../model-weights/xgb_model.json')

### Neural Network

In [35]:
class MentalHealthClassifier(nn.Module):
    """Fully connected feed-forward classifier that outputs class probabilities.

    The network maps ``input_size`` features through hidden layers of width
    256, 128 and 64 (Tanh, Tanh and ReLU activations) to ``num_classes``
    outputs, followed by a softmax over dimension 1.

    Args:
        input_size: Number of input features per sample.
        num_classes: Number of output classes.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        # The attribute name and the position of each layer determine the
        # state-dict keys ("linear_relu_stack.<index>.weight" / ".bias"), so
        # both are kept unchanged to stay compatible with saved weights.
        layers = [
            nn.Linear(input_size, 256),
            nn.Tanh(),
            nn.Linear(256, 128),
            nn.Tanh(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes),
            nn.Softmax(dim=1),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the softmax output.

        For a 2-D batch of shape (batch, input_size) the result has shape
        (batch, num_classes) and every row sums to 1.
        """
        probabilities = self.linear_relu_stack(x)
        return probabilities


# Architecture sizes: number of features, number of target classes.
input_size, num_classes = 26, 3
nn_model = MentalHealthClassifier(input_size=input_size, num_classes=num_classes)

In [36]:
# Read the saved parameters onto the CPU. weights_only=True limits unpickling
# to tensors and other plain data.
nn_state_dict = torch.load(
    '../model-weights/nn_model.pt',
    map_location=torch.device('cpu'),
    weights_only=True,
)
nn_model.load_state_dict(nn_state_dict)
# eval() puts the module in evaluation mode and returns it, so the cell output
# shows the architecture.
nn_model.eval()

MentalHealthClassifier(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=26, out_features=256, bias=True)
    (1): Tanh()
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): Tanh()
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): ReLU()
    (6): Linear(in_features=64, out_features=3, bias=True)
    (7): Softmax(dim=1)
  )
)

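## Combine models

Each pretrained model predicts a label for every row of the test set; the ensemble prediction is the most common label among the four models.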

In [37]:
# predict_model returns a DataFrame; only its 'prediction_label' column (the
# predicted class for each row) is kept.
pycaret_output = predict_model(pycaret_model, data=X_test)
pred_pycaret = pycaret_output['prediction_label']

In [54]:
# Preview the first three PyCaret predictions.
pred_pycaret[:3]

0    2
1    1
2    2
Name: prediction_label, dtype: int64

In [45]:
# Predicted class per test row from the random forest.
pred_rf = rf_model.predict(X=X_test)

In [53]:
# Preview the first three random-forest predictions.
pred_rf[:3]

array([2, 1, 2])

In [47]:
# Predicted class per test row from the XGBoost classifier.
pred_xgb = xgb_model.predict(X=X_test)

In [50]:
# Preview the first three XGBoost predictions.
pred_xgb[:3]

array([2, 1, 2])

In [51]:
# Score the test set with the neural network. This is inference only, so the
# forward pass runs under torch.no_grad() and no autograd graph is built for
# the whole test set. argmax over dimension 1 picks, for each row, the class
# with the highest network output.
with torch.no_grad():
    nn_inputs = torch.tensor(X_test.values, dtype=torch.float32)
    pred_nn = nn_model(nn_inputs).argmax(dim=1).numpy()

In [52]:
# Preview the first three neural-network predictions.
pred_nn[:3]

array([2, 1, 0])

In [55]:
# Ensemble by voting: one column of predicted labels per model, one row per
# test sample. The PyCaret predictions are a pandas Series that carries its own
# index, while the other models return plain arrays. The Series is therefore
# converted to an array so that all votes are lined up by position rather than
# by index label. The DataFrame constructor raises if the arrays differ in
# length instead of silently padding with NaN.
model_votes = pd.DataFrame({
    "xgb_pred": pred_xgb,
    "rf_pred": pred_rf,
    "pycaret_pred": pred_pycaret.to_numpy(),
    "nn_pred": pred_nn,
})

# statistics.mode returns the most common label in each row. On Python 3.8+ a
# tie goes to the label encountered first, i.e. in the column order above.
combined_predictions = model_votes.apply(mode, axis=1)

In [56]:
# Display the ensemble's voted label for each test row.
combined_predictions

0        2
1        1
2        2
3        0
4        2
        ..
16085    2
16086    2
16087    0
16088    0
16089    2
Length: 16090, dtype: int64

In [60]:
# Fraction of test rows where the voted label equals the true label.
accuracy_score(y_true=y_test, y_pred=combined_predictions)

0.7119328775637042

In [62]:
# F1 of the voted labels, averaged over classes weighted by class support.
f1_score(
    y_true=y_test,
    y_pred=combined_predictions,
    average='weighted',
)

0.7079679344133109