# Import Data from the Dataset



## Configuring the package import

In [14]:
import sys

# Make the parent directory importable so the package imported in the next
# cell can be found. The membership check keeps this cell idempotent:
# re-running it does not append a duplicate ".." entry to sys.path.
# NOTE(review): append() places the entry at the END of sys.path, so an
# installed package with the same name as the local one would be resolved
# first -- confirm that this is the intended precedence.
if ".." not in sys.path:
    sys.path.append("..")  # Add parent directory to path


In [15]:

from datasets import load_dataset, validate_dataset, show_datasets  # resolved via the sys.path entry configured in the previous cell

# Read the numeric CSV and hand the loaded data straight to the package's
# validation step; only the validated result is kept under the name `ds`.
ds = validate_dataset(
    load_dataset("./../data/categorized_v4_numeric.csv")
)

# Preview the first rows as this cell's output.
ds.head()


Unnamed: 0,Q1A,Q1E,Q2A,Q2E,Q3A,Q3E,Q4A,Q4E,Q5A,Q5E,...,gender,age,religion,married,familysize,major_category,depression_score,anxiety_score,stress_score,das_score
0,4,3890,4,2122,2,1944,4,2044,4,2153,...,2,16,12,1,2,0,41,48,54,86
1,4,8118,1,2890,2,4777,3,3090,4,5078,...,2,16,7,1,4,0,38,31,41,66
2,3,5784,1,4373,4,3242,1,6470,4,3927,...,2,17,4,1,3,0,53,26,31,66
3,2,5081,3,6837,2,5521,1,4556,3,3269,...,2,13,4,1,5,6,30,31,30,55
4,2,3215,2,7731,3,4156,4,2802,4,5628,...,2,19,10,1,4,8,46,54,43,86


## Removing unnecessary columns

In [18]:
# `drop` returns a new frame and leaves `ds` untouched (assuming `ds` is a
# pandas DataFrame), so the result has to be bound to a name for the removal
# to show up in the preview below.
# `ds` itself is deliberately not reassigned: the training cell further down
# removes "country" from `ds` itself when it builds the feature matrix.
ds_without_country = ds.drop(columns=["country"])
ds_without_country.head()

Unnamed: 0,Q1A,Q1E,Q2A,Q2E,Q3A,Q3E,Q4A,Q4E,Q5A,Q5E,...,gender,age,religion,married,familysize,major_category,depression_score,anxiety_score,stress_score,das_score
0,4,3890,4,2122,2,1944,4,2044,4,2153,...,2,16,12,1,2,0,41,48,54,86
1,4,8118,1,2890,2,4777,3,3090,4,5078,...,2,16,7,1,4,0,38,31,41,66
2,3,5784,1,4373,4,3242,1,6470,4,3927,...,2,17,4,1,3,0,53,26,31,66
3,2,5081,3,6837,2,5521,1,4556,3,3269,...,2,13,4,1,5,6,30,31,30,55
4,2,3215,2,7731,3,4156,4,2802,4,5628,...,2,19,10,1,4,8,46,54,43,86


# Training the Model

In [20]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score, classification_report

# Single source of truth for the target columns. It is used to build X, to
# build y and to drive the per-target evaluation loop, so the three can never
# drift apart. The order here is the column order of y and of y_pred.
target_names = ["depression_score", "anxiety_score", "stress_score", "das_score"]

# Define features (X) and multiple targets (y)
# The target columns must exist, so they are dropped strictly (a missing one
# raises KeyError). "country" is dropped leniently so this cell keeps working
# whether or not that column has already been removed from `ds`.
# NOTE(review): the preview of `ds` shows an "Unnamed: 0" column, which stays
# in X as a feature -- it looks like a saved row index; confirm it is intended.
X = ds.drop(columns=target_names).drop(columns=["country"], errors="ignore")
y = ds[target_names]  # All target columns

# Split the dataset into training and testing sets
# (80/20 split; the fixed random_state makes the split reproducible.)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Initialize the MultiOutputClassifier with RandomForestClassifier
# You can adjust n_estimators and other parameters as needed
# NOTE(review): every distinct score value is treated as its own class here;
# the recorded report lists many integer classes with low per-class scores, so
# a regressor may be a better fit -- confirm the modelling intent.
multi_target_model = MultiOutputClassifier(
    RandomForestClassifier(n_estimators=100, random_state=42)
)

# Train the model
multi_target_model.fit(X_train, y_train)

# Make predictions
# y_pred is indexed below as y_pred[:, i], i.e. one column per target.
y_pred = multi_target_model.predict(X_test)

# Evaluate the model - need to evaluate each target separately
for i, target in enumerate(target_names):
    # Column i of y_test and of y_pred both belong to `target`.
    y_true_i = y_test.iloc[:, i]
    y_pred_i = y_pred[:, i]
    print(f"\nResults for {target}:")
    print(f"Accuracy: {accuracy_score(y_true_i, y_pred_i)}")
    print(classification_report(y_true_i, y_pred_i))



Results for depression_score:
Accuracy: 0.2578252671275927
              precision    recall  f1-score   support

          14       0.89      1.00      0.94       182
          15       0.62      0.67      0.64       172
          16       0.35      0.54      0.42       177
          17       0.33      0.30      0.32       183
          18       0.26      0.29      0.28       153
          19       0.19      0.19      0.19       174
          20       0.23      0.20      0.21       171
          21       0.18      0.21      0.19       178
          22       0.20      0.19      0.20       197
          23       0.16      0.18      0.17       186
          24       0.17      0.19      0.18       174
          25       0.19      0.14      0.16       188
          26       0.17      0.19      0.18       170
          27       0.16      0.12      0.14       208
          28       0.23      0.22      0.22       228
          29       0.16      0.17      0.16       199
          30       0.

## Saving the model

```python
import joblib

# Output file for the serialized model. Defined once so the path handed to
# joblib.dump and the path reported to the user cannot drift apart.
MODEL_PATH = 'multi_target_model.pkl'

# Save the trained model to a file
# NOTE(review): joblib files are pickle-based, so loading one can execute
# arbitrary code -- only load this file back from a trusted source.
joblib.dump(multi_target_model, MODEL_PATH)

print(f"Model saved as '{MODEL_PATH}'")
```