Load DATA 

In [70]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from pgmpy.estimators import MaximumLikelihoodEstimator
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score



In [63]:
dataset_path = "Mental Health Dataset.csv"
df = pd.read_csv(dataset_path, sep=';')

In [64]:

# Fill missing categorical values with mode
categorical_cols = df.select_dtypes(include=['object']).columns
for col in categorical_cols:
    if df[col].isnull().any():  # Check if there are missing values in the column
        df[col] = df[col].fillna(df[col].mode()[0])  # Fill missing values with mode

# Save the dataset with filled missing values to a CSV file
df.to_csv("filled_dataset.csv", index=False)


# PERFORM ONE HOT ENCODING

In [65]:
# Load the dataset (if not already loaded)
file_path = "filled_dataset.csv"  # Replace "filled_dataset.csv" with the path to your filled dataset
df_filled = pd.read_csv(file_path)

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Encode categorical variables using label encoding
categorical_cols = df_filled.select_dtypes(include=['object']).columns
for col in categorical_cols:
    df_filled[col] = label_encoder.fit_transform(df[col])

# Display the encoded dataset
df_filled.to_csv("filled_dataset_encoding.csv", index=False)

# Now we get Test,Validation and Train data

In [66]:
# Split the filled dataset into training and test sets
train_data_filled, test_data_filled = train_test_split(df_filled, test_size=0.2, random_state=42)

# Further split the training data into training and validation sets
train_data_filled, val_data_filled = train_test_split(train_data_filled, test_size=0.2, random_state=42)

# Display the shape of each dataset
print("Training data shape:", train_data_filled.shape)
print("Validation data shape:", val_data_filled.shape)
print("Test data shape:", test_data_filled.shape)

Training data shape: (187112, 17)
Validation data shape: (46779, 17)
Test data shape: (58473, 17)


# DEFINING THE MODEL

In [67]:
# Define the structure of the Bayesian network
edges = [
    ('Gender', 'treatment'),
    ('Country', 'treatment'),
    ('Occupation', 'treatment'),
    ('self_employed', 'treatment'),
    ('family_history', 'treatment'),
    ('Days_Indoors', 'treatment'),
    ('Growing_Stress', 'treatment'),
    ('Changes_Habits', 'treatment'),
    ('Mental_Health_History', 'treatment'),
    ('Mood_Swings', 'treatment'),
    ('Coping_Struggles', 'treatment'),
    ('Work_Interest', 'treatment'),
    ('Social_Weakness', 'treatment'),
    ('mental_health_interview', 'treatment'),
    ('care_options', 'treatment')
]


# TRAINING THE MODEL

In [69]:
# Initialize BayesianModel
model = BayesianModel(edges)

# Fit the model using the training data
model.fit(train_data_filled, estimator=MaximumLikelihoodEstimator)



# PREDICTING THE MODEL

In [75]:
# Prepare the validation data by dropping the target variable and any additional variables
validation_features = val_data_filled.drop(columns=['treatment', 'Timestamp'])  # Drop 'treatment' and 'Timestamp'

# Make predictions on the validation set
predictions = model.predict(validation_features)

# Extract the ground truth labels from the validation set
true_labels = val_data_filled['treatment']


  0%|          | 0/23290 [00:00<?, ?it/s]

MemoryError: Unable to allocate 1.37 GiB for an array with shape (2, 3, 2, 35, 5, 2, 3, 3, 3, 5, 3, 3, 3, 2, 3, 2) and data type float64

# Calculating evaluation metrics

In [None]:
# Calculate evaluation metrics
accuracy = accuracy_score(true_labels, predictions)
precision = precision_score(true_labels, predictions)
recall = recall_score(true_labels, predictions)
f1 = f1_score(true_labels, predictions)
roc_auc = roc_auc_score(true_labels, predictions)

# Print the evaluation metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)
print("ROC AUC:", roc_auc