In [1]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'mental-disorder-classification:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F4352146%2F7476679%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240602%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240602T120113Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D1777e13ddd4eecdb4f083e5267589b90fa73925b4bb1627ff6e17e14e217e7f801aa9843548a5d36182fd10626d04b88ec9e44f13d363fa001c8afa658d26720f10dda17abb0d08f3f5e0f34328a32f058d4cfe25143ec94c6066de9078d3963ac53c73f5ee7c21d396f55cfdef7f14df3b44817e349ea1e72c7afee54d8248068ce49df15a728771b3b36a250e62bfdb66374e0b3f5afc9ee3a9e7ba10e1d64e91fdab7f321b53873046d9eef35389f2ca8e44421a3b23ea198fea53970a903614730878f74fac7d7d8e7d27cf9da2a12a87eb8280818463c894a24e1247915f21ef160bfc7ebada658bf9d75407d77bc6a9e41a75bb7f0e73178253a85aa34'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading mental-disorder-classification, 2226 bytes compressed
Downloaded and uncompressed: mental-disorder-classification
Data source import complete.


In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/mental-disorder-classification/Dataset-Mental-Disorders.csv


In [3]:
data = pd.read_csv("/kaggle/input/mental-disorder-classification/Dataset-Mental-Disorders.csv")
data = data.drop('Patient Number', axis=1)
data.head()

Unnamed: 0,Sadness,Euphoric,Exhausted,Sleep dissorder,Mood Swing,Suicidal thoughts,Anorxia,Authority Respect,Try-Explanation,Aggressive Response,Ignore & Move-On,Nervous Break-down,Admit Mistakes,Overthinking,Sexual Activity,Concentration,Optimisim,Expert Diagnose
0,Usually,Seldom,Sometimes,Sometimes,YES,YES,NO,NO,YES,NO,NO,YES,YES,YES,3 From 10,3 From 10,4 From 10,Bipolar Type-2
1,Usually,Seldom,Usually,Sometimes,NO,YES,NO,NO,NO,NO,NO,NO,NO,NO,4 From 10,2 From 10,5 From 10,Depression
2,Sometimes,Most-Often,Sometimes,Sometimes,YES,NO,NO,NO,YES,YES,NO,YES,YES,NO,6 From 10,5 From 10,7 From 10,Bipolar Type-1
3,Usually,Seldom,Usually,Most-Often,YES,YES,YES,NO,YES,NO,NO,NO,NO,NO,3 From 10,2 From 10,2 From 10,Bipolar Type-2
4,Usually,Usually,Sometimes,Sometimes,NO,NO,NO,NO,NO,NO,NO,YES,YES,YES,5 From 10,5 From 10,6 From 10,Normal


In [4]:
data['Expert Diagnose'].value_counts()

Expert Diagnose
Bipolar Type-2    31
Depression        31
Normal            30
Bipolar Type-1    28
Name: count, dtype: int64

In [5]:
# Handle missing values
data = data.dropna()

# Convert 'Suicidal thoughts' column to string type
data['Suicidal thoughts'] = data['Suicidal thoughts'].astype(str)

# Strip whitespace from 'Suicidal thoughts' column
data['Suicidal thoughts'] = data['Suicidal thoughts'].str.strip()

# Map string values to numerical values
mapping = {'Most-Often': 4, 'Usually': 3, 'Sometimes': 2, 'Seldom': 1, 'YES': 1, 'NO': 0}
data.replace(mapping, inplace=True)

# Print data after mapping
print("Data after mapping:\n", data.head())

# Correct typo in 'Optimisim'
data.rename(columns={'Optimisim': 'Optimism'}, inplace=True)

Data after mapping:
    Sadness  Euphoric  Exhausted  Sleep dissorder  Mood Swing  \
0        3         1          2                2           1   
1        3         1          3                2           0   
2        2         4          2                2           1   
3        3         1          3                4           1   
4        3         3          2                2           0   

   Suicidal thoughts  Anorxia  Authority Respect  Try-Explanation  \
0                  1        0                  0                1   
1                  1        0                  0                0   
2                  0        0                  0                1   
3                  1        1                  0                1   
4                  0        0                  0                0   

   Aggressive Response  Ignore & Move-On  Nervous Break-down  Admit Mistakes  \
0                    0                 0                   1               1   
1                  

In [6]:
import re

numeric_cols = ['Sexual Activity', 'Concentration', 'Optimism']
for col in numeric_cols:
    data[col] = data[col].apply(lambda x: re.findall(r'\d+', str(x))[0]).astype(int)

# Encode the target labels
label_encoder = LabelEncoder()
data['Expert Diagnose'] = label_encoder.fit_transform(data['Expert Diagnose'])

# Print data to verify
print("Data after label encoding:\n", data.head())

Data after label encoding:
    Sadness  Euphoric  Exhausted  Sleep dissorder  Mood Swing  \
0        3         1          2                2           1   
1        3         1          3                2           0   
2        2         4          2                2           1   
3        3         1          3                4           1   
4        3         3          2                2           0   

   Suicidal thoughts  Anorxia  Authority Respect  Try-Explanation  \
0                  1        0                  0                1   
1                  1        0                  0                0   
2                  0        0                  0                1   
3                  1        1                  0                1   
4                  0        0                  0                0   

   Aggressive Response  Ignore & Move-On  Nervous Break-down  Admit Mistakes  \
0                    0                 0                   1               1   
1           

In [7]:
# Split data into features (X) and target (y)
X = data.drop('Expert Diagnose', axis=1)
y = data['Expert Diagnose']

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers

# Define the model
model = Sequential([
    Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01), input_shape=(X_train.shape[1],)),
    Dropout(0.5),
    Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    Dropout(0.5),
    Dense(4, activation='softmax')
])
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Print a summary of the model
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               2304      
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 4)                 260       
                                                                 
Total params: 10820 (42.27 KB)
Trainable params: 10820 (42.27 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [26]:
from tensorflow.keras.callbacks import ModelCheckpoint, Callback

class AccuracyCallback(Callback):
    def on_epoch_end(self, epoch, logs=None):
        if logs.get('val_accuracy') > 0.9:
            print("\nReached 90% accuracy cancelling training!")
            self.model.stop_training = True

# Define the custom accuracy callback
accuracy_callback = AccuracyCallback()

In [28]:
# Train the model
history = model.fit(X_train, y_train, epochs=200, batch_size=16, validation_data=(X_test, y_test), callbacks=[accuracy_callback])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
1/6 [====>.........................] - ETA: 0s - loss: 0.1421 - accuracy: 1.0000
Reached 90% accuracy cancelling training!


In [29]:
# Make predictions
predictions = model.predict(X_test)
predicted_labels = predictions.argmax(axis=1)

# Map labels back to original strings
predicted_labels = label_encoder.inverse_transform(predicted_labels)
y_test_labels = label_encoder.inverse_transform(y_test)

# Print expected vs actual labels
print("\nExpected vs Actual:")
for i in range(len(X_test)):
    print(f"Expected: {y_test_labels[i]}, Actual: {predicted_labels[i]}")


Expected vs Actual:
Expected: Depression, Actual: Depression
Expected: Bipolar Type-2, Actual: Bipolar Type-2
Expected: Normal, Actual: Normal
Expected: Normal, Actual: Depression
Expected: Normal, Actual: Normal
Expected: Normal, Actual: Depression
Expected: Normal, Actual: Normal
Expected: Normal, Actual: Normal
Expected: Depression, Actual: Depression
Expected: Depression, Actual: Depression
Expected: Depression, Actual: Depression
Expected: Bipolar Type-1, Actual: Bipolar Type-1
Expected: Bipolar Type-1, Actual: Bipolar Type-1
Expected: Bipolar Type-1, Actual: Bipolar Type-1
Expected: Normal, Actual: Normal
Expected: Bipolar Type-2, Actual: Bipolar Type-2
Expected: Depression, Actual: Depression
Expected: Bipolar Type-2, Actual: Bipolar Type-2
Expected: Bipolar Type-1, Actual: Bipolar Type-1
Expected: Bipolar Type-2, Actual: Bipolar Type-2
Expected: Depression, Actual: Depression
Expected: Bipolar Type-2, Actual: Bipolar Type-2
Expected: Depression, Actual: Depression
Expected: De

In [30]:
# Save the model to an HDF5 file
model.save('mental_health_diagnosis_model.h5')
print("Model saved to disk.")

Model saved to disk.


  saving_api.save_model(
