<a href="https://colab.research.google.com/github/Riazhatvi/-AI-Text-Based-Mental-Health-Support-System/blob/main/Ai_MHS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Step 1: Import the libraries used by the AI Mental Health Support project

In [None]:
import json
import nltk
import pandas as pd
import numpy as np
import torch
import plotly.express as px
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.tokenize import word_tokenize
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

# Next, load the data source from Google Drive (the file is in JSON format)

In [None]:
# Read the intents file from Google Drive. JSON text is UTF-8 by specification,
# so the encoding is passed explicitly instead of relying on the platform's
# default locale encoding (which can mis-decode non-ASCII characters).
with open('/content/drive/MyDrive/Mental_Health_Project/Mental_health.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

import pandas as pd

# The top-level 'intents' key holds the list of intent records; fall back to an
# empty list when the key is missing so the DataFrame construction still works.
intents_data = data.get('intents', [])
# NOTE: `data` is rebound from the parsed JSON dict to a DataFrame of intents.
data = pd.DataFrame(intents_data)
print(data)


                   tag                                           patterns  \
0             greeting  [Hi, Hey, Is anyone there?, Hi there, Hello, H...   
1              morning                                     [Good morning]   
2            afternoon                                   [Good afternoon]   
3              evening                                     [Good evening]   
4                night                                       [Good night]   
..                 ...                                                ...   
136          gratitude  [How can I practice gratitude?, Tell me about ...   
137       goal-setting  [How can I set and achieve my goals?, Give me ...   
138         relaxation  [I need relaxation techniques, Help me de-stre...   
139    procrastination  [I keep procrastinating, I need help with time...   
140  positive-thinking  [I want to think more positively, Help me with...   

                                             responses  
0    [Hello there.

# Restructuring data
This code is essentially reshaping the data to ensure that each row in the new DataFrame corresponds to a unique pair of input patterns and responses, along with their associated tag.

This restructuring simplifies the data representation, making it easier to work with in data science tasks.

In [None]:
# Flatten the nested intents into three flat collections:
#   patterns  -> one {'tag', 'pattern'} record per example utterance
#   responses -> one {'tag', 'response'} record per canned reply
#   tags      -> one tag string per intent, in file order
patterns, responses, tags = [], [], []

for intent in intents_data:
    tag = intent.get('tag', '')
    tags.append(tag)

    patterns.extend(
        {'tag': tag, 'pattern': text} for text in intent.get('patterns', [])
    )
    # An intent without a 'responses' key simply contributes no response records.
    responses.extend(
        {'tag': tag, 'response': reply} for reply in intent.get('responses', [])
    )

print(patterns)
print(responses)
print(tags)



In [None]:
# NOTE(review): `keras.preprocessing.text` is a legacy import path and may be
# unavailable in newer Keras releases — confirm against the installed version.
from keras.preprocessing.text import Tokenizer
import pandas as pd
# Build the working DataFrame: one row per (tag, pattern) record from `patterns`.
df_new = pd.DataFrame(patterns)
print(df_new)

# Tokenize the patterns
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df_new['pattern'])

# Access the word index
# NOTE(review): `word_index` is only printed here; the later cells shown in this
# notebook vectorize with TF-IDF instead — confirm whether it is still needed.
word_index = tokenizer.word_index
print(word_index)




                   tag                           pattern
0             greeting                                Hi
1             greeting                               Hey
2             greeting                  Is anyone there?
3             greeting                          Hi there
4             greeting                             Hello
..                 ...                               ...
453    procrastination  I need help with time management
454    procrastination   How can I stop procrastinating?
455  positive-thinking   I want to think more positively
456  positive-thinking    Help me with positive thinking
457  positive-thinking    I'm tired of negative thoughts

[458 rows x 2 columns]


# In this section we will create a Data Frame and Encode tags

In [None]:
# Map every tag string to an integer class id and store it next to the tag.
label_encoder = LabelEncoder()
label_encoder.fit(df_new['tag'])
df_new['tag_encoded'] = label_encoder.transform(df_new['tag'])
print(df_new)



                   tag                           pattern  tag_encoded
0             greeting                                Hi           33
1             greeting                               Hey           33
2             greeting                  Is anyone there?           33
3             greeting                          Hi there           33
4             greeting                             Hello           33
..                 ...                               ...          ...
453    procrastination  I need help with time management           62
454    procrastination   How can I stop procrastinating?           62
455  positive-thinking   I want to think more positively           59
456  positive-thinking    Help me with positive thinking           59
457  positive-thinking    I'm tired of negative thoughts           59

[458 rows x 3 columns]


# Exploratory Data Analysis (EDA): using EDA, we will visualize our data.

The cell below plots how many patterns each tag has.

In [None]:
# Number of pattern rows per tag, most frequent first.
intent_counts = df_new['tag'].value_counts()

# One bar per tag, bar height = number of patterns carrying that tag.
fig = px.bar(
    x=intent_counts.index,
    y=intent_counts.values,
    title='Distribution of Intents',
    labels={'x': 'Tags', 'y': 'Count'},
)
fig.show()

# In this cell we calculate the average pattern and response counts for each tag and plot them

In [None]:
import plotly.graph_objects as go

# Calculate the pattern and response counts for each tag.
# The previous version stored len(pattern) (characters in the pattern text) and
# len(tag) (characters in the tag name), neither of which is a count of
# patterns or responses, and it plotted one bar segment per row instead of one
# value per tag.
# df_new holds one row per pattern, so counting rows per tag gives the number
# of patterns for that tag.
pattern_counts = df_new['tag'].value_counts()
# `responses` holds one {'tag', 'response'} record per reply.
response_counts = pd.Series(
    [entry['tag'] for entry in responses], dtype=object
).value_counts()

# Keep the existing column names so any later cell reading them still works;
# every row of a tag carries that tag's count, so the per-tag mean equals it.
df_new['avg_pattern_count'] = df_new['tag'].map(pattern_counts)
# Tags that have no responses at all are reported as 0 rather than NaN.
df_new['avg_response_count'] = (
    df_new['tag'].map(response_counts).fillna(0).astype(int)
)

# Plot one bar per tag (not one per pattern row) using a bar chart
tag_summary = df_new.drop_duplicates(subset='tag')
fig = go.Figure()
fig.add_trace(go.Bar(x=tag_summary['tag'], y=tag_summary['avg_pattern_count'], name='Average Pattern Count'))
fig.add_trace(go.Bar(x=tag_summary['tag'], y=tag_summary['avg_response_count'], name='Average Response Count'))
fig.update_layout(title='Pattern and Response Analysis', xaxis_title='Intents', yaxis_title='Average Count')
fig.show()


# The next step is to split the dataset into training and validation sets

In [None]:
# Hold out 20% of the (pattern, encoded tag) pairs for validation.
# The fixed random_state makes the shuffle reproducible between runs.
X_train, X_val, y_train, y_val = train_test_split(
    df_new['pattern'],
    df_new['tag_encoded'],
    random_state=42,
    test_size=0.2,
)

# Vectorize the text with TF-IDF (fitted on the training set only) and convert the TF-IDF matrices to dense NumPy arrays

In [None]:
# Learn the TF-IDF vocabulary and weights from the training patterns only, then
# apply the same fitted vectorizer to the validation patterns.
vectorizer = TfidfVectorizer()

# The sparse TF-IDF matrices are converted straight to dense NumPy arrays so
# they can later be turned into torch tensors.
X_train_vec = vectorizer.fit_transform(X_train).toarray()
X_val_vec = vectorizer.transform(X_val).toarray()

# Now we will create Neural Network Model

In [None]:
# Feed-forward classifier used for intent prediction
class NeuralNet(nn.Module):
    """Two-layer network: Linear -> ReLU -> Dropout -> Linear.

    Args:
        input_size: number of input features per sample.
        hidden_size: width of the hidden layer.
        output_size: number of output scores (one per class).
        dropout_rate: dropout probability applied to the hidden activations.
    """

    def __init__(self, input_size, hidden_size, output_size, dropout_rate=0.5):
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.dropout = nn.Dropout(dropout_rate)
        self.layer2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the raw (un-normalized) class scores for the batch `x`."""
        hidden = self.dropout(torch.relu(self.layer1(x)))
        return self.layer2(hidden)

# In this step we will define the dataset class (usable with a PyTorch DataLoader)

In [None]:
# Dataset wrapper around feature rows and integer labels
class ChatDataset(Dataset):
    """Pairs feature rows with integer class labels for use with a DataLoader.

    Args:
        X: array-like of numeric features, indexed by sample along axis 0.
        y: array-like of integer class labels, one per sample.
    """

    def __init__(self, X, y):
        self.n_samples = len(X)
        # Go through np.asarray first: the legacy torch.Tensor / torch.LongTensor
        # constructors treat a pandas Series as a plain sequence and look up
        # element 0 by label, which fails when the index was shuffled (as it is
        # after train_test_split). torch.tensor copies, like the old constructors.
        self.x_data = torch.tensor(np.asarray(X), dtype=torch.float32)
        self.y_data = torch.tensor(np.asarray(y), dtype=torch.long)

    def __getitem__(self, index):
        """Return the (features, label) pair at position `index`."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples."""
        return self.n_samples

# We will convert the pandas Series to lists for further processing; the input size is taken to be the size of the TF-IDF vector

In [None]:
# Plain Python lists of the raw training / validation pattern strings
X_train_list = X_train.tolist()
X_val_list = X_val.tolist()

# Network input width = number of TF-IDF features (columns of the dense matrix)
input_size = X_train_vec.shape[1]

# Network output width = number of distinct encoded tags
output_size = df_new['tag_encoded'].nunique()

# Initialize the model, optimizer, and criterion; then train the model and validate it.

In [None]:
# Initialize the model, optimizer, and criterion
model = NeuralNet(input_size, hidden_size=64, output_size=output_size)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Training configuration
epochs = 1000
print_interval = 100
batch_size = 32

# Build the training tensors once instead of re-creating them for every batch.
# np.asarray drops the shuffled pandas index so labels are taken positionally,
# in the same row order as X_train_vec.
train_X = torch.tensor(X_train_vec, dtype=torch.float32)
train_y = torch.tensor(np.asarray(y_train), dtype=torch.long)

# Training stage: mini-batch gradient descent in fixed row order.
model.train()
for epoch in range(epochs):
    for i in range(0, train_X.shape[0], batch_size):
        batch_X = train_X[i:i+batch_size]
        batch_y = train_y[i:i+batch_size]

        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

    # Progress report. This check must live inside the epoch loop; it was
    # previously dedented, so it ran only once after training had finished.
    # The reported loss is that of the last mini-batch of the epoch.
    if (epoch + 1) % print_interval == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

# Validation: switch to eval mode (disables dropout) and skip gradient tracking.
model.eval()
with torch.no_grad():
    val_X = torch.tensor(X_val_vec, dtype=torch.float32)
    # Convert through NumPy: passing the Series directly makes torch look up
    # element 0 by index label, which breaks when label 0 is not in the split.
    val_y = torch.tensor(np.asarray(y_val), dtype=torch.long)
    val_outputs = model(val_X)
    # Predicted class = index of the highest score in each row.
    _, predicted = torch.max(val_outputs, 1)
    # accuracy_score expects (y_true, y_pred).
    accuracy = accuracy_score(val_y.numpy(), predicted.numpy())
    print(f'Validation Accuracy: {accuracy * 100:.2f}%')


Epoch [1000/1000], Loss: 0.0093
Validation Accuracy: 70.65%


In [None]:
# Persist only the learned parameters (the state dict), written to
# 'trained_model.pth' relative to the current working directory.
torch.save(model.state_dict(), 'trained_model.pth')