In [2]:
import streamlit as st
import numpy as np
import pandas as pd


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
import random

import json


In [4]:
# Load the intents data from a JSON file.
# The encoding is passed explicitly: without it, open() uses the platform's
# locale encoding, which can mis-decode (or fail on) non-ASCII characters in a
# UTF-8 JSON file.
with open(r'C:\Users\USER\Downloads\intents.json', 'r', encoding='utf-8') as f:
    data = json.load(f)


In [5]:
# Load the same intents file again, this time straight into a pandas DataFrame
df = pd.read_json(path_or_buf=r'C:\Users\USER\Downloads\intents.json')

In [6]:
# Preview the loaded frame; the output below shows a single `intents` column
# holding one dict per row.
df

Unnamed: 0,intents
0,"{'tag': 'greeting', 'patterns': ['Hi', 'Hey', ..."
1,"{'tag': 'morning', 'patterns': ['Good morning'..."
2,"{'tag': 'afternoon', 'patterns': ['Good aftern..."
3,"{'tag': 'evening', 'patterns': ['Good evening'..."
4,"{'tag': 'night', 'patterns': ['Good night'], '..."
...,...
75,"{'tag': 'fact-28', 'patterns': ['What do I do ..."
76,"{'tag': 'fact-29', 'patterns': ['How do I know..."
77,"{'tag': 'fact-30', 'patterns': ['How can I mai..."
78,"{'tag': 'fact-31', 'patterns': ['What's the di..."


In [7]:
# New intents data for adding to the existing dataset.
# The three lists are parallel: position i of each list forms one row once the
# dict is turned into a DataFrame, so every response sits at the same position
# as the pattern it answers.
new_intents_data = {
    "tag": ["symptom"] * 4,
    "patterns": [
        "I have a headache",
        "I feel dizzy",
        "I'm not feeling well",
        "I have a cough"
    ],
    "responses": [
        "Headaches can be caused by various factors. Make sure to stay hydrated and consider resting.",
        "Dizziness can occur due to various reasons; please consult a doctor if it persists.",
        "I'm sorry to hear that you're not feeling well. It's best to consult with a healthcare professional.",
        "Coughing can be a sign of various conditions. If it continues, please seek medical advice."
    ]
}

In [8]:
# Turn the new intents into their own DataFrame, then stack those rows under
# the frame loaded from the JSON file (the index is renumbered from 0).
# NOTE(review): `df` is rebuilt further down the notebook, so the concatenated
# frame produced here does not appear to be read afterwards — confirm.
new_intents_df = pd.DataFrame(data=new_intents_data)
df = pd.concat(objs=[df, new_intents_df], axis=0, ignore_index=True)

In [9]:
# Accumulator for the flattened intents: three parallel lists, one per column,
# all starting out empty.
dic = {column: [] for column in ("tag", "patterns", "responses")}


In [10]:
# Flatten the intents: every pattern becomes one row that carries its intent's
# tag and that intent's complete list of responses.
for intent in data['intents']:
    tag, patterns, responses = intent['tag'], intent['patterns'], intent['responses']
    row_count = len(patterns)
    dic['tag'].extend([tag] * row_count)
    dic['patterns'].extend(patterns)
    # The same `responses` list object is shared by all rows of this intent.
    dic['responses'].extend([responses] * row_count)

In [11]:
# Convert the dictionary to a DataFrame for easier manipulation
intents_df = pd.DataFrame.from_dict(dic)

# Fix: assigning the frame above straight to `df` discarded the rows of
# `new_intents_df`, so the extra intents were never part of the data used for
# training and response lookup. They are appended here instead.
# Rows built from the JSON carry the full list of responses of their intent, so
# the extra rows are given the same shape: each one holds every response
# recorded for its tag.
responses_by_tag = new_intents_df.groupby("tag")["responses"].agg(list).to_dict()
extra_intents_df = new_intents_df.assign(
    responses=new_intents_df["tag"].map(responses_by_tag)
)
df = pd.concat(
    [intents_df, extra_intents_df[["tag", "patterns", "responses"]]],
    ignore_index=True,
)

In [12]:

import matplotlib.pyplot as plt

# Bar chart of how often each tag (intent) occurs.
# NOTE(review): the counts are taken from the first 100 rows only, because the
# row limit is applied before value_counts() — confirm this is intended.
tag_counts = df['tag'].iloc[:100].value_counts()
plt.figure(figsize=(10, 6))
tag_counts.plot.bar(color='skyblue')
plt.xlabel("Intent (Tag)")
plt.ylabel("Frequency")
plt.title("Frequency of Each Intent (Tag)")
plt.xticks(rotation=45)

# Render the figure
plt.show()

  plt.show()


In [13]:
# Store the length of every pattern as a new column.
df['pattern_length'] = df['patterns'].map(len)

# Histogram of those lengths, 20 bins.
plt.figure(figsize=(10, 6))
df['pattern_length'].hist(color='lightgreen', bins=20)
plt.xlabel("Length of Pattern (Number of Characters)")
plt.ylabel("Frequency")
plt.title("Distribution of Pattern Lengths")
plt.show()

  plt.show()


In [14]:
# Features are the pattern texts; the label to predict is the tag.
X, y = df['patterns'], df['tag']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [15]:
# Learn the TF-IDF vocabulary and weights from the training texts only, then
# encode both splits with that fitted vectorizer.
vectorizer = TfidfVectorizer()
vectorizer.fit(X_train)
X_train_vec, X_test_vec = (vectorizer.transform(texts) for texts in (X_train, X_test))

In [16]:
# Fit a Support Vector Machine classifier (default settings) on the
# vectorized training patterns and their tags.
model = SVC()
model.fit(X=X_train_vec, y=y_train)

In [17]:
# Predict which intent a piece of user text belongs to
def predict_intent(user_input):
    """Return the intent predicted for ``user_input``.

    The text is wrapped in a one-element list, encoded with the module-level
    ``vectorizer`` and passed to the module-level ``model``; the first (and
    only) prediction is returned.
    """
    features = vectorizer.transform([user_input])
    predictions = model.predict(features)
    return predictions[0]


In [18]:
# Pick a reply for a predicted intent
def generate_response(intent):
    """Return one randomly chosen response for ``intent``.

    Looks up the rows of the module-level ``df`` whose ``tag`` equals
    ``intent``, takes the ``responses`` value of the first matching row and
    draws one element from it with ``random.choice``.
    """
    matching_responses = df.loc[df['tag'] == intent, 'responses']
    candidates = matching_responses.values[0]
    return random.choice(candidates)

In [19]:
# Streamlit user interface: page title followed by a one-line description.
# NOTE(review): the captured output below shows Streamlit pointing at the
# `streamlit run` command when this is executed from the notebook kernel —
# presumably the UI is meant to be launched as a script; confirm.
st.title("Chatbot")
st.write("Chatbot for mental health conversation.")


2024-12-03 16:51:44.004 
  command:

    streamlit run C:\Users\USER\AppData\Roaming\Python\Python313\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [20]:
# Get user input. Surrounding whitespace is stripped so that an entry made up
# only of spaces counts as "no input" instead of being sent to the model.
user_input = st.text_input("You:", "").strip()

# If user input is provided, predict intent and generate response
if user_input:
    intent = predict_intent(user_input)
    response = generate_response(intent)
    st.write("Chatbot:", response)

2024-12-03 16:52:25.368 Session state does not function when running a script without `streamlit run`
