In [1]:
import pandas as pd
import numpy as np
import ast
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [24]:
json = pd.read_json('intents.json')

In [29]:
# json['intents'][0]['tag'],json['intents'][0]['patterns'],json['intents'][0]['responses']

In [26]:
df = pd.DataFrame([], columns=['tag','pattern','response'])

In [27]:
for i, row in enumerate(json['intents']):
    # print(row['patterns'])
    df.loc[i,'tag'],df.loc[i,'pattern'],df.loc[i,'response'] = row['tag'],row['patterns'],row['responses']

In [28]:
df.to_csv('chatbot.csv')

In [21]:

# Load the dataset (assuming it's in the same directory)
file_path = 'chatbot.csv'  # Adjust this to your file path
chatbot_data = pd.read_csv(file_path)

# Preprocess the dataset
chatbot_data['pattern'] = chatbot_data['pattern'].apply(lambda x: ast.literal_eval(x))

# Combine all patterns into a single list
patterns = [pattern for sublist in chatbot_data['pattern'] for pattern in sublist]

# Create corresponding tags for each pattern
tags = [tag for tag, sublist in zip(chatbot_data['tag'], chatbot_data['pattern']) for _ in sublist]

# Encode the tags using LabelEncoder
label_encoder = LabelEncoder()
encoded_tags = label_encoder.fit_transform(tags)

# print(patterns)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(patterns, encoded_tags, test_size=0.2,shuffle=False, random_state=42)

# Convert patterns into numerical vectors using TF-IDF vectorization
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train).toarray()
X_test_tfidf = tfidf_vectorizer.transform(X_test).toarray()

# Get the number of unique tags and features
num_classes = len(np.unique(encoded_tags))
input_dim = X_train_tfidf.shape[1]



In [22]:
# Build the neural network model
model = Sequential([
    Dense(128, input_dim=input_dim, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
history = model.fit(X_train_tfidf, y_train, epochs=150, batch_size=8, validation_data=(X_test_tfidf, y_test), verbose=1)

# Save the model and label encoder for later use
model.save('chatbot_model.h5')
import pickle
with open('label_encoder.pkl', 'wb') as file:
    pickle.dump(label_encoder, file)

with open('tfidf_vectorizer.pkl', 'wb') as file:
    pickle.dump(tfidf_vectorizer, file)

print("Model training complete and saved.")

# You can load the model and use it for inference later with:
# from tensorflow.keras.models import load_model
# model = load_model('chatbot_model.h5')


Epoch 1/150


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.0159 - loss: 3.6214 - val_accuracy: 0.0000e+00 - val_loss: 3.7107
Epoch 2/150
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0979 - loss: 3.5291 - val_accuracy: 0.0000e+00 - val_loss: 3.8909
Epoch 3/150
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1190 - loss: 3.3568 - val_accuracy: 0.0000e+00 - val_loss: 4.3554
Epoch 4/150
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1909 - loss: 3.1106 - val_accuracy: 0.0000e+00 - val_loss: 4.9998
Epoch 5/150
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2275 - loss: 2.9021 - val_accuracy: 0.0000e+00 - val_loss: 5.4395
Epoch 6/150
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2557 - loss: 2.8353 - val_accuracy: 0.0000e+00 - val_loss: 5.8425
Epoch 7/150
[1m41/41



Model training complete and saved.
