In [None]:
from tkinter import *
import tkinter as tk

In [None]:
# Intent catalogue for the chatbot.
# Each entry under "intents" carries:
#   "tag"       - the class label for the intent,
#   "patterns"  - example user utterances for that intent,
#   "responses" - candidate replies for that intent.
data = {"intents": [
             {"tag": "mentor_intro",
              "patterns": ["hello", "hi", "good morning", "who are you", "introduce yourself", "what is your role"],
              "responses": [
                  "Gulshan, I am BPSC Teacher for Gulshan. We will focus on score-maximizing strategy with strict discipline.",
                  "Gulshan, I am your dedicated BPSC mentor. Ask a topic and I will explain it in exam-oriented language.",
                  "Gulshan, I am BPSC Teacher for Gulshan — personal mentor, test generator, and answer evaluator."
              ]
             },
             {"tag": "syllabus_guidance",
              "patterns": ["bpsc syllabus", "what to study for bpsc", "syllabus strategy", "prelims and mains syllabus"],
              "responses": [
                  "Gulshan, start with GS basics: History, Polity, Geography, Economy, Science, and Bihar-specific current affairs. Build notes topic-wise and revise weekly.",
                  "Gulshan, split preparation into Prelims MCQ practice + Mains answer writing. Daily: static subject + Bihar current affairs + PYQ analysis.",
                  "Gulshan, follow a 3-layer plan: concept clarity, PYQ-based practice, and timed revision tests."
              ]
             },
             {"tag": "test_generation",
              "patterns": ["make test", "generate quiz", "topic wise test", "full length test", "practice paper"],
              "responses": [
                  "Gulshan, I can generate a full-length BPSC test with marks distribution, timer, objective and descriptive sections. Tell me the topic.",
                  "Gulshan, choose: (1) 25-question topic quiz, (2) 50-question mixed GS test, or (3) Mains descriptive test.",
                  "Gulshan, I will prepare a BPSC-pattern test. Share subject, level, and available time."
              ]
             },
             {"tag": "answer_evaluation",
              "patterns": ["check my answer", "evaluate this answer", "give marks", "review my mains answer"],
              "responses": [
                  "Gulshan, submit your answer. I will give marks, strengths, mistakes, missing points, model answer, and improvement strategy.",
                  "Gulshan, write in intro-body-conclusion format. I will evaluate it like a BPSC examiner with topper-copy style feedback.",
                  "Gulshan, send your response and I will provide strict but constructive evaluation."
              ]
             },
             {"tag": "revision_task",
              "patterns": ["give revision task", "daily task", "micro test", "quick revision"],
              "responses": [
                  "Gulshan, today's task: revise one Bihar polity topic, solve 20 MCQs, and write one 150-word mains answer in 12 minutes.",
                  "Gulshan, micro-test: 10 MCQs + 1 short note. Target completion in 20 minutes with self-review.",
                  "Gulshan, revision drill: 30 minutes static GS + 15 minutes Bihar current affairs + 15 minutes PYQ analysis."
              ]
             },
             {"tag": "goodbye",
              "patterns": ["bye", "g2g", "see ya", "adios", "cya"],
              "responses": [
                  "Gulshan, session closed. Revise what we discussed and return with answers for evaluation.",
                  "Good work today, Gulshan. Stay consistent and practice timed answers.",
                  "Gulshan, end of session. Tomorrow we continue with a focused test and feedback cycle."
              ]
             }
]}


In [None]:
import json
import string
import random 
import nltk
import numpy as np
from nltk.stem import WordNetLemmatizer 
import tensorflow as tf 
from tensorflow.keras import Sequential 
from tensorflow.keras.layers import Dense, Dropout
# Fetch the NLTK resources this notebook relies on: the "punkt" tokenizer
# models and the "wordnet" corpus (presumably what nltk.word_tokenize and
# WordNetLemmatizer need at runtime - confirm against the NLTK docs).
nltk.download("punkt")
nltk.download("wordnet")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


True

In [None]:
lemmatizer = WordNetLemmatizer()

# Flatten the intents into parallel lists: doc_X holds every example pattern
# and doc_y holds the tag of the intent that pattern came from. All tokens of
# all patterns are collected along the way to form the vocabulary.
doc_X = []
doc_y = []
raw_tokens = []
for intent in data["intents"]:
    tag = intent["tag"]
    for pattern in intent["patterns"]:
        doc_X.append(pattern)
        doc_y.append(tag)
        raw_tokens += nltk.word_tokenize(pattern)

# Vocabulary: lowercased, lemmatized tokens with punctuation tokens dropped,
# de-duplicated and sorted alphabetically.
words = sorted({
    lemmatizer.lemmatize(token.lower())
    for token in raw_tokens
    if token not in string.punctuation
})

# Class labels: every distinct intent tag, sorted alphabetically.
classes = sorted({intent["tag"] for intent in data["intents"]})

In [None]:
# Build the training set: one bag-of-words feature vector (over `words`) and
# one one-hot label vector (over `classes`) per example pattern.
training = []
out_empty = [0] * len(classes)
for idx, doc in enumerate(doc_X):
    # Tokenize and lemmatize each word of the pattern, the same way the
    # vocabulary was built. Matching whole tokens (instead of testing whether
    # a vocabulary word is a substring of the sentence) prevents false hits
    # such as "hi" and "is" inside "this", and keeps the training features
    # consistent with the token-based features computed at prediction time.
    doc_tokens = {
        lemmatizer.lemmatize(token.lower())
        for token in nltk.word_tokenize(doc)
    }
    bow = [1 if word in doc_tokens else 0 for word in words]
    # one-hot encode the class this pattern belongs to
    output_row = list(out_empty)
    output_row[classes.index(doc_y[idx])] = 1
    training.append([bow, output_row])
# shuffle the examples, then split features and labels into separate arrays
random.shuffle(training)
training = np.array(training, dtype=object)
train_X = np.array(list(training[:, 0]))
train_y = np.array(list(training[:, 1]))


In [None]:
# Network dimensions are derived from the training arrays: one input per
# vocabulary word and one output per intent class.
input_shape = (len(train_X[0]),)
output_shape = len(train_y[0])
epochs = 200

# Small feed-forward classifier: two hidden ReLU layers with dropout,
# followed by a softmax over the intent classes.
model = Sequential()
model.add(Dense(128, input_shape=input_shape, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.3))
model.add(Dense(output_shape, activation="softmax"))

# The legacy `decay` keyword is intentionally not passed: the current
# (non-legacy) Keras optimizers reject it with an error, and a 1e-6 decay has
# a negligible effect over a run this short.
adam = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=["accuracy"])

# summary() prints the table itself and returns None, so it is not wrapped in
# print() (which would emit a stray "None" line).
model.summary()

# Train for the number of epochs configured above rather than a second
# hard-coded literal.
model.fit(x=train_X, y=train_y, epochs=epochs, verbose=1)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               5120      
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 5)                 325       
                                                                 
Total params: 13,701
Trainable params: 13,701
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200

<keras.callbacks.History at 0x7f8265a4c710>

In [None]:
def clean_text(text):
  """Tokenize `text` and return its lowercased, lemmatized tokens.

  Tokens are lowercased before lemmatization because the vocabulary is built
  from lowercased words; without this, input such as "Hello" would never
  match the vocabulary entry "hello".

  Args:
    text: the raw user message (a string).

  Returns:
    A list of normalized token strings, in their original order.
  """
  tokens = nltk.word_tokenize(text)
  return [lemmatizer.lemmatize(word.lower()) for word in tokens]

def bag_of_words(text, vocab):
  """Encode `text` as a 0/1 vector aligned with `vocab`.

  Position i of the result is 1 when vocab[i] equals one of the tokens
  produced by clean_text(text), and 0 otherwise.

  Args:
    text: the raw message to encode.
    vocab: the sequence of vocabulary words defining the vector positions.

  Returns:
    A numpy array with one entry per vocabulary word.
  """
  seen = clean_text(text)
  return np.array([1 if entry in seen else 0 for entry in vocab])

def pred_class(text, vocab, labels):
  """Return the labels the model scores above the cut-off, best first.

  The message is encoded with bag_of_words, passed through the module-level
  `model`, and every label whose score exceeds 0.2 is kept, ordered from the
  highest score to the lowest.

  Args:
    text: the raw message to classify.
    vocab: the vocabulary used for the bag-of-words encoding.
    labels: label names indexed by the model's output position.

  Returns:
    A list of label names (possibly empty when no score passes the cut-off).
  """
  thresh = 0.2
  features = bag_of_words(text, vocab)
  scores = model.predict(np.array([features]))[0]
  ranked = sorted(
    ((position, score) for position, score in enumerate(scores) if score > thresh),
    key=lambda pair: pair[1],
    reverse=True,
  )
  return [labels[position] for position, _ in ranked]

def get_response(intents_list, intents_json):
  """Pick a reply for the top-ranked intent.

  Args:
    intents_list: predicted intent tags, best match first; may be empty.
    intents_json: dictionary with an "intents" list whose entries have a
      "tag" and a list of "responses".

  Returns:
    A random response of the first intent whose tag equals intents_list[0];
    a prompt asking for a more specific request when intents_list is empty;
    or a rephrase request when no intent carries that tag.
  """
  if not intents_list:
    return "Gulshan, ask a specific BPSC topic, test request, or answer-evaluation prompt so I can guide you better."

  top_tag = intents_list[0]
  matched = next(
    (entry for entry in intents_json["intents"] if entry["tag"] == top_tag),
    None,
  )
  if matched is None:
    return "Gulshan, I did not match that clearly. Please rephrase with a BPSC-focused request."

  return random.choice(matched["responses"])



In [None]:
# Interactive chat loop: read a message, classify it, print the reply.
# The loop ends after a goodbye intent has been answered, or when input is
# closed / the kernel is interrupted, so the cell can actually finish instead
# of blocking forever or ending in a traceback.
while True:
    try:
        message = input("")
    except (EOFError, KeyboardInterrupt):
        break
    intents = pred_class(message, words, classes)
    result = get_response(intents, data)
    print(result)
    # stop once the farewell has been delivered
    if intents and intents[0] == "goodbye":
        break
    

Hello
I'm Kippi
