In [1]:
import numpy as np
import pandas as pd
import zipfile
import os



zip_file_path = '/content/archive.zip'

with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall('/content/extracted')


for dirname, _, filenames in os.walk('/content/extracted'):
    for filename in filenames:
        print(os.path.join(dirname, filename))


/content/extracted/model.h5
/content/extracted/intents.json


In [2]:
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

In [3]:
!pip install google-api-python-client
from googleapiclient.discovery import build
def google_search(query, api_key, cse_id, num=1):
    service = build("customsearch", "v1", developerKey=api_key)
    res = service.cse().list(q=query, cx=cse_id, num=num).execute()
    if 'items' in res:
        return res['items']
    else:
        return []




In [6]:

with open('/content/extracted/intents.json') as file:
    data = json.load(file)


texts = []
intents = []
for intent in data['intents']:
    for text in intent['text']:
        texts.append(text)
        intents.append(intent['intent'])


tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
encoded_texts = tokenizer.texts_to_sequences(texts)


import pickle

with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)


max_len = max([len(x) for x in encoded_texts])
padded_texts = pad_sequences(encoded_texts, maxlen=max_len, padding='post')


le = LabelEncoder()


encoded_intents = le.fit_transform(intents)


num_intents = len(le.classes_)


encoded_intents = tf.one_hot(encoded_intents, depth=num_intents)


input_layer = Input(shape=(max_len,))
embedding_layer = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128, input_length=max_len)(input_layer)
lstm_layer = LSTM(128)(embedding_layer)
output_layer = Dense(num_intents, activation='softmax')(lstm_layer)
model = Model(inputs=input_layer, outputs=output_layer)


model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


model.fit(padded_texts, encoded_intents, epochs=50, batch_size=16)

# save model
model.save('chatbot_model34.h5')



Epoch 1/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.0492 - loss: 3.6532
Epoch 2/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.0970 - loss: 3.5150
Epoch 3/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.0703 - loss: 3.3956
Epoch 4/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.1196 - loss: 3.1108
Epoch 5/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.2938 - loss: 2.6265
Epoch 6/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4454 - loss: 1.9958
Epoch 7/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5488 - loss: 1.5534
Epoch 8/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7193 - loss: 1.1058
Epoch 9/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[



In [9]:
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
import random
from googleapiclient.discovery import build


with open('/content/extracted/intents.json') as file:
    data = json.load(file)


texts = []
intents = []
for intent in data['intents']:
    for text in intent['text']:
        texts.append(text)
        intents.append(intent['intent'])


tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)


model = load_model('/content/chatbot_model34.h5')


max_len = model.input_shape[1]


le = LabelEncoder()
le.fit(intents)


intent_mapping = {i: label for i, label in enumerate(le.classes_)}


GOOGLE_API_KEY = '################'  # Replace with your API key
GOOGLE_CSE_ID = '#################'           # Replace with your Custom Search Engine ID

def google_search(query, api_key, cse_id, num=1):
    service = build("customsearch", "v1", developerKey=api_key)
    res = service.cse().list(q=query, cx=cse_id, num=num).execute()
    if 'items' in res:
        return res['items']
    else:
        return []


print('Welcome dude! Type "quit" to exit.')


recent_responses = []
response_limit = 5  
response_frequency = {} 

while True:
  
    user_input = input("You: ").lower().strip()

    
    if user_input == 'quit':
        print("Chatbot: Goodbye!")
        break

    
    encoded_input = tokenizer.texts_to_sequences([user_input])
    padded_input = pad_sequences(encoded_input, maxlen=max_len, padding='post')

    
    intent_prob = model.predict(padded_input)[0]
    intent_idx = np.argmax(intent_prob)
    confidence = intent_prob[intent_idx]
    intent_label = le.inverse_transform([intent_idx])[0]

    
    CONFIDENCE_THRESHOLD = 0.7  # Adjust based  model's performance

    if confidence < CONFIDENCE_THRESHOLD:
        # Low confidence: Use Google Search
        search_results = google_search(user_input, GOOGLE_API_KEY, GOOGLE_CSE_ID, num=1)
        if search_results:
            top_result = search_results[0]
            response = f"I couldn't find a specific answer, but here's something that might help:\nTitle: {top_result.get('title')}\nLink: {top_result.get('link')}\nSnippet: {top_result.get('snippet')}"
        else:
            response = "I'm sorry, I couldn't find any information related to your query."
    else:
        # High confidence: Use predefined responses
        
        possible_responses = []
        for intent in data['intents']:
            if intent['intent'] == intent_label:
                possible_responses = intent['responses']
                break

        
        available_responses = [resp for resp in possible_responses if resp not in recent_responses]

        
        if available_responses:
            y
            weighted_responses = [(resp, response_frequency.get(resp, 0)) for resp in available_responses]
            total_weight = sum(1 / (freq + 1) for _, freq in weighted_responses) 
            probabilities = [(1 / (freq + 1)) / total_weight for _, freq in weighted_responses]

            response = np.random.choice(available_responses, p=probabilities)
        else:
            response = np.random.choice(possible_responses)  

    
    print('Chatbot:', response)

    
    recent_responses.append(response)
    if len(recent_responses) > response_limit:
        recent_responses.pop(0)

    response_frequency[response] = response_frequency.get(response, 0) + 1




Welcome to the chatbot! Type "quit" to exit.
You: what are the courses offered in your college?
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
Chatbot: Our university offers Information Technology, computer Engineering, Mechanical engineering,Chemical engineering, Civil engineering and extc Engineering.
You: who is arijit singh
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Chatbot: I couldn't find a specific answer, but here's something that might help:
Title: Arijit Singh - Wikipedia
Link: https://en.wikipedia.org/wiki/Arijit_Singh
Snippet: He is noted for his ability of performing songs in diverse music genres and is acclaimed for his significant contributions to the contemporary Bollywood Music.
You: what is the fee of each semester
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Chatbot: For Fee detail visit <a target="_blank" href="LINK"> here</a>
You: who is elon musk
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━

KeyboardInterrupt: Interrupted by user