In [None]:
import json
import joblib
import nltk
import string
import numpy as np
from catboost import CatBoostClassifier
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the intents from the JSON file.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding (which is not UTF-8 everywhere).
with open('intents.json', encoding='utf-8') as file:
    intents = json.load(file)

# Preprocess the data
lemmatizer = WordNetLemmatizer()
stop_words = stopwords.words('english')
# Set-based copies of the filter vocabularies: constant-time membership per
# token, and per-character punctuation checks instead of substring matching.
_STOP_WORD_SET = frozenset(stop_words)
_PUNCTUATION_CHARS = frozenset(string.punctuation)


def preprocess_text(text):
    """Normalize *text* into a space-separated string of lemmatized tokens.

    The text is lower-cased, tokenized with NLTK's ``word_tokenize``, stripped
    of English stop words and of tokens made up solely of punctuation
    characters, and each remaining token is lemmatized.

    Args:
        text: Raw input string (a training pattern or a user message).

    Returns:
        The surviving lemmas joined by single spaces; an empty string when
        nothing survives the filtering.
    """
    tokens = word_tokenize(text.lower())
    kept = [
        lemmatizer.lemmatize(token)
        for token in tokens
        # `token in string.punctuation` is a substring test, so it misses
        # multi-character punctuation tokens such as "..." or "--"; checking
        # every character of the token catches those as well.
        if token not in _STOP_WORD_SET
        and not all(char in _PUNCTUATION_CHARS for char in token)
    ]
    return ' '.join(kept)

# Build the training corpus: one cleaned pattern per sample, labelled with
# the tag of the intent it belongs to.
patterns, classes = [], []
for intent in intents['intents']:
    cleaned = [preprocess_text(pattern) for pattern in intent['patterns']]
    patterns.extend(cleaned)
    # One label per cleaned pattern; the generator keeps the tag lookup lazy.
    classes.extend(intent['tag'] for _ in cleaned)

# Turn the cleaned patterns into a dense TF-IDF feature matrix
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(patterns)
X = tfidf_matrix.toarray()
y = np.array(classes)


# Fit a CatBoost classifier with its default settings
model = CatBoostClassifier()
model.fit(X, y)

# Persist both fitted artifacts with joblib
for artifact, filename in (
    (model, 'chatbot_model.joblib'),
    (vectorizer, 'vectorizer.joblib'),
):
    joblib.dump(artifact, filename)

# Load the model in a function
def load_chatbot_model(path='chatbot_model.joblib'):
    """Load a previously saved chatbot classifier from disk.

    Args:
        path: Location of the joblib file to read. Defaults to the file name
            this script writes the trained model to.

    Returns:
        The object stored in the file, as deserialized by ``joblib.load``.
    """
    # NOTE(security): joblib files are pickle-based, so loading one can run
    # arbitrary code. Only point this at files you created or trust.
    return joblib.load(path)

# Test the model
model = load_chatbot_model()
while True:
    try:
        text = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # End of input or an interrupt: leave the chat loop cleanly instead
        # of crashing with a traceback.
        print()
        break
    X_test = vectorizer.transform([preprocess_text(text)]).toarray()
    # predict() returns an array rather than a bare label (its shape can be
    # 1-D or 2-D); flatten it and take the single predicted label so the tag
    # comparison below is a plain scalar comparison.
    prediction = np.ravel(model.predict(X_test))[0]
    for intent in intents['intents']:
        if intent['tag'] == prediction:
            print(f"Bot: {np.random.choice(intent['responses'])}")


Learning rate set to 0.068721
0:	learn: 2.7610969	total: 7.31ms	remaining: 7.3s
1:	learn: 2.7466847	total: 13.7ms	remaining: 6.84s
2:	learn: 2.7379546	total: 20.4ms	remaining: 6.76s
3:	learn: 2.7222636	total: 23.4ms	remaining: 5.83s
4:	learn: 2.7040836	total: 32.9ms	remaining: 6.54s
5:	learn: 2.6885412	total: 42.4ms	remaining: 7.02s
6:	learn: 2.6760378	total: 50.3ms	remaining: 7.13s
7:	learn: 2.6576260	total: 57ms	remaining: 7.06s
8:	learn: 2.6420986	total: 64ms	remaining: 7.04s
9:	learn: 2.6206177	total: 72.2ms	remaining: 7.15s
10:	learn: 2.6049285	total: 79.3ms	remaining: 7.13s
11:	learn: 2.5927801	total: 85.9ms	remaining: 7.07s
12:	learn: 2.5840010	total: 92.8ms	remaining: 7.04s
13:	learn: 2.5721077	total: 99ms	remaining: 6.97s
14:	learn: 2.5574908	total: 106ms	remaining: 6.95s
15:	learn: 2.5377574	total: 112ms	remaining: 6.9s
16:	learn: 2.5263648	total: 119ms	remaining: 6.86s
17:	learn: 2.5174680	total: 125ms	remaining: 6.84s
18:	learn: 2.5006577	total: 132ms	remaining: 6.83s
19:	l

165:	learn: 0.9433981	total: 1.28s	remaining: 6.45s
166:	learn: 0.9375719	total: 1.29s	remaining: 6.43s
167:	learn: 0.9317692	total: 1.3s	remaining: 6.44s
168:	learn: 0.9276004	total: 1.31s	remaining: 6.44s
169:	learn: 0.9225389	total: 1.32s	remaining: 6.44s
170:	learn: 0.9165737	total: 1.32s	remaining: 6.42s
171:	learn: 0.9103823	total: 1.33s	remaining: 6.42s
172:	learn: 0.9055846	total: 1.34s	remaining: 6.41s
173:	learn: 0.9000060	total: 1.35s	remaining: 6.4s
174:	learn: 0.8957740	total: 1.35s	remaining: 6.39s
175:	learn: 0.8928627	total: 1.36s	remaining: 6.38s
176:	learn: 0.8878800	total: 1.37s	remaining: 6.37s
177:	learn: 0.8827056	total: 1.38s	remaining: 6.36s
178:	learn: 0.8776670	total: 1.38s	remaining: 6.35s
179:	learn: 0.8755067	total: 1.39s	remaining: 6.34s
180:	learn: 0.8710006	total: 1.4s	remaining: 6.33s
181:	learn: 0.8672845	total: 1.41s	remaining: 6.33s
182:	learn: 0.8626017	total: 1.42s	remaining: 6.32s
183:	learn: 0.8610433	total: 1.42s	remaining: 6.31s
184:	learn: 0.8

340:	learn: 0.4942958	total: 2.56s	remaining: 4.95s
341:	learn: 0.4928506	total: 2.57s	remaining: 4.94s
342:	learn: 0.4911970	total: 2.57s	remaining: 4.93s
343:	learn: 0.4884015	total: 2.58s	remaining: 4.92s
344:	learn: 0.4861730	total: 2.6s	remaining: 4.93s
345:	learn: 0.4833929	total: 2.6s	remaining: 4.92s
346:	learn: 0.4820262	total: 2.61s	remaining: 4.91s
347:	learn: 0.4810027	total: 2.62s	remaining: 4.91s
348:	learn: 0.4798726	total: 2.63s	remaining: 4.9s
349:	learn: 0.4780077	total: 2.63s	remaining: 4.89s
350:	learn: 0.4765604	total: 2.64s	remaining: 4.88s
351:	learn: 0.4746889	total: 2.65s	remaining: 4.87s
352:	learn: 0.4736592	total: 2.66s	remaining: 4.87s
353:	learn: 0.4724081	total: 2.66s	remaining: 4.86s
354:	learn: 0.4710288	total: 2.67s	remaining: 4.85s
355:	learn: 0.4693340	total: 2.68s	remaining: 4.84s
356:	learn: 0.4679098	total: 2.68s	remaining: 4.83s
357:	learn: 0.4664833	total: 2.69s	remaining: 4.83s
358:	learn: 0.4649481	total: 2.7s	remaining: 4.82s
359:	learn: 0.46

519:	learn: 0.2993576	total: 4.01s	remaining: 3.7s
520:	learn: 0.2987735	total: 4.02s	remaining: 3.7s
521:	learn: 0.2978493	total: 4.03s	remaining: 3.69s
522:	learn: 0.2969862	total: 4.04s	remaining: 3.68s
523:	learn: 0.2965298	total: 4.04s	remaining: 3.67s
524:	learn: 0.2957549	total: 4.05s	remaining: 3.67s
525:	learn: 0.2945020	total: 4.06s	remaining: 3.66s
526:	learn: 0.2936215	total: 4.07s	remaining: 3.65s
527:	learn: 0.2929262	total: 4.07s	remaining: 3.64s
528:	learn: 0.2923273	total: 4.08s	remaining: 3.63s
529:	learn: 0.2911132	total: 4.09s	remaining: 3.62s
530:	learn: 0.2899415	total: 4.09s	remaining: 3.62s
531:	learn: 0.2894266	total: 4.1s	remaining: 3.61s
532:	learn: 0.2888632	total: 4.11s	remaining: 3.6s
533:	learn: 0.2881519	total: 4.11s	remaining: 3.59s
534:	learn: 0.2872544	total: 4.12s	remaining: 3.58s
535:	learn: 0.2867720	total: 4.13s	remaining: 3.57s
536:	learn: 0.2861444	total: 4.13s	remaining: 3.56s
537:	learn: 0.2854649	total: 4.14s	remaining: 3.56s
538:	learn: 0.28

698:	learn: 0.1983097	total: 5.29s	remaining: 2.28s
699:	learn: 0.1980543	total: 5.3s	remaining: 2.27s
700:	learn: 0.1976156	total: 5.31s	remaining: 2.26s
701:	learn: 0.1973554	total: 5.31s	remaining: 2.26s
702:	learn: 0.1969060	total: 5.32s	remaining: 2.25s
703:	learn: 0.1966723	total: 5.33s	remaining: 2.24s
704:	learn: 0.1962774	total: 5.34s	remaining: 2.23s
705:	learn: 0.1958801	total: 5.34s	remaining: 2.22s
706:	learn: 0.1956201	total: 5.35s	remaining: 2.22s
707:	learn: 0.1953482	total: 5.36s	remaining: 2.21s
708:	learn: 0.1950356	total: 5.36s	remaining: 2.2s
709:	learn: 0.1945583	total: 5.37s	remaining: 2.19s
710:	learn: 0.1943149	total: 5.38s	remaining: 2.19s
711:	learn: 0.1938965	total: 5.38s	remaining: 2.18s
712:	learn: 0.1936127	total: 5.39s	remaining: 2.17s
713:	learn: 0.1932756	total: 5.39s	remaining: 2.16s
714:	learn: 0.1927760	total: 5.4s	remaining: 2.15s
715:	learn: 0.1920673	total: 5.41s	remaining: 2.15s
716:	learn: 0.1915796	total: 5.42s	remaining: 2.14s
717:	learn: 0.1

858:	learn: 0.1460597	total: 6.42s	remaining: 1.05s
859:	learn: 0.1457911	total: 6.42s	remaining: 1.05s
860:	learn: 0.1455079	total: 6.43s	remaining: 1.04s
861:	learn: 0.1451954	total: 6.44s	remaining: 1.03s
862:	learn: 0.1449503	total: 6.45s	remaining: 1.02s
863:	learn: 0.1446403	total: 6.46s	remaining: 1.02s
864:	learn: 0.1445087	total: 6.46s	remaining: 1.01s
865:	learn: 0.1441972	total: 6.47s	remaining: 1s
866:	learn: 0.1440338	total: 6.48s	remaining: 994ms
867:	learn: 0.1437505	total: 6.48s	remaining: 986ms
868:	learn: 0.1435289	total: 6.49s	remaining: 978ms
869:	learn: 0.1433609	total: 6.5s	remaining: 971ms
870:	learn: 0.1430624	total: 6.5s	remaining: 963ms
871:	learn: 0.1429046	total: 6.54s	remaining: 961ms
872:	learn: 0.1426640	total: 6.57s	remaining: 956ms
873:	learn: 0.1424442	total: 6.58s	remaining: 949ms
874:	learn: 0.1422558	total: 6.6s	remaining: 943ms
875:	learn: 0.1420517	total: 6.62s	remaining: 938ms
876:	learn: 0.1418196	total: 6.64s	remaining: 931ms
877:	learn: 0.1415