<a href="https://colab.research.google.com/github/Hariprasath1312/OTP-generator/blob/main/AGROBOT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import pickle as pk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
import keras
from keras.models import Sequential
from keras.layers import Dense
import re
from nltk.stem.porter import PorterStemmer
from sklearn.metrics import confusion_matrix
import tensorflow as tf

# Intent classification with Gaussian Naive Bayes.
#
# Pipeline: load the (query, intent) pairs, stem the queries, build a
# bag-of-words matrix, encode the intent labels, train on 85% of the data and
# report accuracy / macro precision / macro recall on the remaining 15%.

# Importing dataset and splitting into words and labels.
# NOTE(review): `names=` without `header=0` makes pandas treat the first CSV
# row as data. If intents.csv starts with a header row, that row is trained on
# as a sample with its own class -- confirm against the file before changing,
# because later cells size their network for the current number of classes.
dataset = pd.read_csv('intents.csv', names=["Query", "Intent"])

queries = dataset["Query"]
intent = list(dataset["Intent"])
unique_intent_list = list(set(intent))

print("Dataset successfully loaded!")

# Text preprocessing for Intent classification
queryCorpus = []
ps = PorterStemmer()

for query in queries:
    # Keep letters and digits only; everything else becomes a separator
    query = re.sub('[^a-zA-Z0-9]', ' ', query)

    # Stem every lower-cased token (Porter stemming, not lemmatization) and
    # rebuild the sentence so CountVectorizer can tokenize it again
    queryCorpus.append(' '.join(ps.stem(word.lower()) for word in query.split(' ')))

print("Corpus created")

# Bag of words representation for Intent classification
intent_CV = CountVectorizer(max_features=1500)
corpus = intent_CV.fit_transform(queryCorpus).toarray()
print(corpus)
print("Bag of words created!")

# Save the CountVectorizer of Intent (context manager closes the file handle)
with open('intent_vectorizer.pkl', 'wb') as vectorizer_file:
    pk.dump(intent_CV, vectorizer_file)
print('Intent vector saved!')

# Encode the intent classes
labelencoder_intent = LabelEncoder()
intent = labelencoder_intent.fit_transform(intent)
print("Encoded the classes!")

# Map intent labels to their integer values (one transform call for all classes)
res = dict(zip(labelencoder_intent.classes_,
               labelencoder_intent.transform(labelencoder_intent.classes_)))
intent_label_map = res
print(intent_label_map)
print("Label mapping obtained!")

# Split the dataset into the training and test set for Intent classification
query_train, query_test, intent_train, intent_test = train_test_split(
    corpus, intent, test_size=0.15, random_state=19)

print("Dataset split into train and test set")
print(query_train)
print(intent_train)

# Train the Intent classification model using Gaussian Naive Bayes
classifier = GaussianNB()
classifier.fit(query_train, intent_train)
print("Intent classification model trained successfully!")

# Predict Intent on the test set
intent_pred = classifier.predict(query_test)
print(intent_test)
print(intent_pred)

# Evaluate the performance of the Intent classification model
cm = confusion_matrix(intent_test, intent_pred)

# Model performance metrics.
# The problem is multi-class, so the metrics must use the whole confusion
# matrix: rows are true classes, columns are predicted classes, and the
# diagonal holds the correctly classified samples.
correct_per_class = np.diag(cm).astype(float)
predicted_per_class = cm.sum(axis=0)
actual_per_class = cm.sum(axis=1)

accuracy = correct_per_class.sum() / cm.sum()

# Macro averages: per-class score, then the unweighted mean. A class that was
# never predicted (or never present) contributes 0 instead of dividing by zero.
precision = np.divide(correct_per_class, predicted_per_class,
                      out=np.zeros_like(correct_per_class),
                      where=predicted_per_class > 0).mean()
recall = np.divide(correct_per_class, actual_per_class,
                   out=np.zeros_like(correct_per_class),
                   where=actual_per_class > 0).mean()

print("Intent Classification Model Performance:")
print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)


# Neural network for Query classification.
#
# Same data as above, but the queries are cleaned with a letters-only pattern,
# vectorized with a smaller vocabulary and classified by a dense network whose
# input/output sizes are taken from the data instead of being hard-coded.

# Text preprocessing for Query classification
queryCorpus = []
ps = PorterStemmer()

for query in queries:
    # Keep letters only; everything else becomes a separator
    query = re.sub('[^a-zA-Z]', ' ', query)

    # Stem every lower-cased token (Porter stemming) and rebuild the sentence
    queryCorpus.append(' '.join(ps.stem(word.lower()) for word in query.split(' ')))

print(queryCorpus)
print("Corpus created")

# Bag of words representation for Query classification
countVectorizer = CountVectorizer(max_features=800)
corpus = countVectorizer.fit_transform(queryCorpus).toarray()
print(corpus.shape)
print("Bag of words created!")

# Encode the intent classes for Query classification.
# Fit on the original label column: `intent` may already hold integer codes
# from the previous section, and re-encoding those would make the label map
# below an int -> int identity that loses the intent names.
labelencoder_intent = LabelEncoder()
intent = labelencoder_intent.fit_transform(dataset["Intent"])
intent = keras.utils.to_categorical(intent)
print("Encoded the classes!")
print(intent)

# Map intent labels to their integer values
res = dict(zip(labelencoder_intent.classes_,
               labelencoder_intent.transform(labelencoder_intent.classes_)))

intent_label_map = res
print(intent_label_map)
print("Label mapping obtained!")

# Split the dataset into the Training set and Test set for Query classification
X_train, X_test, y_train, y_test = train_test_split(
    corpus, intent, test_size=0.15, random_state=19)

print("Dataset split into train and test set")
print(X_train.shape)
print(y_train.shape)

# Build and train the neural network for Query classification
classifier = Sequential()

# Input layer + first hidden layer; the input width is the vocabulary size
classifier.add(Dense(units=96, kernel_initializer='uniform', activation='relu',
                     input_dim=corpus.shape[1]))

# Adding the second hidden layer
classifier.add(Dense(units=96, kernel_initializer='uniform', activation='relu'))

# Output layer: one softmax unit per one-hot encoded class
classifier.add(Dense(units=intent.shape[1], kernel_initializer='uniform', activation='softmax'))

# Compiling the ANN
classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fitting the ANN to the Training set
classifier.fit(X_train, y_train, batch_size=10, epochs=500)

# Predict the result for Query classification (argmax turns probabilities into class ids)
y_pred = classifier.predict(X_test)
y_pred = np.argmax(y_pred, axis=1)
print("Predictions for Query classification:")
print(y_pred)

# Evaluate the performance of the Query classification model
y_test2 = np.argmax(y_test, axis=1)
cm_query = confusion_matrix(y_test2, y_pred)

# Model performance metrics for Query classification.
# Multi-class problem: use the whole confusion matrix (rows = true classes,
# columns = predicted classes, diagonal = correct predictions).
correct_per_class = np.diag(cm_query).astype(float)
predicted_per_class = cm_query.sum(axis=0)
actual_per_class = cm_query.sum(axis=1)

accuracy_query = correct_per_class.sum() / cm_query.sum()

# Macro averages; classes with an empty column/row contribute 0 rather than
# triggering a division by zero.
precision_query = np.divide(correct_per_class, predicted_per_class,
                            out=np.zeros_like(correct_per_class),
                            where=predicted_per_class > 0).mean()
recall_query = np.divide(correct_per_class, actual_per_class,
                         out=np.zeros_like(correct_per_class),
                         where=actual_per_class > 0).mean()

print("Query Classification Model Performance:")
print("Accuracy: ", accuracy_query)
print("Precision: ", precision_query)
print("Recall: ", recall_query)


Dataset successfully loaded!
Corpus created
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 1]]
Bag of words created!
Intent vector saved!
Encoded the classes!
{'AskingHelp': 0, 'BestPractice': 1, 'Cultivation': 2, 'CultivationSeason': 3, 'Diseases': 4, 'EnquireAboutDay': 5, 'Family': 6, 'Fertilizer': 7, 'Greeting': 8, 'HarvestTime': 9, 'Intent': 10, 'Irrigation': 11, 'Location': 12, 'MarketPrice': 13, 'Maturity': 14, 'OutOfScope': 15, 'Pesticide': 16, 'Rainfall': 17, 'ReapingSeason': 18, 'Sarcasm': 19, 'SeedDensity': 20, 'Soil': 21, 'Varieties': 22, 'Wassup': 23, 'Weather': 24, 'Wellness': 25, 'Yield': 26}
Label mapping obtained!
Dataset split into train and test set
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 0 0 0]]
[13  1 22  1 19 23 19 22 22  0  6  2 23 24 24  8  6 19 14 23  3 25 26 13
  3 21 22  6  0 18  0 19  2  0 26 17 24 14 15 13  0 11 24 24 1

In [None]:
def trainIntentModel():
    """Train the dense intent classifier on the full intents dataset.

    Reads ``intents.csv`` (columns: query, intent), stems the queries, fits a
    bag-of-words vectorizer (saved to ``CountVectorizer.sav``) and trains a
    two-hidden-layer softmax network on all rows.

    Returns:
        tuple: ``(classifier, intent_label_map)`` where ``classifier`` is the
        trained Keras model and ``intent_label_map`` maps each intent label to
        the integer class id used by the network's output layer.
    """
    # Importing dataset and splitting into words and labels
    dataset = pd.read_csv('intents.csv', names=["Query", "Intent"])

    X = dataset["Query"]
    y = dataset["Intent"]

    print("Intent Dataset successfully loaded!")

    # Clean and prepare the intents corpus
    ps = PorterStemmer()
    queryCorpus = []

    for query in X:
        # Keep letters only; everything else becomes a separator
        query = re.sub('[^a-zA-Z]', ' ', query)

        # Stem every lower-cased token (Porter stemming) and rebuild the sentence
        queryCorpus.append(' '.join(ps.stem(word.lower()) for word in query.split(' ')))

    print(queryCorpus)
    print("Corpus created")

    countVectorizer = CountVectorizer(max_features=800)
    corpus = countVectorizer.fit_transform(queryCorpus).toarray()
    print(corpus.shape)
    print("Bag of words created!")

    # Save the CountVectorizer (context manager closes the file handle)
    with open("CountVectorizer.sav", 'wb') as vectorizer_file:
        pk.dump(countVectorizer, vectorizer_file)
    print("Intent CountVectorizer saved!")

    # Encode the intent classes as one-hot rows
    labelencoder_intent = LabelEncoder()
    y = labelencoder_intent.fit_transform(y)
    y = keras.utils.to_categorical(y)
    print("Encoded the intent classes!")
    print(y)

    # Dictionary mapping labels to their integer values
    intent_label_map = dict(zip(
        labelencoder_intent.classes_,
        labelencoder_intent.transform(labelencoder_intent.classes_)))
    print(intent_label_map)
    print("Intent Label mapping obtained!")

    # Initialising the Artificial Neural Network
    classifier = Sequential()

    # Input layer + first hidden layer. The input width follows the fitted
    # vocabulary so the model keeps working when the dataset changes.
    classifier.add(Dense(units=96, kernel_initializer='uniform', activation='relu',
                         input_dim=corpus.shape[1]))

    # Adding the second hidden layer
    classifier.add(Dense(units=96, kernel_initializer='uniform', activation='relu'))

    # Output layer: one softmax unit per encoded intent class
    classifier.add(Dense(units=y.shape[1], kernel_initializer='uniform', activation='softmax'))

    # Compiling the ANN
    classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Fitting the ANN to the whole dataset (no hold-out split in this function)
    classifier.fit(corpus, y, batch_size=10, epochs=500)

    return classifier, intent_label_map


intent_model, intent_label_map = trainIntentModel()

# Save the Intent model
intent_model.save('intent_model.h5')
print("Intent model saved!")


Intent Dataset successfully loaded!
['queri', 'how to grow rice ', 'i want to grow rice ', 'how much water to add for wheat ', 'best season to grow sugarcan', 'yield of wheat', 'averag yield of rice', 'what is the yield of rice in belgaum ', 'best pesticid for cotton ', 'best soil for cotton', 'how much fertil should i add to my rice crop ', 'when to sow cotton ', 'when to harvest wheat ', 'how to enhanc yield of sugarcan ', 'i want to grow paddi ', 'how to grow maiz ', 'which fertil can i use for sugarcan ', 'how is the weather today ', 'how is the weather tomorrow ', 'what s the weather in mysor thi week ', 'weather thi week ', 'weather forecast for the week ', 'how is the climat today ', 'climat condit for the upcom day ', 'weather report for the day ', 'will it rain today ', 'is it go to be sunni thi week ', 'what will the weather be like thi week ', 'can i expect it to be sunni today ', 'which is the season for grow rice ', 'season for grow wheat ', 'how much cotton seed are need 

  saving_api.save_model(


In [None]:
class Balaram:
    """Rule-based farming assistant driven by a word-level entity tagger.

    ``setup_model`` trains a Gaussian Naive Bayes tagger on ``data-tags.csv``
    and pickles it; ``generateResponse`` tags the tokens of a user query and
    prints a canned response chosen by ``fetch_data``.

    The label mapping produced by ``setup_model`` is stored in the module-level
    global ``label_map``, which ``get_entities`` reads.
    """

    def __init__(self):
        # Declared global so the modules are bound at module scope, where
        # setup_model() looks up ``pd``.
        global pd, np
        import pandas as pd
        import numpy as np

    def setup_model(self):
        '''
        Load the dataset and prepare it to the train the model

        Side effects: writes 'countVectorizer.sav' and 'classifier.sav' to the
        working directory and sets the module-level ``label_map``.
        '''
        import pickle
        from sklearn.feature_extraction.text import CountVectorizer
        from sklearn.naive_bayes import GaussianNB
        from sklearn.preprocessing import LabelEncoder

        global label_map

        # Importing dataset and splitting into words and labels.
        # reshape(-1) flattens to one entry per row without assuming a fixed
        # dataset length.
        dataset = pd.read_csv('data-tags.csv')
        X = dataset.iloc[:, :-1].values.reshape(-1)
        y = dataset.iloc[:, 1].values

        # Create a bag of words model for words
        cv = CountVectorizer(max_features=1500)
        X = cv.fit_transform(X.astype('U')).toarray()

        # Save CountVectorizer state
        with open('countVectorizer.sav', 'wb') as vectorizer_file:
            pickle.dump(cv, vectorizer_file)

        # Encoding categorical data of labels
        labelencoder_y = LabelEncoder()
        y = labelencoder_y.fit_transform(y.astype(str))

        # Dict mapping labels to their integer values
        label_map = dict(zip(labelencoder_y.classes_,
                             labelencoder_y.transform(labelencoder_y.classes_)))

        # Fit the classifier to dataset
        classifier = GaussianNB()
        classifier.fit(X, y)

        # Save trained model state
        with open('classifier.sav', 'wb') as classifier_file:
            pickle.dump(classifier, classifier_file)

    def get_entities(self, text):
        '''
        Extract entities from user text

        ``text`` is a sequence of tokens; the result holds one entity label per
        token whose predicted class id appears in ``label_map``. An empty
        sequence yields an empty list without touching the saved models.
        '''
        import pickle

        # The classifier rejects a zero-row matrix, so short-circuit here
        if len(text) == 0:
            return []

        # Restore model state to make prediction
        with open('countVectorizer.sav', 'rb') as vectorizer_file:
            load_cv = pickle.load(vectorizer_file)
        with open('classifier.sav', 'rb') as classifier_file:
            load_classifier = pickle.load(classifier_file)

        response_tags = load_classifier.predict(load_cv.transform(text).toarray())

        # Invert the mapping once (first label wins for a repeated id) instead
        # of scanning the value list for every tag
        tag_to_label = {}
        for label, tag in label_map.items():
            tag_to_label.setdefault(tag, label)

        return [tag_to_label[tag] for tag in response_tags if tag in tag_to_label]

    def generateResponse(self, text):
        '''
        Generate response for user text
        '''
        # Extract entities from text
        entities = self.get_entities(text)

        # Mapping between entity tags and the tokens that produced them; a tag
        # predicted more than once keeps the last token
        token_entity_map = dict(zip(entities, text))
        print(token_entity_map)

        # Fetch data from database based on available information provided by user
        self.fetch_data(token_entity_map)

    def fetch_data(self, entity_tags):
        '''
        Fetch the data from database based on received entities

        ``entity_tags`` maps entity tag -> token. Every condition tests tag
        membership explicitly; an expression such as ``"A" and "B" in tags``
        only checks the last operand.
        '''
        if "GREET" in entity_tags:
            name = entity_tags.get("NAME")
            # Greet by name only when a name token is actually available
            if "USR" in entity_tags and "INT" in entity_tags and name is not None:
                print("vanakkam , " + name + "!")
            else:
                print("Vanakkam!........agrobot la erunthu naanu")

        elif ("WTR" in entity_tags and "TIME" in entity_tags
                and "CROP" not in entity_tags) or "ADJ" in entity_tags:
            print("Weather for the asked day/week")

        elif "CROP" in entity_tags:
            if "CUL" in entity_tags:
                if "SOIL" in entity_tags:
                    print("Crop can be Cultivated in mentioned soil(s)")
                else:
                    print("Cultivation info along with season")

            elif "IRR" in entity_tags:
                print("Irrigation info for crop")

            elif "TIME" in entity_tags:
                print("Crop timing")

            elif "FTLZ" in entity_tags:
                print("Fertilizer for crop")

            elif "PEST" in entity_tags:
                print("Pesticide for crop")

            elif "RAIN" in entity_tags:
                print("Rainfall info for crop")

            elif "SOIL" in entity_tags:
                print("Soil info for crop")

            elif "WTR" in entity_tags:
                print("Crop weather info")

            elif "SOW" in entity_tags:
                print("Crop sowing info")

            elif "REAP" in entity_tags:
                print("Crop reaping info")

            elif "YLD" in entity_tags:
                print("Crop yield info")

            elif "SEED" in entity_tags and "TYPE" in entity_tags:
                print("Crop seed info")

            elif "COST" in entity_tags:
                print("Market price info of crop")

            elif ("QTY" in entity_tags and "MSR" in entity_tags) or "SEED" in entity_tags:
                print("Seed density of crop")
# Create the assistant instance used by the interactive loop below
ask = Balaram()

# Load data and train model (also writes the vectorizer and classifier pickles to disk)
ask.setup_model()

# Greeting printed once before the first prompt
print("vanakkam!.........")

def process_input(text):
    '''
    Normalise raw user input into a list of lower-case word tokens.

    Every character that is not an ASCII letter is treated as a separator, so
    punctuation, digits and special symbols never reach the tagger.
    '''
    import re

    # Blank out non-letters, fold case, then split on the resulting whitespace
    letters_only = re.sub('[^A-Za-z]', ' ', text)
    return letters_only.lower().split()

# Interactive loop: read a line, tokenize it and print the assistant's response.
# The loop ends on "quit"/"exit", end of input, or an interrupt, so the code
# after it can run and the session does not end in a traceback.
while True:
    # Get user input
    try:
        text = input()
    except (EOFError, KeyboardInterrupt):
        break

    if text.strip().lower() in ("quit", "exit"):
        break

    # Process input
    text = process_input(text)

    # Nothing to tag (blank line or symbols only): the classifier cannot
    # predict on zero tokens, so wait for the next line instead
    if not text:
        continue

    # Response generation
    ask.generateResponse(text)

vanakkam!.........


In [None]:
# The entity vectorizer gets its own file. Writing it to 'CountVectorizer.sav'
# would overwrite the intent vectorizer that trainIntentModel() stores under
# that name and that the intent classifier needs at prediction time.
ENTITY_VECTORIZER_PATH = 'EntityCountVectorizer.sav'


def trainEntityModel():
    """Train the word-level entity tagger and persist it.

    Reads ``data-tags.csv``, stems the word in the first column of every row,
    fits a bag-of-words vectorizer and a Gaussian Naive Bayes classifier on the
    labels in the second column.

    Side effects: pickles the vectorizer to ``ENTITY_VECTORIZER_PATH`` and the
    classifier to ``entity_model.h5`` (a pickle file despite the extension).

    Returns:
        dict: entity label -> integer class id used by the classifier.
    """
    # Create a bag of words model for words
    from sklearn.feature_extraction.text import CountVectorizer

    # Importing dataset and splitting into words and labels
    dataset = pd.read_csv('data-tags.csv')
    X = dataset.iloc[:, :-1].values
    y = dataset.iloc[:, 1].values
    print(X)
    print("Entity Dataset successfully loaded!")

    # Stem the word of every row; building the flat list directly avoids
    # reshaping to a hard-coded dataset length
    ps = PorterStemmer()
    entityCorpus = [ps.stem(str(row[0])) for row in X]
    print(entityCorpus)

    cv = CountVectorizer(max_features=1500)
    X = cv.fit_transform(entityCorpus).toarray()
    print("Entity Bag of words created!")

    # Save CountVectorizer state
    with open(ENTITY_VECTORIZER_PATH, 'wb') as vectorizer_file:
        pk.dump(cv, vectorizer_file)
    print("Entity CountVectorizer state saved!")

    # Encoding categorical data of labels
    labelencoder_y = LabelEncoder()
    y = labelencoder_y.fit_transform(y.astype(str))
    print("Encoded the entity classes!")

    # Dict mapping labels to their integer values
    entity_label_map = dict(zip(labelencoder_y.classes_,
                                labelencoder_y.transform(labelencoder_y.classes_)))
    print("Entity Label mapping obtained!")

    # Fit classifier to dataset
    classifier = GaussianNB()
    classifier.fit(X, y)
    print("Entity Model trained successfully!")

    # Save the entity classifier model
    with open('entity_model.h5', 'wb') as model_file:
        pk.dump(classifier, model_file)
    print("Trained entity model saved!")

    return entity_label_map


intent_model, intent_label_map = trainIntentModel()

# Load Entity model
entity_label_map = trainEntityModel()
import joblib

# Load CountVectorizer
loadedEntityCV = joblib.load(ENTITY_VECTORIZER_PATH)

# Load entity classifier model
loadedEntityClassifier = joblib.load('entity_model.h5')



In [None]:
import numpy as np

def getEntities(query):
    """Predict an entity label for every token in ``query``.

    Args:
        query: sequence of (stemmed) tokens; each token is vectorized and
            classified on its own.

    Returns:
        list: entity labels, in token order, for every predicted class id that
        appears in ``entity_label_map``.
    """
    # Transform the query using CountVectorizer
    query_transformed = loadedEntityCV.transform(query).toarray()

    # Zero-pad on the right when the vectorizer yields fewer columns than the
    # classifier was trained on. The expected width is read from the fitted
    # classifier; when it does not expose one, no padding is applied.
    expected_features = getattr(loadedEntityClassifier, "n_features_in_",
                                query_transformed.shape[1])
    missing = expected_features - query_transformed.shape[1]
    if missing > 0:
        query_transformed = np.pad(query_transformed, ((0, 0), (0, missing)), mode='constant')

    # Make predictions
    response_tags = loadedEntityClassifier.predict(query_transformed)

    # Invert the label map once (first label wins for a repeated id) instead of
    # scanning the value list for every predicted tag
    tag_to_entity = {}
    for label, tag in entity_label_map.items():
        tag_to_entity.setdefault(tag, label)

    return [tag_to_entity[tag] for tag in response_tags if tag in tag_to_entity]


In [None]:
# Load model to predict user result
import numpy as np
from tensorflow.keras.models import load_model

loadedIntentClassifier = load_model('intent_model.h5')

# Vectorizer used to turn a query into the network's input features
with open('CountVectorizer.sav', 'rb') as vectorizer_file:
    loaded_intent_CV = pk.load(vectorizer_file)

USER_INTENT = ""

# Created once; neither depends on the individual query
ps = PorterStemmer()
expected_features = loadedIntentClassifier.input_shape[-1]

# Interactive loop: print the predicted intent and the tagged entities for
# every query. Ends on "quit"/"exit", end of input, or an interrupt.
while True:
    try:
        user_query = input()
    except (EOFError, KeyboardInterrupt):
        break

    if user_query.strip().lower() in ("quit", "exit"):
        break

    # A blank line carries no words to classify
    if not user_query.strip():
        continue

    # Same cleaning as at training time: letters only, then split on spaces
    query = re.sub('[^a-zA-Z]', ' ', user_query)

    # Tokenize sentence
    query = query.split(' ')

    # Stem every lower-cased token (Porter stemming)
    tokenized_query = [ps.stem(word.lower()) for word in query]

    # Recreate the sentence from tokens
    processed_text = ' '.join(tokenized_query)

    # Transform the query using the CountVectorizer -> shape (1, n_features)
    processed_text = loaded_intent_CV.transform([processed_text]).toarray()

    # Zero-pad on the right when the vectorizer yields fewer columns than the
    # network's input layer expects
    missing = expected_features - processed_text.shape[1]
    if missing > 0:
        processed_text = np.pad(processed_text, ((0, 0), (0, missing)), mode='constant')

    # Make the prediction; argmax picks the most probable class id
    predicted_Intent = loadedIntentClassifier.predict(processed_text)
    result = np.argmax(predicted_Intent, axis=1)

    # Translate the class id back to its intent label
    for key, value in intent_label_map.items():
        if value == result[0]:
            print(key)
            USER_INTENT = key
            break

    # Extract entities from text
    entities = getEntities(tokenized_query)

    # Mapping between entity tags and tokens; a repeated tag keeps its last token
    token_entity_map = dict(zip(entities, tokenized_query))
    print(token_entity_map)

