# Question Classification

In [1]:
import pickle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import csv

In [2]:
def read_csv_to_list(filename):
    """Return the first column of every non-empty row in a CSV file.

    Args:
        filename: Path of the CSV file to read (UTF-8, with or without a
            byte-order mark).

    Returns:
        list[str]: The first cell of each row, in file order. Blank lines
        (which ``csv.reader`` yields as empty rows) are skipped. No header
        handling is done: a header row, if present, contributes its first
        cell like any other row.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # newline='' is what the csv module requires so that line breaks inside
    # quoted fields reach the parser untouched. 'utf-8-sig' decodes plain
    # UTF-8 exactly as before but also drops a leading byte-order mark, which
    # would otherwise be glued onto the first question.
    with open(filename, 'r', encoding='utf-8-sig', newline='') as file:
        return [row[0] for row in csv.reader(file) if row]

In [3]:
# Read the first-column questions from the Para dataset; these are the rows
# that get the "Para" label further down. The bare name on the last line
# echoes the whole list as the cell output.
filename_Para = "../datasets/QnA_Para.csv"
questions_Para = read_csv_to_list(filename=filename_Para)
questions_Para

['What is the history of the college?',
 'Describe the history of the college',
 'history of the Fr. CRIT',
 ' history of the college?',
 'What is Vision of your college?',
 'vision of college',
 'vision of the Fr. CRIT',
 'Descibe the vision of college',
 'What is Mission of your college?',
 'mission of Fr. CRIT',
 'mission of college',
 'describe the mission of college',
 'Who is Managing Director of your college?',
 'MD of college',
 'MD of Fr. CRIT',
 'Managing Director of Fr. CRIT',
 'Who is the chairman of your college?',
 'chairman of college',
 'chairman of Fr. CRIT',
 'Who is vice-chairman of your college?',
 'vice chairman of college',
 'vice chairman of Fr. CRIT',
 'Who is the treasurer of your college?',
 'treasurer of college',
 'treasurer of Fr. CRIT',
 'Who is the Secretary of your college?',
 'secretary of college',
 'secretary of Fr. CRIT',
 'Who is the Member of your college?',
 'Who is the Principal of your college?',
 'principal of college',
 'principal of Fr. CRIT'

In [4]:
# Read the first-column questions from the Table dataset; these are the rows
# that get the "Table" label further down. The bare name on the last line
# echoes the whole list as the cell output.
filename_Table = "../datasets/QnA_Table.csv"
questions_Table = read_csv_to_list(filename=filename_Table)
questions_Table

['What is sum of all packages?',
 'What is average of all packages?',
 'What is count of all packages?',
 'What is highest of all packages?',
 'Which company has highest of all packages?',
 'Which company has highest package?',
 'Which company has lowest package?',
 'What is maximum package?',
 'What is max package?',
 'What is lowest package?',
 'What is least package?',
 'What is minimum package?',
 'Which company gives highest package?',
 'Which company gives lowest package?',
 'Which company has highest number of students placed?',
 'Which company has lowest number of students placed?',
 'Which company has least number of students placed?',
 'Which companies have same package?',
 'What is the package of TCS?',
 'What is the package of Jio?',
 'What is the package of LTFS?',
 'What is the package of Aurionpro?',
 'What is the package of Seclore?',
 'What is the package of Accenture?',
 'What is the package of students placed in Jio?',
 'What is the package of students placed in Secl

In [5]:
# One class label per question, so each label list lines up index-for-index
# with the question list it was built from.
labels_Para = ["Para" for _ in questions_Para]
labels_Table = ["Table" for _ in questions_Table]

In [6]:
# Build the full corpus: Para rows first, then Table rows, with the labels
# concatenated in the same order so the pairs stay aligned.
questions = [*questions_Para, *questions_Table]
labels = [*labels_Para, *labels_Table]

In [7]:
# Hold out 20% of the labelled questions for testing; the fixed random_state
# makes the shuffle (and therefore the split) repeatable between runs.
# NOTE(review): the split is not stratified, so the Para/Table ratio in the
# two parts may differ — consider stratify=labels if the classes are uneven.
X_train, X_test, y_train, y_test = train_test_split(
    questions,
    labels,
    test_size=0.2,
    random_state=42,
)


In [8]:
# Bag-of-words features. The vocabulary is learned from the training
# questions only; the same fitted vectorizer then encodes both splits, so
# words seen only in the test questions do not enter the vocabulary.
vectorizer = CountVectorizer()
vectorizer.fit(X_train)
X_train_vec = vectorizer.transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [9]:
# Fit the Para/Table classifier on the bag-of-words training matrix.
classifier = LogisticRegression(max_iter=1000)
classifier.fit(X_train_vec, y_train)

# Score the held-out split before persisting anything: the test features and
# labels were prepared earlier but otherwise never used, so without this the
# notebook gives no measure of how well the model generalises.
test_accuracy = accuracy_score(y_test, classifier.predict(X_test_vec))
print("Test accuracy:", test_accuracy)

# Save the classifier together with its fitted vectorizer as one tuple; the
# model is only usable with the exact vocabulary it was trained on.
with open('../models/classifier_model.pkl', 'wb') as f:
    pickle.dump((classifier, vectorizer), f)

In [10]:
# Reload the (classifier, vectorizer) tuple from disk, rebinding both names
# to the unpickled objects.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load model files that come from a trusted source.
with open('../models/classifier_model.pkl', 'rb') as model_file:
    classifier, vectorizer = pickle.load(model_file)

In [11]:
# Classify a single question. The vectorizer expects an iterable of
# documents, so the question is wrapped in a one-item list; predict() then
# returns exactly one label, which is unpacked directly.
question = "What is package?"
question_vec = vectorizer.transform([question])
(predicted_model,) = classifier.predict(question_vec)
print("Predicted model:", predicted_model)

Predicted model: Table


In [12]:
# Classify a small batch of ad-hoc questions in one call.
# The batch gets its own name instead of rebinding `questions`: that name
# holds the full training corpus, and overwriting it with these five items
# would make the train/test split cell fail (length mismatch with `labels`)
# if it were re-run after this cell.
sample_questions = [
    "What is highest package?",
    "package?",
    "How does it work?",
    "What is package of Facebook?",
    "Why is it important?",
]
question_vec = vectorizer.transform(sample_questions)
predicted_models = classifier.predict(question_vec)
print("Predicted models:", predicted_models)


Predicted models: ['Table' 'Para' 'Para' 'Table' 'Para']
