# Student FeedBack Analysis 

### Importing Libraries

In [100]:
import pandas as pd
import nltk
import re
from nltk.corpus import stopwords 
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer

### Reading the dataset

In [101]:
db= pd.read_excel('./Dataset.xlsx')

In [102]:
db.head()

Unnamed: 0,ID,FeedBack,Label
0,1,Good teaching,1
1,2,Explains the concepts nicely.Very Good,1
2,3,punctual and kind teacher,1
3,4,Poor at explaining concepts,0
4,5,Irregular to classes.Bad at teaching,0


In [104]:
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\PRANEETH\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\PRANEETH\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

### Cleaning Data and Preprocessing

In [105]:
ps= PorterStemmer()
corpus = []
wordnet=WordNetLemmatizer()
for i in range(0, len(db)):
    review = re.sub('[^a-zA-Z]', ' ', db['FeedBack'][i])
    review = review.lower()
    review = review.split()
    
    review = [wordnet.lemmatize(word) for word in review ]
    review = ' '.join(review)
    corpus.append(review)


### Tfidf model

In [106]:
from sklearn.feature_extraction.text import TfidfVectorizer
cv = TfidfVectorizer()
X = cv.fit_transform(corpus).toarray()
y=db['Label']


### Splitting data into Train and test 

In [195]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.02, random_state = 42)

### Training model using Naive Bayes(MultinomialNB)

In [196]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

model = MultinomialNB().fit(X_train, y_train)

y_pred=model.predict(X_test)
ac=accuracy_score(y_test,y_pred)*100

ac


100.0

### Model using Naive Bayes (BernouliNB)

In [197]:
from sklearn.naive_bayes import BernoulliNB
model1 = BernoulliNB().fit(X_train, y_train)

y_pred=model1.predict(X_test)
ac1=accuracy_score(y_test,y_pred)*100

ac1

100.0

### Model using Naive Bayes( GaussianNB) 

In [198]:
from sklearn.naive_bayes import GaussianNB
model2 = GaussianNB().fit(X_train, y_train)

y_pred=model2.predict(X_test)
ac2=accuracy_score(y_test,y_pred)

ac2

0.8571428571428571

### Using Random Forest

In [199]:
from sklearn.ensemble import RandomForestClassifier
model3=RandomForestClassifier(n_estimators=100,min_samples_split=10)

In [200]:
model3.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=10,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [201]:
y_pred=model3.predict(X_test)
accuracy_score(y_pred,y_test)

0.8571428571428571

After comparing the four algorithms we can say that Multinoial Naive bayes gives more accuracy

### Predict Function using MultinomialNB model

In [202]:
def pred(string):
    ps= PorterStemmer()
    wordnet=WordNetLemmatizer()
    review = re.sub('[^a-zA-Z]', ' ',string)
    review = review.lower()
    review = review.split()
    
    review = [wordnet.lemmatize(word) for word in review]
    review = ' '.join(review)
    X = cv.transform([review]).toarray()
    y=model.predict(X)
    return y[0]

In [206]:
pred('Worst Teaching')

0

In [207]:
from sklearn.externals import joblib
joblib.dump(model,'model.pkl')

['model.pkl']