In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import nltk

# Show every column when a DataFrame is displayed (None disables the limit).
pd.options.display.max_columns = None

In [None]:
# Load the raw table; the null check in the next cell shows it has `Sentence` and `Type` columns.
data = pd.read_csv(filepath_or_buffer="dataset.csv")

In [None]:
# Count missing values per column (`isna` is the same method as `isnull`).
data.isna().sum()

Sentence    0
Type        0
dtype: int64

In [None]:
# Number of rows that repeat an earlier row exactly (the first occurrence is not counted).
data.duplicated(keep="first").sum()

758

In [None]:
# Keep one copy of every repeated row. The previous `keep=False` removed *all*
# copies of a duplicated row, throwing away valid samples instead of just the
# redundant ones. Re-assigning (rather than `inplace=True`) keeps the cell
# side-effect free on the object that earlier cells displayed.
data = data.drop_duplicates(keep="first")

In [None]:
# Separate the target column from the remaining (text) column(s).
y = data.loc[:, "Type"]
data = data.drop(columns="Type")

In [None]:
from sklearn.preprocessing import LabelEncoder
# Learn the class -> integer mapping, then replace the string labels by their codes.
# `label_enc` is kept so predictions can be mapped back to class names later.
label_enc = LabelEncoder().fit(y)
y = label_enc.transform(y)

In [None]:
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

# Text-normalisation resources; `clean` further down reuses both of these globals.
lemmatizer = WordNetLemmatizer()
sw = set(stopwords.words('english'))

# One document per row: every cell is lower-cased, whitespace-tokenised,
# stripped of stop words and lemmatised; the cells of a row are concatenated,
# each one preceded by a single space (so every document starts with " ").
corpus = [
    "".join(
        " " + " ".join(
            lemmatizer.lemmatize(token)
            for token in cell.lower().split()
            if token not in sw
        )
        for cell in row
    )
    for row in data.itertuples(index=False, name=None)
]

In [None]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
# Bag-of-words and TF-IDF vectorizers with identical settings:
# uni- to tri-grams, vocabulary capped at the 5000 most frequent terms.
cv, tfidf = (
    vectorizer_cls(max_features=5000, ngram_range=(1, 3))
    for vectorizer_cls in (CountVectorizer, TfidfVectorizer)
)

In [None]:
# Fit each vectorizer on the corpus and materialise the features as dense arrays.
# NOTE(review): both vectorizers are fitted on the *whole* corpus before the
# train/test split below, so vocabulary (and IDF weights) are learned from test
# rows too. Also, `.toarray()` densifies an (n_docs x 5000) matrix -- confirm
# the dense form is actually needed downstream.
X_cv, X_tfidf = (
    vectorizer.fit_transform(corpus).toarray()
    for vectorizer in (cv, tfidf)
)

In [None]:
from sklearn.model_selection import train_test_split
# Split both feature matrices with ONE seeded call so that
#   * every model is trained and evaluated on exactly the same rows
#     (two independent unseeded calls gave each representation a different
#     test set, making the accuracies non-comparable), and
#   * the split is reproducible across "Restart & Run All".
# 42 is an arbitrary fixed seed.
(X_train_cv, X_test_cv,
 X_train_tfidf, X_test_tfidf,
 y_train_cv, y_test_cv) = train_test_split(
    X_cv, X_tfidf, y, test_size=0.1, random_state=42
)
# Same rows for both representations, hence the same labels.
y_train_tfidf, y_test_tfidf = y_train_cv, y_test_cv

In [None]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
# Multinomial naive Bayes on each representation; `fit` returns the estimator itself.
classifier_cv = MultinomialNB().fit(X_train_cv, y_train_cv)
classifier_tfidf = MultinomialNB().fit(X_train_tfidf, y_train_tfidf)
# Logistic regression is trained on the count features only.
ls_classifier = LogisticRegression().fit(X_train_cv, y_train_cv)
# Last expression of the cell: display the fitted logistic-regression estimator.
ls_classifier

In [None]:
# Predict the held-out rows with each fitted model, pairing every model with
# the test matrix built from the representation it was trained on.
y_pred_cv, y_pred_tfidf, y_pred_ls = (
    model.predict(features)
    for model, features in (
        (classifier_cv, X_test_cv),
        (classifier_tfidf, X_test_tfidf),
        (ls_classifier, X_test_cv),
    )
)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
def _show_metrics(title, y_true, y_pred):
    """Print accuracy and confusion matrix for one model and return both.

    `title` is the text printed in front of " Accuracy: ".
    """
    accuracy = accuracy_score(y_true, y_pred)
    print(f"{title} Accuracy: {accuracy}")
    matrix = confusion_matrix(y_true, y_pred)
    print(matrix)
    return accuracy, matrix


# Naive Bayes on count features.
score_cv, cm_cv = _show_metrics("MNB CV", y_test_cv, y_pred_cv)

# Naive Bayes on TF-IDF features.
score_tfidf, cm_tfidf = _show_metrics("MNB TFID", y_test_tfidf, y_pred_tfidf)

# Logistic regression on count features.
score_ls, cm_ls = _show_metrics("Logistic", y_test_cv, y_pred_ls)

MNB CV Accuracy: 0.9376712328767123
[[422  15  12]
 [ 20 367  12]
 [ 23   9 580]]
MNB TFID Accuracy: 0.9178082191780822
[[437  18  29]
 [ 15 410  28]
 [ 16  14 493]]
Logistic Accuracy: 0.9746575342465753
[[434   8   7]
 [  5 390   4]
 [  6   7 599]]


In [None]:
import pickle

# Persist the three objects needed at inference time: the logistic-regression
# model, the fitted count vectorizer and the label encoder.
# `with` guarantees each file is flushed and closed; the bare `open(...)`
# used before never closed its handles explicitly.
with open("nakmodel.pkl", "wb") as fh:
    pickle.dump(ls_classifier, fh)
with open("cv.pkl", "wb") as fh:
    pickle.dump(cv, fh)
with open("encoder.pkl", "wb") as fh:
    pickle.dump(label_enc, fh)

In [None]:
# Reload the saved artefacts to check that they survive a round trip.
# SECURITY: `pickle.load` can execute arbitrary code -- only load files that
# were written by this notebook or come from a trusted source.
# `with` closes each file handle deterministically.
with open("cv.pkl", "rb") as fh:
    cv_unpickled = pickle.load(fh)
with open("nakmodel.pkl", "rb") as fh:
    model_unpickled = pickle.load(fh)
with open("encoder.pkl", "rb") as fh:
    le_unpickled = pickle.load(fh)

In [None]:
def clean(t):
    """Normalise one sentence the same way the training corpus was normalised.

    The text is lower-cased and split on whitespace; tokens found in the
    module-level stop-word set `sw` are dropped and the rest are passed
    through the module-level `lemmatizer`. The surviving tokens are returned
    joined by single spaces.
    """
    tokens = t.lower().split()
    kept = (lemmatizer.lemmatize(token) for token in tokens if token not in sw)
    return " ".join(kept)

def encode(text, cv):
    """Vectorize a single (already cleaned) sentence.

    Parameters
    ----------
    text : str
        The sentence to vectorize.
    cv : fitted vectorizer
        Any object whose ``transform(list_of_str)`` returns something with a
        ``toarray()`` method (e.g. the fitted ``CountVectorizer``).

    Returns
    -------
    The result of ``toarray()`` for the one-document batch, i.e. one feature row.
    """
    # Use the vectorizer that was passed in. The previous body silently
    # ignored `cv` and always read the global `cv_unpickled`.
    return cv.transform([text]).toarray()

def decode(result, label_encoder):
    """Map a batch of predicted class codes back to the first class label.

    Parameters
    ----------
    result : array-like
        Predicted class codes, as returned by a classifier's ``predict``.
    label_encoder : fitted encoder
        Any object providing ``inverse_transform`` (e.g. the fitted
        ``LabelEncoder``).

    Returns
    -------
    The decoded label of the first element of `result`.
    """
    # Use the encoder that was passed in. The previous body silently ignored
    # `label_encoder` and always read the global `le_unpickled`.
    return label_encoder.inverse_transform(result)[0]

In [None]:
# Hand-written smoke-test sentences; the trailing comment on each line is the
# label the author expects the model to predict.
sentences = [
    "Brilliant! I can't wait to hear the news about this change!",  # expected: Auditory
    "Chill out guys, nothing's gonna change, we have to study hard to succeed",  # expected: Kinesthetic
    "Can you imagine how this idea's gonna change the education system!",  # expected: Visual
    "I like to try out things to understand how it works, 'experiencing' is my power to learn",  # expected: Kinesthetic
    "All what we do is visualizing how things work and imagine new possibilities",  # expected: Visual
    "I hear the wind call my name, the sound that leads me home again",  # expected: Auditory
]



# End-to-end check of the *reloaded* artefacts: clean -> vectorize -> predict
# -> decode, printing each sentence followed by its predicted label.
for sentence in sentences:
    print(sentence)
    # Keep the raw sentence intact instead of overwriting the loop variable.
    cleaned = clean(sentence)
    encoded = encode(cleaned, cv_unpickled)
    pred = model_unpickled.predict(encoded)
    # Decode with the unpickled encoder (not the in-memory `label_enc`) so the
    # whole pipeline exercised here is the one that was saved to disk.
    result = decode(pred, le_unpickled)
    print(f"{result} \n")

Brilliant! I can't wait to hear the news about this change!
Auditory 

Chill out guys, nothing's gonna change, we have to study hard to succeed
Kinesthetic 

Can you imagine how this idea's gonna change the education system!
Visual 

I like to try out things to understand how it works, 'experiencing' is my power to learn
Kinesthetic 

All what we do is visualizing how things work and imagine new possibilities
Visual 

I hear the wind call my name, the sound that leads me home again
Auditory 

