## Import Libraries

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from hazm import word_tokenize, stopwords_list
from dadmatools.models.normalizer import Normalizer

## Data Reading

In [None]:
# Load the labelled review sentences from the Excel workbook (the path is
# relative to the notebook's working directory) and display the DataFrame.
df = pd.read_excel(io='data/Labeled-Sentences_3class.xlsx')
df

In [None]:
# Features: the raw review text, with missing reviews replaced by empty strings.
# Targets: the class label stored alongside each review.
X = df['review'].fillna('')
Y = df['label']

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
train_X, test_X, train_Y, test_Y = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)
train_X

## Preprocessing and TF-IDF Vectorization

In [None]:
# Text normalizer; its `normalize` method is used as the TF-IDF preprocessor.
# NOTE(review): presumably remove_puncs=True strips punctuation and
# replace_number_with="" deletes numbers -- confirm against the dadmatools docs.
normalize = Normalizer(
    remove_puncs=True,
    replace_number_with="",
)

In [None]:
# Fit the TF-IDF vocabulary and IDF weights on the training reviews only, so
# the held-out reviews do not influence the features.
tfidf = TfidfVectorizer(
    # Each raw review is normalised before it is tokenised.
    preprocessor=normalize.normalize,
    # hazm's tokenizer replaces scikit-learn's default regex tokenisation.
    tokenizer=word_tokenize,
    # The default token_pattern is ignored whenever a tokenizer is supplied;
    # passing None states that explicitly and avoids the "token_pattern will
    # not be used" UserWarning on fit.
    token_pattern=None,
    stop_words=stopwords_list(),
)
# NOTE(review): the stop words are passed as-is, not run through `normalize`.
# If normalisation rewrites characters, some stop words may never match the
# produced tokens -- confirm, and normalise the list too if needed.
tf_idf_matrix = tfidf.fit_transform(train_X)

In [None]:
# Display the dimensions of the training matrix as (documents, vocabulary terms).
tf_idf_matrix.shape

## Data Classification

In [None]:
from sklearn.naive_bayes import MultinomialNB

# Multinomial naive Bayes with default hyper-parameters, trained on the
# TF-IDF features of the training reviews.
nb = MultinomialNB()
nb.fit(tf_idf_matrix, train_Y)
# `fit` returns the estimator itself, so `clf` and `nb` name the same object.
clf = nb

In [None]:
# Vectorise the held-out reviews with the already-fitted vocabulary and IDF
# weights (transform only, no refit), then predict their labels.
tf_idf_test = tfidf.transform(raw_documents=test_X)
preds = clf.predict(X=tf_idf_test)

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the held-out split, returned as a
# nested dict (output_dict=True) instead of a formatted string.
# NOTE(review): target_names are applied positionally to the label values in
# sorted order -- confirm that the sorted labels really correspond to
# 'good', 'fair', 'excellent' in that order, otherwise the rows are mislabelled.
classification_report(
    test_Y,
    preds,
    target_names=['good', 'fair', 'excellent'],
    output_dict=True,
)

In [None]:
# Take the first held-out review by position (not by index label) and show it.
text = test_X.iat[0]
text

In [None]:
# `transform` expects an iterable of documents, so the single review is
# wrapped in a list; the cell then displays the predicted label array.
vec = tfidf.transform(raw_documents=[text])
clf.predict(X=vec)