In [37]:
import nltk
import numpy as np
import pandas as pd

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier, plot_tree

In [38]:
# Load the Twitter dataset and keep only the Dota2 rows that have review text.
#
# Column names are supplied at read time (header=None + names=...) so the
# file's first line is kept as data instead of being consumed as a header
# and then overwritten.
# NOTE(review): assumes the CSV ships without a header row — confirm. If it
# does have one, that row is discarded by the 'Dota2' filter below anyway.
#
# The frame is built in one non-mutating chain: the previous version called
# drop(..., inplace=True) on a boolean-filtered slice, which is the pattern
# that triggers pandas' SettingWithCopyWarning.
df = (
    pd.read_csv(
        '../Datasets/NLP/twitter_training.csv',
        header=None,
        names=['1', 'name', 'result', 'review'],
    )
    .drop(columns='1')                          # first column is not used downstream
    .loc[lambda frame: frame['name'] == 'Dota2']
    .dropna(subset=['review'])                  # rows without text cannot be vectorised
)

In [39]:
# Fetch the NLTK resources one after another, in the same order as before.
nltk_resources = ['punkt', 'stopwords', 'wordnet', 'averaged_perceptron_tagger_eng']
download_results = [nltk.download(resource) for resource in nltk_resources]

# Keep the status of the final download as the cell's displayed value.
download_results[-1]

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


True

In [40]:
# Split the review text / sentiment labels into train and test sets, create
# the two text vectorizers, and integer-encode the labels.
X = df['review']
y = df['result']

# 80/20 split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.8, random_state = 42)

# Both vectorizers drop scikit-learn's built-in English stop words.
tfidf = TfidfVectorizer(stop_words = 'english')
cv = CountVectorizer(stop_words = 'english')

# The encoder is fitted on the training labels only and then reused for the
# test labels. The encoded labels stay as 1-D arrays (the shape scikit-learn
# expects for a single target) instead of being wrapped in one-column
# DataFrames. `le` is kept so predictions can be mapped back to label names.
le = LabelEncoder()

y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

In [41]:
# TF-IDF features: vocabulary and weights are learned from the training text,
# then the fitted vectorizer is applied unchanged to the test text.
X_train_tf = tfidf.fit_transform(X_train)
X_test_tf = tfidf.transform(X_test)

# Count features: same fit-on-train / transform-on-test pattern.
X_train_cv = cv.fit_transform(X_train)
X_test_cv = cv.transform(X_test)

In [42]:
# Decision tree trained on the TF-IDF features.
# random_state is pinned (same seed as the train/test split) because an
# unseeded DecisionTreeClassifier can grow a different tree on each run,
# which would make the report below non-reproducible.
tree_tf = DecisionTreeClassifier(random_state = 42)
tree_tf.fit(X_train_tf, y_train)

# Per-class precision / recall / F1 on the held-out test set.
y_pred_tf = tree_tf.predict(X_test_tf)
print(classification_report(y_test, y_pred_tf))

              precision    recall  f1-score   support

           0       0.87      0.82      0.84        89
           1       0.86      0.84      0.85       153
           2       0.87      0.82      0.84       112
           3       0.76      0.85      0.80       118

    accuracy                           0.83       472
   macro avg       0.84      0.83      0.83       472
weighted avg       0.84      0.83      0.84       472



In [43]:
# Decision tree trained on the raw count features.
# random_state is pinned (same seed as the train/test split) because an
# unseeded DecisionTreeClassifier can grow a different tree on each run,
# which would make the report below non-reproducible.
tree_cv = DecisionTreeClassifier(random_state = 42)
tree_cv.fit(X_train_cv, y_train)

# Per-class precision / recall / F1 on the held-out test set.
y_pred_cv = tree_cv.predict(X_test_cv)
print(classification_report(y_test, y_pred_cv))

              precision    recall  f1-score   support

           0       0.91      0.82      0.86        89
           1       0.86      0.88      0.87       153
           2       0.93      0.87      0.90       112
           3       0.78      0.86      0.82       118

    accuracy                           0.86       472
   macro avg       0.87      0.86      0.86       472
weighted avg       0.87      0.86      0.86       472



In [44]:
# Classify one hand-written review with the count-feature tree.
review = 'A deeply rewarding masterpiece of strategy and teamwork that offers infinite complexity and some of the most exhilarating competitive highs in gaming history.'

# The vectorizer expects an iterable of documents, hence the one-item list.
inp = cv.transform([review])

# Predict the integer class, then map it back to the original label text.
predicted_code = tree_cv.predict(inp)
predicted_label = le.inverse_transform(predicted_code)
print(predicted_label)

['Neutral']
