# Requirements

In [2]:
! pip install contractions
! pip install unidecode
import unidecode
from bs4 import BeautifulSoup
from nltk import word_tokenize, sent_tokenize, RegexpTokenizer
from nltk.corpus import stopwords
import nltk
from nltk.stem import PorterStemmer, WordNetLemmatizer
# import sklearn diffrent algorithms
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
# other necessary libraries
import pickle as pk
import pandas as pd
import re
import numpy as np
from gensim.models import KeyedVectors
# this module just prevents warnings to be shown and get output messy
import warnings
warnings.simplefilter("ignore")

# download nltk resources
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

Collecting contractions
  Downloading contractions-0.0.52-py2.py3-none-any.whl (7.2 kB)
Collecting textsearch>=0.0.21
  Downloading textsearch-0.0.21-py2.py3-none-any.whl (7.5 kB)
Collecting pyahocorasick
  Downloading pyahocorasick-1.4.2.tar.gz (321 kB)
[K     |████████████████████████████████| 321 kB 34.1 MB/s 
[?25hCollecting anyascii
  Downloading anyascii-0.3.0-py3-none-any.whl (284 kB)
[K     |████████████████████████████████| 284 kB 53.9 MB/s 
[?25hBuilding wheels for collected packages: pyahocorasick
  Building wheel for pyahocorasick (setup.py) ... [?25l[?25hdone
  Created wheel for pyahocorasick: filename=pyahocorasick-1.4.2-cp37-cp37m-linux_x86_64.whl size=85453 sha256=70a606402cd3745c0c5985cfeee6e249538fb3b03eb1e5f7789b9d0df362611b
  Stored in directory: /root/.cache/pip/wheels/25/19/a6/8f363d9939162782bb8439d886469756271abc01f76fbd790f
Successfully built pyahocorasick
Installing collected packages: pyahocorasick, anyascii, textsearch, contractions
Successfully instal

True

In [3]:
# download dtaset from drive
! gdown --id 1-1qV9uGkvK-RRIF35e5sxnIFQArI1mA8
# read dataset
dataset = pd.read_csv('dataset.csv')

Downloading...
From: https://drive.google.com/uc?id=1-1qV9uGkvK-RRIF35e5sxnIFQArI1mA8
To: /content/dataset.csv
100% 59.7M/59.7M [00:00<00:00, 129MB/s]


#  PART A 

## In this section i define 3 levels of preprocessing then using bag of words vectorization we turn dataset to vectors and apply 3 diffrent algorithms (LR, SVM, KNN) on all 3 levels of preprocessing

In [None]:
# define class of preprocessing . three methods of this class do 3 diffrent level of preprocessing
class Preprocessing:
    
    def __init__(self,level):
        self.level = level
        
    def level_one_process(self,sent):
        return word_tokenize(sent)
    
    def level_two_process(self,sent):
        # delete numbers
        sent = re.sub("\d+", "", sent)
        # to lowercase
        sent = sent.lower()
        # tokenization and deleting punctuation marks
        tokenizer = RegexpTokenizer(r'\w+')
        return tokenizer.tokenize(sent)
        
    def level_three_process(self,sent):
        # remove html
        sent = self.remove_html_tags(sent)
        # convert accented
        sent = self.remove_accented_chars(sent)
        # apply level 2 processing first
        words = self.level_two_process(sent)
        # find and delete stop words
        stop_words = stopwords.words('english')
        new_words = [w for w in words if not w in stop_words]
        # lemmatization
        lemmatizer = WordNetLemmatizer()
        new_words = list(map(lemmatizer.lemmatize,new_words))
        return new_words
        
    def preprocess_sentence(self,sent):
        if self.level == 1:
            processed_sent = self.level_one_process(sent)
        elif self.level == 2:
            processed_sent = self.level_two_process(sent)
        elif self.level == 3:
            processed_sent = self.level_three_process(sent)
            
        return processed_sent

    def remove_html_tags(self,sent):
      soup = BeautifulSoup(sent, "html.parser")
      stripped_sent = soup.get_text(separator=" ")
      return stripped_sent

    def remove_accented_chars(self,sent):
      return unidecode.unidecode(sent)

In [None]:
# a class to generate bag-of-words vectors for words
class BagOfWords:
    def __init__(self,min_freq):
        self.min_freq = min_freq
    
    def make_vactors(self,text):
        CV = CountVectorizer(min_df=self.min_freq)
        bow = CV.fit_transform(text)
        return bow

In [None]:
# a function to produce results
def analysis(labels,predictions):
    print("Report Classification\n",classification_report(labels,predictions,target_names=["positive","negative"]))
    print("Matrix Confusion\n",confusion_matrix(labels,predictions))
    print("Accuracy\n",accuracy_score(labels,predictions))

In [None]:
# put all comments in all_text and all labels in all_label
all_text = list(dataset['comment'].values)
all_label = list(dataset['sentiment'].values)

In [None]:
# apply level-1 preprocessing on text
level_1_processed_text = []
p = Preprocessing(level=1)
for text in all_text:
    level_1_processed_text.append(' '.join(p.preprocess_sentence(text)))

# apply level-2 preprocessing on text
level_2_processed_text = []
p = Preprocessing(level=2)
for text in all_text:
    level_2_processed_text.append(' '.join(p.preprocess_sentence(text)))

# apply level-3 preprocessing on text
level_3_processed_text = []
p = Preprocessing(level=3)
for text in all_text:
    level_3_processed_text.append(' '.join(p.preprocess_sentence(text)))

# make bow objects of 3 levels of text
bow_obj = BagOfWords(min_freq=20)
bow_level_1_text = bow_obj.make_vactors(level_1_processed_text)
bow_level_2_text = bow_obj.make_vactors(level_2_processed_text)
bow_level_3_text = bow_obj.make_vactors(level_3_processed_text)

# split data to train-val & test
x_train_val_level_1, x_test_level_1, y_train_val_level_1, y_test_level_1 = train_test_split(bow_level_1_text,all_label,test_size=.2,random_state=1)
x_train_val_level_2, x_test_level_2, y_train_val_level_2, y_test_level_2 = train_test_split(bow_level_2_text,all_label,test_size=.2,random_state=1)
x_train_val_level_3, x_test_level_3, y_train_val_level_3, y_test_level_3 = train_test_split(bow_level_3_text,all_label,test_size=.2,random_state=1)

## Logistic Regression

In [None]:
# apply level-1 
lr_clf_level_1 = LogisticRegression()
lr_clf_level_1.fit(x_train_val_level_1,y_train_val_level_1)
y_test_pred = lr_clf_level_1.predict(x_test_level_1)
print('############################ Logistic Regression Level 1 ############################')
analysis(y_test_level_1,y_test_pred)

# apply level-2
lr_clf_level_2 = LogisticRegression()
lr_clf_level_2.fit(x_train_val_level_2,y_train_val_level_2)
y_test_pred = lr_clf_level_2.predict(x_test_level_2)
print('############################ Logistic Regression Level 2 ############################')
analysis(y_test_level_2,y_test_pred)

# apply level-3
lr_clf_level_3 = LogisticRegression()
lr_clf_level_3.fit(x_train_val_level_3,y_train_val_level_3)
y_test_pred = lr_clf_level_3.predict(x_test_level_3)
print('############################ Logistic Regression Level 3 ############################')
analysis(y_test_level_3,y_test_pred)

############################ Logistic Regression Level 1 ############################
Report Classification
               precision    recall  f1-score   support

    positive       0.90      0.88      0.89      4493
    negative       0.89      0.90      0.89      4507

    accuracy                           0.89      9000
   macro avg       0.89      0.89      0.89      9000
weighted avg       0.89      0.89      0.89      9000

Matrix Confusion
 [[3970  523]
 [ 450 4057]]
Accuracy
 0.8918888888888888
############################ Logistic Regression Level 2 ############################
Report Classification
               precision    recall  f1-score   support

    positive       0.89      0.88      0.89      4493
    negative       0.89      0.89      0.89      4507

    accuracy                           0.89      9000
   macro avg       0.89      0.89      0.89      9000
weighted avg       0.89      0.89      0.89      9000

Matrix Confusion
 [[3974  519]
 [ 487 4020]]
Accuracy


## SVM

In [None]:
# apply level-1 calssification
svm_clf_level_1 = SVC(max_iter=5000)
svm_clf_level_1.fit(x_train_val_level_1,y_train_val_level_1)
y_test_pred = svm_clf_level_1.predict(x_test_level_1)
print('############################ SVM Level 1 ############################')
analysis(y_test_level_1,y_test_pred)

# apply level-2 calssification
svm_clf_level_2 = SVC(max_iter=5000)
svm_clf_level_2.fit(x_train_val_level_2,y_train_val_level_2)
y_test_pred = svm_clf_level_2.predict(x_test_level_2)
print('############################ SVM Level 2 ############################')
analysis(y_test_level_2,y_test_pred)

# apply level-3 calssification
svm_clf_level_3 = SVC(max_iter=5000)
svm_clf_level_3.fit(x_train_val_level_3,y_train_val_level_3)
y_test_pred = svm_clf_level_3.predict(x_test_level_3)
print('############################ SVM Level 3 ############################')
analysis(y_test_level_3,y_test_pred)

############################ SVM Level 1 ############################
Report Classification
               precision    recall  f1-score   support

    positive       0.87      0.47      0.61      4493
    negative       0.64      0.93      0.76      4507

    accuracy                           0.70      9000
   macro avg       0.75      0.70      0.68      9000
weighted avg       0.75      0.70      0.68      9000

Matrix Confusion
 [[2122 2371]
 [ 321 4186]]
Accuracy
 0.7008888888888889
############################ SVM Level 2 ############################
Report Classification
               precision    recall  f1-score   support

    positive       0.91      0.60      0.72      4493
    negative       0.70      0.94      0.80      4507

    accuracy                           0.77      9000
   macro avg       0.80      0.77      0.76      9000
weighted avg       0.80      0.77      0.76      9000

Matrix Confusion
 [[2707 1786]
 [ 283 4224]]
Accuracy
 0.7701111111111111
############

## KNN

In [None]:
# apply level-1 classification
knn_clf_level_1 = KNeighborsClassifier(n_neighbors=9,n_jobs=-1,algorithm='brute')
knn_clf_level_1.fit(x_train_val_level_1,y_train_val_level_1)
y_test_pred = knn_clf_level_1.predict(x_test_level_1)
print('############################ KNN Level 1 ############################')
analysis(y_test_level_1,y_test_pred)

# apply level-2 classification
knn_clf_level_2 = KNeighborsClassifier(n_neighbors=9,n_jobs=-1,algorithm='brute')
knn_clf_level_2.fit(x_train_val_level_2,y_train_val_level_2)
y_test_pred = knn_clf_level_2.predict(x_test_level_2)
print('############################ KNN Level 2 ############################')
analysis(y_test_level_2,y_test_pred)

# apply level-3 classification
knn_clf_level_3 = KNeighborsClassifier(n_neighbors=9,n_jobs=-1,algorithm='brute')
knn_clf_level_3.fit(x_train_val_level_3,y_train_val_level_3)
y_test_pred = knn_clf_level_3.predict(x_test_level_3)
print('############################ KNN Level 3 ############################')
analysis(y_test_level_3,y_test_pred)

############################ KNN Level 1 ############################
Report Classification
               precision    recall  f1-score   support

    positive       0.67      0.57      0.62      4493
    negative       0.63      0.72      0.67      4507

    accuracy                           0.65      9000
   macro avg       0.65      0.65      0.64      9000
weighted avg       0.65      0.65      0.64      9000

Matrix Confusion
 [[2565 1928]
 [1253 3254]]
Accuracy
 0.6465555555555556
############################ KNN Level 2 ############################
Report Classification
               precision    recall  f1-score   support

    positive       0.68      0.56      0.61      4493
    negative       0.63      0.73      0.68      4507

    accuracy                           0.65      9000
   macro avg       0.65      0.65      0.65      9000
weighted avg       0.65      0.65      0.65      9000

Matrix Confusion
 [[2527 1966]
 [1198 3309]]
Accuracy
 0.6484444444444445
############

#PART B



## In this part i apply previous algorithms using bag of words and word2vec only on 3rd level processing but also i finetune hyperparameters using cross-validation

In [None]:
# this function cross-validate logistic regression and returns best C
def cross_validition_logistic_regression(X,Y):
  best_score = 0
  best_C = None

  Cs = np.linspace(.1,20,20)
  for C in Cs:
    lr_clf = LogisticRegression()
    scores = cross_val_score(lr_clf,X,Y,cv=4)
    score = np.average(scores)
    if score > best_score:
      best_score = score
      best_C = C

  return best_C

# this function cross-validate SVM and returns best C
def cross_validition_svm(X,Y):
  best_score = 0
  best_C = None

  Cs = np.linspace(.1,1,3)
  for C in Cs:
    svm_clf = SVC(max_iter=5000,C=C)
    scores = cross_val_score(svm_clf,X,Y,cv=4)
    score = np.average(scores)
    if score > best_score:
      best_score = score
      best_C = C

  return best_C

# this function cross-validate knn and return best k
def cross_validition_knn(X,Y):
  best_score = 0
  best_k = None

  ks = np.arange(3,10)
  for k in ks:
    knn_clf = KNeighborsClassifier(n_neighbors=k,n_jobs=-1,algorithm='brute')
    scores = cross_val_score(knn_clf,X,Y,cv=4)
    score = np.average(scores)
    if score > best_score:
      best_score = score
      best_k = k

  return best_k

## Bag Of Word Results

In [None]:
# apply logistic regressin with cross validation
best_C_lr = cross_validition_logistic_regression(x_train_val_level_3,y_train_val_level_3)
lr_clf_bow = LogisticRegression(C=best_C_lr)
lr_clf_bow.fit(x_train_val_level_3,y_train_val_level_3)
y_test_level_3_pred = lr_clf_bow.predict(x_test_level_3)
print('################## Logistic Regression (Bag Of Words) ##################')
analysis(y_test_level_3,y_test_level_3_pred)
lr_clf_bow_accuracy = accuracy_score(y_test_level_3,y_test_level_3_pred)

# apply svm with cross validation
best_C = cross_validition_svm(x_train_val_level_3,y_train_val_level_3)
svm_clf_bow = SVC(max_iter=6000,C=best_C)
svm_clf_bow.fit(x_train_val_level_3,y_train_val_level_3)
y_test_level_3_pred = svm_clf_bow.predict(x_test_level_3)
print('################## SVM (Bag Of Words) ##################')
analysis(y_test_level_3,y_test_level_3_pred)
svm_clf_bow_accuracy = accuracy_score(y_test_level_3,y_test_level_3_pred)

# apply knn with cross validation
best_k = cross_validition_knn(x_train_val_level_3,y_train_val_level_3)
knn_clf_bow = KNeighborsClassifier(n_neighbors=best_k,n_jobs=-1,algorithm='brute')
knn_clf_bow.fit(x_train_val_level_3,y_train_val_level_3)
y_test_level_3_pred = knn_clf_bow.predict(x_test_level_3)
print('################## KNN (Bag Of Words) ##################')
analysis(y_test_level_3,y_test_level_3_pred)
knn_clf_bow_accuracy = accuracy_score(y_test_level_3,y_test_level_3_pred)

################## Logistic Regression (Bag Of Words) ##################
Report Classification
               precision    recall  f1-score   support

    positive       0.89      0.89      0.89      4493
    negative       0.89      0.89      0.89      4507

    accuracy                           0.89      9000
   macro avg       0.89      0.89      0.89      9000
weighted avg       0.89      0.89      0.89      9000

Matrix Confusion
 [[3979  514]
 [ 475 4032]]
Accuracy
 0.8901111111111111
################## SVM (Bag Of Words) ##################
Report Classification
               precision    recall  f1-score   support

    positive       0.91      0.85      0.88      4493
    negative       0.86      0.91      0.89      4507

    accuracy                           0.88      9000
   macro avg       0.88      0.88      0.88      9000
weighted avg       0.88      0.88      0.88      9000

Matrix Confusion
 [[3838  655]
 [ 395 4112]]
Accuracy
 0.8833333333333333
################## KNN

In [None]:
del level_1_processed_text
del level_2_processed_text
del level_3_processed_text
del bow_level_1_text
del bow_level_2_text
del bow_level_3_text
del x_train_val_level_1
del x_train_val_level_2
del x_train_val_level_3

## Word2Vec Results

In [None]:
# download ggole news word vectors and unzip it
! wget https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz 
! gunzip GoogleNews-vectors-negative300.bin
# load word2vec model
word2vec_model = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin', binary=True)

# preprocess text with level-3 preprocessing and add all text to bew_text array
new_text = []
p = Preprocessing(level=3)
for text in all_text:
  current_text = []
  processed_text = p.preprocess_sentence(text)
  for word in processed_text:
    current_text.append(word)
  new_text.append(current_text)

del all_text

# turn text to vectors using loaded word2vec
vectorized_text = []
for comment in new_text:
  temp = []
  for word in comment:
    try:
      temp.append(word2vec_model.wv[word])
    except:
      pass

  vectorized_text.append(np.average(temp,axis=0))
# turn vectorized list to numpy array
del new_text
del word2vec_model
vectorized_text = np.array(vectorized_text)
x_train_val_wv, x_test_wv, y_train_val_wv, y_test_wv = train_test_split(vectorized_text,all_label,test_size=.2,random_state=1)

--2021-07-27 02:50:45--  https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.137.168
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.137.168|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1647046227 (1.5G) [application/x-gzip]
Saving to: ‘GoogleNews-vectors-negative300.bin.gz’


2021-07-27 02:51:09 (65.3 MB/s) - ‘GoogleNews-vectors-negative300.bin.gz’ saved [1647046227/1647046227]



In [None]:
# Logistic Regression
best_C_lr = cross_validition_logistic_regression(x_train_val_wv,y_train_val_wv)
lr_clf_w2v = LogisticRegression(C=best_C_lr)
lr_clf_w2v.fit(x_train_val_wv,y_train_val_wv)
y_test_wv_pred = lr_clf_w2v.predict(x_test_wv)
print('################## Logistic Regression (Word2Vec) ##################')
analysis(y_test_wv,y_test_wv_pred)
lr_clf_wv_accuracy = accuracy_score(y_test_wv,y_test_wv_pred)

# SVM
best_C = cross_validition_svm(x_train_val_wv,y_train_val_wv)
svm_clf_w2v = SVC(max_iter=6000,C=best_C)
svm_clf_w2v.fit(x_train_val_wv,y_train_val_wv)
y_test_wv_pred = svm_clf_w2v.predict(x_test_wv)
print('################## SVM (Word2Vec) ##################')
analysis(y_test_wv,y_test_wv_pred)
svm_clf_wv_accuracy = accuracy_score(y_test_wv,y_test_wv_pred)

# KNN
best_k = cross_validition_knn(x_train_val_wv,y_train_val_wv)
knn_clf_w2v = KNeighborsClassifier(n_neighbors=best_k,n_jobs=-1,algorithm='brute')
knn_clf_w2v.fit(x_train_val_wv,y_train_val_wv)
y_test_wv_pred = knn_clf_w2v.predict(x_test_wv)
print('################## KNN (Word2Vec) ##################')
analysis(y_test_wv,y_test_wv_pred)
knn_clf_wv_accuracy = accuracy_score(y_test_wv,y_test_wv_pred)

################## Logistic Regression (Word2Vec) ##################
Report Classification
               precision    recall  f1-score   support

    positive       0.83      0.83      0.83      4493
    negative       0.83      0.83      0.83      4507

    accuracy                           0.83      9000
   macro avg       0.83      0.83      0.83      9000
weighted avg       0.83      0.83      0.83      9000

Matrix Confusion
 [[3749  744]
 [ 770 3737]]
Accuracy
 0.8317777777777777
################## SVM (Word2Vec) ##################
Report Classification
               precision    recall  f1-score   support

    positive       0.91      0.77      0.83      4493
    negative       0.80      0.92      0.86      4507

    accuracy                           0.85      9000
   macro avg       0.85      0.85      0.85      9000
weighted avg       0.85      0.85      0.85      9000

Matrix Confusion
 [[3463 1030]
 [ 354 4153]]
Accuracy
 0.8462222222222222
################## KNN (Word2V

## Save best models

In [None]:
# save all models by their name to drive

# load drive
from google.colab import drive
drive.mount('/content/drive')

# save all models (bow & w2v) to drive
pk.dump(lr_clf_w2v, open('/content/drive/My Drive/LR_W2V.pkl', 'wb'))
pk.dump(lr_clf_bow, open('/content/drive/My Drive/LR_BOW.pkl', 'wb'))
pk.dump(svm_clf_w2v, open('/content/drive/My Drive/SVM_W2V.pkl', 'wb'))
pk.dump(svm_clf_bow, open('/content/drive/My Drive/SVM_BOW.pkl', 'wb'))
pk.dump(knn_clf_w2v, open('/content/drive/My Drive/KNN_W2V.pkl', 'wb'))
pk.dump(knn_clf_bow, open('/content/drive/My Drive/KNN_BOW.pkl', 'wb'))

# save best models by name mentioned in pdf to drive
if lr_clf_wv_accuracy > lr_clf_bow_accuracy:
  pk.dump(lr_clf_w2v, open('/content/drive/My Drive/LR.pkl', 'wb'))
else:
  pk.dump(lr_clf_bow, open('/content/drive/My Drive/LR.pkl', 'wb'))

if svm_clf_wv_accuracy > svm_clf_bow_accuracy:
  pk.dump(svm_clf_w2v, open('/content/drive/My Drive/SVM.pkl', 'wb'))
else:
  pk.dump(svm_clf_bow, open('/content/drive/My Drive/SVM.pkl', 'wb'))

if knn_clf_wv_accuracy > knn_clf_bow_accuracy:
  pk.dump(knn_clf_w2v, open('/content/drive/My Drive/KNN.pkl', 'wb'))
else:
  pk.dump(knn_clf_bow, open('/content/drive/My Drive/KNN.pkl', 'wb'))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# PART C

## In ths part i apply neural network on dataset to measure the performance l2-coefficient is finetuned using cross-validation

In [None]:
hidden_size_1 = 100
hidden_size_2 = 100
alphas = np.linspace(1e-6,1e-3,4)
best_score = 0

for alpha in alphas:
  mlp_clf = MLPClassifier(activation='relu',hidden_layer_sizes=(hidden_size_1,hidden_size_2),solver='adam',learning_rate_init=7e-5,alpha=alpha,learning_rate='adaptive',batch_size=100)
  scores = cross_val_score(mlp_clf,x_train_val_wv,y_train_val_wv,cv=4)
  score = np.average(scores)
  print(f'alpha {alpha} | accuracy on 4-fold cross validation {score}')
  if score > best_score:
    best_score = score
    best_alpha = alpha

print('################################')
print(f'best alpha is {best_alpha}')
print('start training best model on best alpha\n\n')

best_model = MLPClassifier(activation='relu',hidden_layer_sizes=(hidden_size_1,hidden_size_2),solver='adam',learning_rate_init=7e-5,alpha=best_alpha,batch_size=100,learning_rate='adaptive')
best_model.fit(x_train_val_wv,y_train_val_wv)
y_test_wv_pred = best_model.predict(x_test_wv)
analysis(y_test_wv,y_test_wv_pred)
pk.dump(best_model, open('best.pkl', 'wb'))

alpha 1e-06 | accuracy on 4-fold cross validation 0.8538333333333333
alpha 0.00033400000000000004 | accuracy on 4-fold cross validation 0.8585277777777777
alpha 0.0006670000000000001 | accuracy on 4-fold cross validation 0.8610277777777778
alpha 0.001 | accuracy on 4-fold cross validation 0.8591944444444445
################################
best alpha is 0.0006670000000000001
start training best model on best alpha


Report Classification
               precision    recall  f1-score   support

    positive       0.85      0.88      0.86      4493
    negative       0.87      0.84      0.86      4507

    accuracy                           0.86      9000
   macro avg       0.86      0.86      0.86      9000
weighted avg       0.86      0.86      0.86      9000

Matrix Confusion
 [[3932  561]
 [ 700 3807]]
Accuracy
 0.8598888888888889
