In [1]:
import numpy as np 
import pandas as pd
import csv
import matplotlib.pyplot as plt

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, TfidfTransformer

from imblearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
from imblearn.over_sampling import SMOTE
from sklearn.metrics import *
from helper import *

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
import pickle

import cufflinks

cufflinks.go_offline()
cufflinks.set_config_file(world_readable=True, theme='pearl')

import warnings
warnings.filterwarnings("ignore")

Using TensorFlow backend.


In [2]:
data = pd.read_csv("Data/PRAW-Preprocessed.csv")
data.head(5)

Unnamed: 0,Post ID,Title,URL,Body,Score,Comments,Comments Count,Flair
0,g014wc,lost job sick mother dad earn,www.reddit.com,hi really tough time everyone . recently lost ...,0.05862,. listen . beginner . hard fact . one trust jo...,0.031235,AskIndia
1,fxofyu,government come begging bowl every crisis,www.reddit.com,terrorist due lack rain pandemic . every incid...,0.036134,understand use money contingency fund separate...,0.048641,AskIndia
2,g0zlly,mother condition going worse due hepatitis b e...,www.reddit.com,hi really appreciate warm response previous po...,0.042678,anyone influential twitter please help guy sta...,0.022413,AskIndia
3,fvy95j,men decided get married plan old age,www.reddit.com,corona virus given time think life bit . male ...,0.014879,plan . work enjoy way healthy . go see world ....,0.049118,AskIndia
4,g1lmhg,please advice reality face today .,www.reddit.com,sorry post going long . please bear . also thr...,0.024052,one thing say try clear taking loan basis job ...,0.01979,AskIndia


In [3]:
count = data.shape[0]

In [4]:
data.isna().sum()

Post ID              0
Title               40
URL                  0
Body              1737
Score                0
Comments           249
Comments Count       0
Flair                0
dtype: int64

In [5]:
data["Title"].fillna(" ", inplace = True)
data["Body"].fillna(" ", inplace = True) 
data["Comments"].fillna(" ", inplace = True)

In [6]:
flairs = ["AskIndia", "Non-Political", "Scheduled", "Photography", "Science/Technology", "Politics", 
          "Business/Finance", "Policy/Economy", "Sports", "Food", "Coronavirus"]

Helper functions

In [7]:
def singleFeature(feature):

    X = data[feature].values
    Y = data.Flair
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state = 77, test_size = 0.2)

    return X_train, X_test, Y_train, Y_test

##

def dualFeature(feature1, feature2):

    X1 = data[feature1].values
    X2 = data[feature2].values
    Y = data.Flair

    X = []

    for ind in range(count):

        X.append(X1[ind] + ' ' + X2[ind])

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state = 77, test_size = 0.2)

    return X_train, X_test, Y_train, Y_test

##

def triFeature(feature1, feature2, feature3):

    X1 = data[feature1].values
    X2 = data[feature2].values
    X3 = data[feature3].values
    Y = data.Flair

    X = []

    for ind in range(count):

        X.append(X1[ind] + ' ' + X2[ind] + ' ' + X3[ind])

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state = 77, test_size = 0.2)

    return X_train, X_test, Y_train, Y_test

##

def split(feature):

    X = combFeature[feature].values
    Y = combFeature.Flair

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state = 77, test_size = 0.2)

    return X_train, X_test, Y_train, Y_test


### Title

In [8]:
X_train, X_test, Y_train, Y_test = singleFeature("Title")

Naive Bayes

In [9]:
MNB = Pipeline([('vect', CountVectorizer(ngram_range = (1, 1))),
               ('tfidf', TfidfTransformer()),
               ('MNB', MultinomialNB(alpha = 1, fit_prior = False)),
              ])
MNB.fit(X_train, Y_train)

from sklearn.metrics import classification_report
Y_pred = MNB.predict(X_test)

print('Train accuracy '+ str(MNB.score(X_train, Y_train)))
print('Test accuracy %s' % accuracy_score(Y_pred, Y_test))
print(classification_report(Y_test, Y_pred, target_names = flairs))

Train accuracy 0.9091337099811676
Test accuracy 0.47368421052631576
                    precision    recall  f1-score   support

          AskIndia       0.26      0.37      0.30        51
     Non-Political       0.40      0.36      0.38        47
         Scheduled       0.45      0.59      0.51        51
       Photography       0.56      0.51      0.53        55
Science/Technology       0.26      0.17      0.21        52
          Politics       0.80      0.68      0.74        57
  Business/Finance       0.34      0.50      0.40        38
    Policy/Economy       0.38      0.40      0.39        45
            Sports       0.87      0.97      0.92        35
              Food       0.34      0.19      0.24        58
       Coronavirus       0.68      0.65      0.67        43

          accuracy                           0.47       532
         macro avg       0.49      0.49      0.48       532
      weighted avg       0.48      0.47      0.47       532



Logistic Regression

In [10]:
LR = Pipeline([('vect', CountVectorizer(ngram_range = (1, 1))),
                ('tfidf', TfidfTransformer()),
                ('LR', LogisticRegression(solver = "liblinear", penalty = "l2")),
               ])
LR.fit(X_train, Y_train)

Y_pred = LR.predict(X_test)

print('Train accuracy '+ str(LR.score(X_train, Y_train)))
print('Test accuracy %s' % accuracy_score(Y_pred, Y_test))
print(classification_report(Y_test, Y_pred, target_names = flairs))

Train accuracy 0.8860640301318268
Test accuracy 0.462406015037594
                    precision    recall  f1-score   support

          AskIndia       0.33      0.31      0.32        51
     Non-Political       0.37      0.30      0.33        47
         Scheduled       0.48      0.59      0.53        51
       Photography       0.54      0.47      0.50        55
Science/Technology       0.24      0.21      0.23        52
          Politics       0.88      0.65      0.75        57
  Business/Finance       0.35      0.47      0.40        38
    Policy/Economy       0.26      0.51      0.34        45
            Sports       1.00      0.94      0.97        35
              Food       0.35      0.24      0.29        58
       Coronavirus       0.73      0.56      0.63        43

          accuracy                           0.46       532
         macro avg       0.50      0.48      0.48       532
      weighted avg       0.49      0.46      0.47       532



Random Forest

In [11]:
RF = Pipeline([
    ('vect', CountVectorizer(ngram_range = (1, 1))),
    ('tfidf', TfidfTransformer()),
    ('RF', RandomForestClassifier(max_depth = 3000, min_samples_split = 200))
])

RF.fit(X_train, Y_train)

Y_pred = RF.predict(X_test)

print('Train accuracy '+ str(RF.score(X_train, Y_train)))
print('Test accuracy %s' % accuracy_score(Y_pred, Y_test))
print(classification_report(Y_test, Y_pred, target_names = flairs))

Train accuracy 0.89030131826742
Test accuracy 0.43609022556390975
                    precision    recall  f1-score   support

          AskIndia       0.41      0.41      0.41        51
     Non-Political       0.34      0.23      0.28        47
         Scheduled       0.47      0.59      0.52        51
       Photography       0.47      0.40      0.43        55
Science/Technology       0.18      0.17      0.18        52
          Politics       0.81      0.60      0.69        57
  Business/Finance       0.31      0.34      0.33        38
    Policy/Economy       0.21      0.51      0.30        45
            Sports       0.97      0.97      0.97        35
              Food       0.43      0.21      0.28        58
       Coronavirus       0.68      0.53      0.60        43

          accuracy                           0.44       532
         macro avg       0.48      0.45      0.45       532
      weighted avg       0.47      0.44      0.44       532



### Comments

In [12]:
X_train, X_test, Y_train, Y_test = singleFeature("Comments")

Naive Bayes

In [13]:
MNB = Pipeline([('vect', CountVectorizer(ngram_range = (1, 1))),
               ('tfidf', TfidfTransformer()),
               ('MNB', MultinomialNB(alpha = 1, fit_prior = False)),
              ])
MNB.fit(X_train, Y_train)

from sklearn.metrics import classification_report
Y_pred = MNB.predict(X_test)

print('Train accuracy '+ str(MNB.score(X_train, Y_train)))
print('Test accuracy %s' % accuracy_score(Y_pred, Y_test))
print(classification_report(Y_test, Y_pred, target_names = flairs))

Train accuracy 0.7269303201506592
Test accuracy 0.37406015037593987
                    precision    recall  f1-score   support

          AskIndia       0.19      0.55      0.28        51
     Non-Political       0.53      0.17      0.26        47
         Scheduled       1.00      0.06      0.11        51
       Photography       0.96      0.42      0.58        55
Science/Technology       0.42      0.21      0.28        52
          Politics       0.95      0.61      0.74        57
  Business/Finance       0.35      0.32      0.33        38
    Policy/Economy       0.28      0.84      0.42        45
            Sports       0.35      0.97      0.52        35
              Food       0.33      0.02      0.03        58
       Coronavirus       1.00      0.14      0.24        43

          accuracy                           0.37       532
         macro avg       0.58      0.39      0.35       532
      weighted avg       0.59      0.37      0.34       532



Logistic Regression

In [14]:
LR = Pipeline([('vect', CountVectorizer(ngram_range = (1, 1))),
                ('tfidf', TfidfTransformer()),
                ('LR', LogisticRegression(solver = "liblinear", penalty = "l2")),
               ])
LR.fit(X_train, Y_train)

Y_pred = LR.predict(X_test)

print('Train accuracy '+ str(LR.score(X_train, Y_train)))
print('Test accuracy %s' % accuracy_score(Y_pred, Y_test))
print(classification_report(Y_test, Y_pred, target_names = flairs))

Train accuracy 0.85969868173258
Test accuracy 0.5319548872180451
                    precision    recall  f1-score   support

          AskIndia       0.53      0.49      0.51        51
     Non-Political       0.48      0.28      0.35        47
         Scheduled       0.31      0.63      0.41        51
       Photography       0.87      0.60      0.71        55
Science/Technology       0.55      0.31      0.40        52
          Politics       0.77      0.81      0.79        57
  Business/Finance       0.29      0.37      0.33        38
    Policy/Economy       0.48      0.64      0.55        45
            Sports       0.68      0.91      0.78        35
              Food       0.56      0.38      0.45        58
       Coronavirus       0.64      0.49      0.55        43

          accuracy                           0.53       532
         macro avg       0.56      0.54      0.53       532
      weighted avg       0.57      0.53      0.53       532



Random Forest

In [15]:
RF = Pipeline([
    ('vect', CountVectorizer(ngram_range = (1, 1))),
    ('tfidf', TfidfTransformer()),
    ('RF', RandomForestClassifier(max_depth = 3000, min_samples_split = 200))
])

RF.fit(X_train, Y_train)

Y_pred = RF.predict(X_test)

print('Train accuracy '+ str(RF.score(X_train, Y_train)))
print('Test accuracy %s' % accuracy_score(Y_pred, Y_test))
print(classification_report(Y_test, Y_pred, target_names = flairs))

Train accuracy 0.8192090395480226
Test accuracy 0.4417293233082707
                    precision    recall  f1-score   support

          AskIndia       0.50      0.57      0.53        51
     Non-Political       0.45      0.30      0.36        47
         Scheduled       0.30      0.57      0.39        51
       Photography       0.66      0.38      0.48        55
Science/Technology       0.32      0.15      0.21        52
          Politics       0.71      0.74      0.72        57
  Business/Finance       0.37      0.42      0.40        38
    Policy/Economy       0.32      0.58      0.41        45
            Sports       0.50      0.86      0.63        35
              Food       0.35      0.12      0.18        58
       Coronavirus       0.50      0.30      0.38        43

          accuracy                           0.44       532
         macro avg       0.45      0.45      0.43       532
      weighted avg       0.46      0.44      0.42       532



### Title - Comments

In [16]:
X_train, X_test, Y_train, Y_test = dualFeature("Title", "Comments")

Naive Bayes

In [17]:
MNB = Pipeline([('vect', CountVectorizer(ngram_range = (1, 1))),
               ('tfidf', TfidfTransformer()),
               ('MNB', MultinomialNB(alpha = 1, fit_prior = False)),
              ])
MNB.fit(X_train, Y_train)

from sklearn.metrics import classification_report
Y_pred = MNB.predict(X_test)

print('Train accuracy '+ str(MNB.score(X_train, Y_train)))
print('Test accuracy %s' % accuracy_score(Y_pred, Y_test))
print(classification_report(Y_test, Y_pred, target_names = flairs))

Train accuracy 0.852165725047081
Test accuracy 0.4868421052631579
                    precision    recall  f1-score   support

          AskIndia       0.31      0.55      0.40        51
     Non-Political       0.55      0.23      0.33        47
         Scheduled       0.81      0.41      0.55        51
       Photography       0.96      0.47      0.63        55
Science/Technology       0.45      0.27      0.34        52
          Politics       1.00      0.70      0.82        57
  Business/Finance       0.33      0.39      0.36        38
    Policy/Economy       0.30      0.87      0.45        45
            Sports       0.40      0.97      0.56        35
              Food       0.58      0.12      0.20        58
       Coronavirus       0.96      0.56      0.71        43

          accuracy                           0.49       532
         macro avg       0.61      0.50      0.49       532
      weighted avg       0.62      0.49      0.49       532



Logistic Regression

In [18]:
LR = Pipeline([('vect', CountVectorizer(ngram_range = (1, 1))),
                ('tfidf', TfidfTransformer()),
                ('LR', LogisticRegression(solver = "liblinear", penalty = "l2")),
               ])
LR.fit(X_train, Y_train)

Y_pred = LR.predict(X_test)

print('Train accuracy '+ str(LR.score(X_train, Y_train)))
print('Test accuracy %s' % accuracy_score(Y_pred, Y_test))
print(classification_report(Y_test, Y_pred, target_names = flairs))

Train accuracy 0.9166666666666666
Test accuracy 0.6353383458646616
                    precision    recall  f1-score   support

          AskIndia       0.57      0.53      0.55        51
     Non-Political       0.57      0.45      0.50        47
         Scheduled       0.59      0.73      0.65        51
       Photography       0.87      0.71      0.78        55
Science/Technology       0.60      0.35      0.44        52
          Politics       0.89      0.89      0.89        57
  Business/Finance       0.32      0.45      0.37        38
    Policy/Economy       0.46      0.64      0.54        45
            Sports       0.85      0.94      0.89        35
              Food       0.57      0.52      0.54        58
       Coronavirus       0.80      0.84      0.82        43

          accuracy                           0.64       532
         macro avg       0.64      0.64      0.63       532
      weighted avg       0.65      0.64      0.63       532



Random Forest

In [19]:
RF = Pipeline([
    ('vect', CountVectorizer(ngram_range = (1, 1))),
    ('tfidf', TfidfTransformer()),
    ('RF', RandomForestClassifier(max_depth = 3000, min_samples_split = 200))
])

RF.fit(X_train, Y_train)

Y_pred = RF.predict(X_test)

print('Train accuracy '+ str(RF.score(X_train, Y_train)))
print('Test accuracy %s' % accuracy_score(Y_pred, Y_test))
print(classification_report(Y_test, Y_pred, target_names = flairs))

Train accuracy 0.832391713747646
Test accuracy 0.5131578947368421
                    precision    recall  f1-score   support

          AskIndia       0.47      0.49      0.48        51
     Non-Political       0.50      0.32      0.39        47
         Scheduled       0.53      0.57      0.55        51
       Photography       0.79      0.55      0.65        55
Science/Technology       0.40      0.19      0.26        52
          Politics       0.73      0.77      0.75        57
  Business/Finance       0.35      0.45      0.40        38
    Policy/Economy       0.32      0.53      0.40        45
            Sports       0.56      0.94      0.70        35
              Food       0.36      0.31      0.33        58
       Coronavirus       0.72      0.65      0.68        43

          accuracy                           0.51       532
         macro avg       0.52      0.52      0.51       532
      weighted avg       0.53      0.51      0.51       532



### Title - Comments - Body

In [20]:
X_train, X_test, Y_train, Y_test = triFeature("Title", "Body", "Comments")

Naive Bayes

In [21]:
MNB = Pipeline([('vect', CountVectorizer(ngram_range = (1, 1))),
               ('tfidf', TfidfTransformer()),
               ('MNB', MultinomialNB(alpha = 1, fit_prior = False)),
              ])
MNB.fit(X_train, Y_train)

from sklearn.metrics import classification_report
Y_pred = MNB.predict(X_test)

print('Train accuracy '+ str(MNB.score(X_train, Y_train)))
print('Test accuracy %s' % accuracy_score(Y_pred, Y_test))
print(classification_report(Y_test, Y_pred, target_names = flairs))

Train accuracy 0.8441619585687382
Test accuracy 0.5075187969924813
                    precision    recall  f1-score   support

          AskIndia       0.29      0.69      0.41        51
     Non-Political       0.60      0.26      0.36        47
         Scheduled       0.80      0.39      0.53        51
       Photography       0.93      0.51      0.66        55
Science/Technology       0.52      0.25      0.34        52
          Politics       1.00      0.70      0.82        57
  Business/Finance       0.37      0.47      0.41        38
    Policy/Economy       0.33      0.84      0.47        45
            Sports       0.48      0.97      0.64        35
              Food       0.58      0.12      0.20        58
       Coronavirus       0.96      0.58      0.72        43

          accuracy                           0.51       532
         macro avg       0.62      0.53      0.51       532
      weighted avg       0.64      0.51      0.50       532



Logistic Regression

In [22]:
LR = Pipeline([('vect', CountVectorizer(ngram_range = (1, 1))),
                ('tfidf', TfidfTransformer()),
                ('LR', LogisticRegression(solver = "liblinear", penalty = "l2")),
               ])
LR.fit(X_train, Y_train)

Y_pred = LR.predict(X_test)

print('Train accuracy '+ str(LR.score(X_train, Y_train)))
print('Test accuracy %s' % accuracy_score(Y_pred, Y_test))
print(classification_report(Y_test, Y_pred, target_names = flairs))

Train accuracy 0.9119585687382298
Test accuracy 0.6409774436090225
                    precision    recall  f1-score   support

          AskIndia       0.58      0.55      0.57        51
     Non-Political       0.57      0.49      0.53        47
         Scheduled       0.59      0.73      0.65        51
       Photography       0.86      0.76      0.81        55
Science/Technology       0.56      0.37      0.44        52
          Politics       0.86      0.89      0.88        57
  Business/Finance       0.31      0.42      0.36        38
    Policy/Economy       0.43      0.58      0.50        45
            Sports       0.97      0.94      0.96        35
              Food       0.61      0.52      0.56        58
       Coronavirus       0.80      0.84      0.82        43

          accuracy                           0.64       532
         macro avg       0.65      0.64      0.64       532
      weighted avg       0.65      0.64      0.64       532



In [23]:
pickle.dump(LR, open('LR-PRAW.pkl', 'wb'))

Random Forest

In [24]:
RF = Pipeline([
    ('vect', CountVectorizer(ngram_range = (1, 1))),
    ('tfidf', TfidfTransformer()),
    ('RF', RandomForestClassifier(max_depth = 3000, min_samples_split = 200))
])

RF.fit(X_train, Y_train)

Y_pred = RF.predict(X_test)

print('Train accuracy '+ str(RF.score(X_train, Y_train)))
print('Test accuracy %s' % accuracy_score(Y_pred, Y_test))
print(classification_report(Y_test, Y_pred, target_names = flairs))

Train accuracy 0.847457627118644
Test accuracy 0.5338345864661654
                    precision    recall  f1-score   support

          AskIndia       0.47      0.63      0.54        51
     Non-Political       0.48      0.32      0.38        47
         Scheduled       0.41      0.57      0.48        51
       Photography       0.80      0.67      0.73        55
Science/Technology       0.22      0.10      0.13        52
          Politics       0.73      0.79      0.76        57
  Business/Finance       0.39      0.53      0.45        38
    Policy/Economy       0.41      0.53      0.46        45
            Sports       0.71      0.91      0.80        35
              Food       0.45      0.26      0.33        58
       Coronavirus       0.68      0.70      0.69        43

          accuracy                           0.53       532
         macro avg       0.52      0.55      0.52       532
      weighted avg       0.52      0.53      0.52       532



Working on PRAW-CombinedFeature.csv

In [25]:
combFeature = pd.read_csv("Data/PRAW-CombinedFeature.csv")

### Title - Body - Comments (Augmented)

In [26]:
X_train, X_test, Y_train, Y_test = split("CombinedFeature")

Naive Bayes

In [27]:
MNB = Pipeline([('vect', CountVectorizer(ngram_range = (1, 2))),
               ('tfidf', TfidfTransformer()),
               ('MNB', MultinomialNB(alpha = 0, fit_prior = True)),
              ])
MNB.fit(X_train, Y_train)

from sklearn.metrics import classification_report
Y_pred = MNB.predict(X_test)

print('Train accuracy '+ str(MNB.score(X_train, Y_train)))
print('Test accuracy %s' % accuracy_score(Y_pred, Y_test))
print(classification_report(Y_test, Y_pred, target_names = flairs))

Train accuracy 0.9990586020240056
Test accuracy 0.8579492003762935
                    precision    recall  f1-score   support

          AskIndia       0.56      0.95      0.71        88
     Non-Political       0.93      0.81      0.87        85
         Scheduled       1.00      0.90      0.95       103
       Photography       0.96      0.80      0.88        97
Science/Technology       0.76      0.81      0.79        96
          Politics       1.00      0.80      0.89        92
  Business/Finance       0.88      0.93      0.90        96
    Policy/Economy       0.83      0.87      0.85       109
            Sports       0.82      0.87      0.84        99
              Food       0.97      0.76      0.86        93
       Coronavirus       0.98      0.90      0.94       105

          accuracy                           0.86      1063
         macro avg       0.88      0.86      0.86      1063
      weighted avg       0.88      0.86      0.86      1063



Logistic Regression

In [28]:
LR = Pipeline([('vect', CountVectorizer(ngram_range = (1, 2))),
                ('tfidf', TfidfTransformer()),
                ('LR', LogisticRegression(solver = "liblinear", penalty = "l2")),
               ])
LR.fit(X_train, Y_train)

Y_pred = LR.predict(X_test)

print('Train accuracy '+ str(LR.score(X_train, Y_train)))
print('Test accuracy %s' % accuracy_score(Y_pred, Y_test))
print(classification_report(Y_test, Y_pred, target_names = flairs))

Train accuracy 0.9814073899741116
Test accuracy 0.812793979303857
                    precision    recall  f1-score   support

          AskIndia       0.60      0.70      0.65        88
     Non-Political       0.73      0.74      0.74        85
         Scheduled       0.82      0.79      0.80       103
       Photography       0.86      0.91      0.88        97
Science/Technology       0.79      0.62      0.70        96
          Politics       0.88      0.97      0.92        92
  Business/Finance       0.75      0.70      0.72        96
    Policy/Economy       0.71      0.82      0.76       109
            Sports       1.00      0.92      0.96        99
              Food       0.82      0.78      0.80        93
       Coronavirus       0.99      0.96      0.98       105

          accuracy                           0.81      1063
         macro avg       0.81      0.81      0.81      1063
      weighted avg       0.82      0.81      0.81      1063



Random Forest

In [29]:
RF = Pipeline([
    ('vect', CountVectorizer(ngram_range = (1, 2))),
    ('tfidf', TfidfTransformer()),
    ('RF', RandomForestClassifier(max_depth = 4000, min_samples_split = 200))
])

RF.fit(X_train, Y_train)

Y_pred = RF.predict(X_test)

print('Train accuracy '+ str(RF.score(X_train, Y_train)))
print('Test accuracy %s' % accuracy_score(Y_pred, Y_test))
print(classification_report(Y_test, Y_pred, target_names = flairs))

Train accuracy 0.957166392092257
Test accuracy 0.7215428033866416
                    precision    recall  f1-score   support

          AskIndia       0.50      0.66      0.57        88
     Non-Political       0.56      0.58      0.57        85
         Scheduled       0.58      0.74      0.65       103
       Photography       0.83      0.82      0.83        97
Science/Technology       0.66      0.42      0.51        96
          Politics       0.76      0.91      0.83        92
  Business/Finance       0.70      0.52      0.60        96
    Policy/Economy       0.74      0.80      0.77       109
            Sports       0.90      0.93      0.92        99
              Food       0.79      0.68      0.73        93
       Coronavirus       0.98      0.84      0.90       105

          accuracy                           0.72      1063
         macro avg       0.73      0.72      0.72      1063
      weighted avg       0.73      0.72      0.72      1063

