# In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import plotly.express as px

import sklearn
from sklearn import metrics
from sklearn.feature_extraction.text import CountVectorizer,TfidfTransformer,TfidfVectorizer
from sklearn.linear_model import LogisticRegressionCV
from sklearn.metrics import classification_report, accuracy_score, f1_score, recall_score, precision_score
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.naive_bayes import MultinomialNB as mnb
from sklearn.neighbors import KNeighborsClassifier as knn
from sklearn.tree import DecisionTreeClassifier as dtc
 
import string
import re
import nltk 

from typing import List

 
# Colormap names selectable by index in ``evaluate_model``. Kept at module
# level so the sequence is built once instead of on every call; the order is
# significant because callers pick an entry by position.
_CONFUSION_CMAPS = (
    'Accent', 'Accent_r', 'Blues', 'Blues_r', 'BrBG', 'BrBG_r',
    'BuGn', 'BuGn_r', 'BuPu', 'BuPu_r', 'CMRmap', 'CMRmap_r',
    'Dark2', 'Dark2_r', 'GnBu', 'GnBu_r', 'Greens', 'Greens_r',
    'Greys', 'Greys_r', 'OrRd', 'OrRd_r', 'Oranges', 'Oranges_r',
    'PRGn', 'PRGn_r', 'Paired', 'Paired_r', 'Pastel1', 'Pastel1_r',
    'Pastel2', 'Pastel2_r', 'PiYG', 'PiYG_r', 'PuBu', 'PuBuGn',
    'PuBuGn_r', 'PuBu_r', 'PuOr', 'PuOr_r', 'PuRd', 'PuRd_r',
    'Purples', 'Purples_r', 'RdBu', 'RdBu_r', 'RdGy', 'RdGy_r',
    'RdPu', 'RdPu_r', 'RdYlBu', 'RdYlBu_r', 'RdYlGn', 'RdYlGn_r',
    'Reds', 'Reds_r', 'Set1', 'Set1_r', 'Set2', 'Set2_r',
    'Set3', 'Set3_r', 'Spectral', 'Spectral_r', 'Wistia', 'Wistia_r',
    'YlGn', 'YlGnBu', 'YlGnBu_r', 'YlGn_r', 'YlOrBr', 'YlOrBr_r',
    'YlOrRd', 'YlOrRd_r', 'afmhot', 'afmhot_r', 'autumn', 'autumn_r',
    'binary', 'binary_r', 'bone', 'bone_r', 'brg', 'brg_r',
    'bwr', 'bwr_r', 'cividis', 'cividis_r', 'cool', 'cool_r',
    'coolwarm', 'coolwarm_r', 'copper', 'copper_r', 'crest', 'crest_r',
    'cubehelix', 'cubehelix_r', 'flag', 'flag_r', 'flare', 'flare_r',
    'gist_earth', 'gist_earth_r', 'gist_gray', 'gist_gray_r',
    'gist_heat', 'gist_heat_r', 'gist_ncar', 'gist_ncar_r',
    'gist_rainbow', 'gist_rainbow_r', 'gist_stern', 'gist_stern_r',
    'gist_yarg', 'gist_yarg_r', 'gnuplot', 'gnuplot2', 'gnuplot2_r',
    'gnuplot_r', 'gray', 'gray_r', 'hot', 'hot_r', 'hsv', 'hsv_r',
    'icefire', 'icefire_r', 'inferno', 'inferno_r', 'jet', 'jet_r',
    'magma', 'magma_r', 'mako', 'mako_r', 'nipy_spectral',
    'nipy_spectral_r', 'ocean', 'ocean_r', 'pink', 'pink_r',
    'plasma', 'plasma_r', 'prism', 'prism_r', 'rainbow', 'rainbow_r',
    'rocket', 'rocket_r', 'seismic', 'seismic_r', 'spring', 'spring_r',
    'summer', 'summer_r', 'tab10', 'tab10_r', 'tab20', 'tab20_r',
    'tab20b', 'tab20b_r', 'tab20c', 'tab20c_r', 'terrain', 'terrain_r',
    'turbo', 'turbo_r', 'twilight', 'twilight_r', 'twilight_shifted',
    'twilight_shifted_r', 'viridis', 'viridis_r', 'vlag', 'vlag_r',
    'winter', 'winter_r',
)


def evaluate_model(c: int,
                   y_true: pd.Series,
                   y_pred: pd.Series,
                   report: bool = False,
                   plot: bool = False,
                   ) -> None:
    """Print and/or plot evaluation results for a set of predictions.

    The two flags are independent, so both outputs can be requested in one
    call. When neither is set, a short summary of accuracy, F1, recall and
    precision is printed instead, each formatted as a percentage and computed
    with scikit-learn's default arguments.

    Args:
        c: Index of the colormap used for the confusion-matrix heatmap. It
            is only read when ``plot`` is true. Indices outside the table
            wrap around instead of raising ``IndexError``.
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        report: If true, print ``classification_report`` with 4 digits.
        plot: If true, draw the confusion matrix as a heatmap and show it.

    Returns:
        None. All results are printed or displayed.
    """
    if report:
        print(classification_report(y_true, y_pred, digits=4))

    if plot:
        # Modulo keeps every previously valid index (negative ones included)
        # mapped to the same entry while tolerating out-of-range values.
        cmap_name = _CONFUSION_CMAPS[c % len(_CONFUSION_CMAPS)]

        _, ax = plt.subplots(figsize=(4, 4))
        conf_matrix = pd.crosstab(y_true,
                                  y_pred,
                                  rownames=['Actual'],
                                  colnames=['Predicted'])
        sns.heatmap(conf_matrix,
                    annot=True, fmt=".0f",
                    cmap=cmap_name,
                    cbar_kws={'fraction': 0.04},
                    linewidth=0.3,
                    ax=ax)
        ax.set_title('Confusion Matrix', fontsize=14)
        plt.show()

    if not report and not plot:
        print('* Accuracy Score: ', "{:.4%}".format(accuracy_score(y_true, y_pred)))
        print('* F1 Score: ', "{:.4%}".format(f1_score(y_true, y_pred)))
        print('* Recall Score: ', "{:.4%}".format(recall_score(y_true, y_pred)))
        print('* Precision Score: ', "{:.4%}".format(precision_score(y_true, y_pred)))
        
    
    
    
    
def bow_vectorizer(doc_tokens: List[str]):
    """Fit a bag-of-words ``CountVectorizer`` on pre-tokenized documents.

    Both the preprocessor and the tokenizer are identity functions, so each
    document is used exactly as supplied; ``token_pattern`` is set to None
    because the custom tokenizer replaces it. Terms must appear in at least
    five documents (``min_df=5``) to enter the vocabulary.

    Args:
        doc_tokens: The corpus to fit on.
            NOTE(review): with an identity tokenizer each item presumably
            has to be a sequence of tokens already -- confirm against the
            caller, since the annotation says ``List[str]``.

    Returns:
        The value returned by ``CountVectorizer.fit`` for this corpus.
    """
    vectorizer = CountVectorizer(
        analyzer='word',
        tokenizer=lambda tokens: tokens,
        preprocessor=lambda tokens: tokens,
        token_pattern=None,
        min_df=5,
    )
    return vectorizer.fit(doc_tokens)
    

def train_logistic_regressor(X_test_bow_matrix,
                             y_train: pd.Series):
    """Fit a cross-validated logistic regression model.

    Args:
        X_test_bow_matrix: Feature matrix passed to ``fit``. Despite the
            ``X_test`` prefix it is used as the fitting data here, paired
            with ``y_train``.
        y_train: Target labels for the rows of the feature matrix.

    Returns:
        The value returned by ``LogisticRegressionCV.fit``.
    """
    settings = {
        'cv': 5,
        'random_state': 42,
        'n_jobs': -1,      # use all available cores
        'verbose': 3,
        'max_iter': 300,
    }
    model = LogisticRegressionCV(**settings)
    return model.fit(X_test_bow_matrix, y_train)


def train_RandomForest_classifier(X_train_bow_matrix, y_train):
    """Fit a random forest with balanced class weights for labels 0 and 1.

    Args:
        X_train_bow_matrix: Training feature matrix.
        y_train: Training labels; the weights are computed for the two
            classes 0 and 1.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    # ``compute_class_weight`` documents ``classes`` as an ndarray, and newer
    # scikit-learn parameter validation appears to reject a plain list, so
    # pass a real array.
    classes = np.array([0, 1])
    weights = compute_class_weight(class_weight='balanced',
                                   classes=classes,
                                   y=y_train)
    clf = RandomForestClassifier(class_weight={0: weights[0], 1: weights[1]})
    clf.fit(X_train_bow_matrix, y_train)

    return clf


def train_svm_classifier(X_train_bow_matrix, y_train):
    """Fit a linear-kernel SVM with balanced class weights for labels 0 and 1.

    Args:
        X_train_bow_matrix: Training feature matrix.
        y_train: Training labels; the weights are computed for the two
            classes 0 and 1.

    Returns:
        The fitted ``svm.SVC``.
    """
    # ``compute_class_weight`` documents ``classes`` as an ndarray, and newer
    # scikit-learn parameter validation appears to reject a plain list, so
    # pass a real array.
    classes = np.array([0, 1])
    weights = compute_class_weight(class_weight='balanced',
                                   classes=classes,
                                   y=y_train)
    clf = svm.SVC(kernel='linear',
                  class_weight={0: weights[0], 1: weights[1]})
    clf.fit(X_train_bow_matrix, y_train)

    return clf

def train_multimodal_naive_bayes_classifier(X_train_bow_matrix,y_train):
    """Fit a ``MultinomialNB`` classifier with default settings.

    Args:
        X_train_bow_matrix: Training feature matrix.
        y_train: Training labels.

    Returns:
        The value returned by ``MultinomialNB.fit``.
    """
    return mnb().fit(X_train_bow_matrix, y_train)

def train_KNN_classifier(X_train_bow_matrix, y_train, n_neighbors: int = 5):
    """Fit a k-nearest-neighbours classifier.

    Args:
        X_train_bow_matrix: Training feature matrix.
        y_train: Training labels.
        n_neighbors: Number of neighbours consulted per prediction. Defaults
            to 5, the value that was previously hard-coded, so existing
            callers are unaffected.

    Returns:
        The fitted ``KNeighborsClassifier``.
    """
    clf = knn(n_neighbors=n_neighbors)
    clf.fit(X_train_bow_matrix, y_train)
    return clf

def train_decisiontree_classifier(X_train_bow_matrix,y_train):
    """Fit a ``DecisionTreeClassifier`` with default settings.

    Args:
        X_train_bow_matrix: Training feature matrix.
        y_train: Training labels.

    Returns:
        The value returned by ``DecisionTreeClassifier.fit``.
    """
    return dtc().fit(X_train_bow_matrix, y_train)
def comparisons(acc,string,y_true,y_pred,mode:bool=False):
    """Record a model's accuracy, or print every recorded accuracy.

    Args:
        acc: List of ``[name, accuracy]`` entries; mutated in place when
            recording.
        string: Label stored alongside the accuracy when recording.
        y_true: Ground-truth labels (only read when recording).
        y_pred: Predicted labels (only read when recording).
        mode: False (default) appends a new entry for ``string``; True
            prints all entries collected so far.

    Returns:
        ``acc`` when ``mode`` is true; otherwise None.
    """
    if mode:
        # Reporting path: the label and prediction arguments are ignored.
        print(":**Comparison Between Models:**")
        for entry in acc:
            print(f"||{entry[0]}=>{entry[1]}")
        return acc

    # Recording path: append in place; the caller keeps its own reference.
    score = accuracy_score(y_true, y_pred)
    acc.append([string, score])
    return None