# Modeling - Multinomial Naive Bayes
___

#

### Imports
---

In [None]:
#Import all Libraries, Transformers, Models, and Plotting Tools

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix, plot_confusion_matrix
from sklearn.compose import make_column_transformer
from sklearn.metrics import classification_report
from sklearn.naive_bayes import MultinomialNB
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer 

#

### Data and Features
___

In [None]:
# Load the cleaned, combined data frame from disk
df = pd.read_csv('./data/df.csv')

# Features: the 'lemma_text' column, kept as a one-column DataFrame
# Target: the 'poker' column
X = df.loc[:, ['lemma_text']]
y = df.loc[:, 'poker']

# Hold out 33% of the rows for testing; stratify on the target so both splits
# keep the same class proportions, and fix the seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
    stratify=y,
)

# Baseline: relative frequency of each class in the target
y.value_counts(normalize=True)

#

### Modeling
___

In [64]:
#Downloading the NLTK stopword corpus
nltk.download('stopwords')

#Building the stopword list: NLTK English stopwords plus the additional words
#added after the initial word count EDA.
#The corpus is read through nltk.corpus.stopwords so the imported `stopwords`
#name is never rebound to a list, which keeps this cell safe to re-run.
stop_list = ["dad","poker", "say", "joke"]
stpwrd = nltk.corpus.stopwords.words('english')
stpwrd.extend(stop_list)

#Instantiating count vectorizer with the custom stopword list.
#Passing stop_words='english' here would ignore stpwrd entirely and leave the
#extra words (e.g. "poker", "dad") in the vocabulary.
cvec = CountVectorizer(stop_words=stpwrd)

#Creating a Column Transformer that applies the Count Vectorizer to the
#'lemma_text' column; any other columns in X are passed through unchanged
ct1 = make_column_transformer(
    (cvec, 'lemma_text'),
    remainder='passthrough'
)

#Creating a Pipeline: column transformer followed by the Multinomial NB model
transformer_pipe1 = Pipeline([
    ('ct1', ct1),
    ('mn', MultinomialNB())
])


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/preetsekhon/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
#Training the whole pipeline (column transformer + Multinomial NB) on the training split
transformer_pipe1.fit(X=X_train, y=y_train)

In [None]:
#Pipeline score on the training split
transformer_pipe1.score(X=X_train, y=y_train)

In [None]:
#Pipeline score on the held-out test split
transformer_pipe1.score(X=X_test, y=y_test)

In [None]:
# Predict labels for the held-out test split
preds_pipe = transformer_pipe1.predict(X=X_test)

# Print the classification report for the test predictions, labelling the
# classes as 'Dad Jokes' and 'Poker'
# NOTE(review): target_names assumes the sorted labels map to Dad Jokes first,
# Poker second - confirm against the encoding of the 'poker' column
print(
    classification_report(
        y_true=y_test,
        y_pred=preds_pipe,
        target_names=['Dad Jokes', 'Poker'],
    )
)

In [None]:
#Drawing the confusion matrix for the test split and keeping the display object
#NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2; moving to
#ConfusionMatrixDisplay.from_estimator also requires changing the import cell
cm_display = plot_confusion_matrix(
    estimator=transformer_pipe1,
    X=X_test,
    y_true=y_test,
    display_labels=['Dad Jokes', 'Poker'],
)

#Titling the matrix axes and saving the figure to the visuals folder
cm_display.ax_.set_title('MultiNomial NB')
cm_display.figure_.savefig('visuals/MN_CM.jpeg', dpi=300, bbox_inches="tight")
plt.show()