In [2]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.multiclass import OneVsRestClassifier
import nltk
from nltk import WordNetLemmatizer
from nltk import pos_tag, word_tokenize
from nltk.corpus import stopwords as nltk_stopwords
from nltk.corpus import wordnet
import re
import pickle
import itertools
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import LogisticRegression
import seaborn as sns

In [4]:
def prepare_text(text):
    """Clean ``text`` and lemmatize every token according to its POS tag.

    Each character that is not an ASCII letter or an apostrophe is replaced
    by a space. The remaining words are tokenized with ``word_tokenize``,
    tagged with ``pos_tag`` and lemmatized with ``WordNetLemmatizer``.

    Args:
        text: Raw input string.

    Returns:
        The lemmatized tokens joined by single spaces, or an empty string
        when nothing is left after cleaning.
    """
    # First letter of a Treebank tag -> WordNet POS constant.
    pos_by_initial = {'J': wordnet.ADJ, 'V': wordnet.VERB, 'N': wordnet.NOUN, 'R': wordnet.ADV}

    def get_wordnet_pos(treebank_tag):
        # Slice instead of index so an empty tag falls back to NOUN
        # rather than raising IndexError.
        return pos_by_initial.get(treebank_tag[:1], wordnet.NOUN)

    clean_text = re.sub(r'[^a-zA-Z\']', ' ', text)
    words = clean_text.split()
    if not words:
        # Nothing to tokenize/tag; skip the NLTK pipeline entirely.
        return ''

    tokens = word_tokenize(' '.join(words))
    pos_tags = pos_tag(tokens)

    # Build the lemmatizer once per call instead of once per token.
    lemmatizer = WordNetLemmatizer()
    lemmatized_words = [
        lemmatizer.lemmatize(word, pos=get_wordnet_pos(tag))
        for word, tag in pos_tags
    ]
    return ' '.join(lemmatized_words)

In [5]:
def load_vectorizer(path='vectorizer.pkl'):
    """Load a pickled vectorizer object from disk.

    Args:
        path: Location of the pickle file. Defaults to ``'vectorizer.pkl'``
            (resolved against the current working directory), so existing
            zero-argument calls behave as before.

    Returns:
        The object stored in the pickle file.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load artifacts that come from a trusted source.
    with open(path, 'rb') as file:
        return pickle.load(file)

In [6]:
def load_model(model_type):
    """Unpickle and return the model saved as ``<model_type>_model.pkl``.

    Args:
        model_type: Prefix of the pickle file name; the file is looked up
            relative to the current working directory.

    Returns:
        The object stored in that pickle file.
    """
    with open(f'{model_type}_model.pkl', 'rb') as handle:
        return pickle.load(handle)

In [18]:
# Text to score; it is cleaned and lemmatized before being vectorized.
comment = " FUCK"
processed_comment = prepare_text(comment)

# transform() is given a one-element list holding the processed comment.
vectorizer = load_vectorizer()
vectorized_comment = vectorizer.transform([processed_comment])

model_types = ['logistic', 'random_forest', 'naive_bayes', 'decision_tree', 'knn']
predictions = {}

for model_type in model_types:
    model = load_model(model_type)

    # Column 1 of predict_proba is used as the score (presumably the
    # positive class -- confirm against model.classes_). Models that do not
    # expose predict_proba get no probability.
    prob = model.predict_proba(vectorized_comment)[:, 1] if hasattr(model, "predict_proba") else None
    pred = model.predict(vectorized_comment)[0]

    predictions[model_type] = {
        'Predicted Class': pred,
        'Probability': "N/A" if prob is None else prob[0]
    }

In [19]:
# Print one report block per model: name, predicted class, probability,
# followed by a 30-dash separator line.
for model_type, result in predictions.items():
    print(
        f"Model: {model_type}",
        f"Predicted Class: {result['Predicted Class']}",
        f"Probability: {result['Probability']}",
        '-' * 30,
        sep='\n',
    )

Model: logistic
Predicted Class: 1
Probability: 0.9809509828551052
------------------------------
Model: random_forest
Predicted Class: 1
Probability: 0.5030769230769231
------------------------------
Model: naive_bayes
Predicted Class: 1
Probability: 0.9180928356273294
------------------------------
Model: decision_tree
Predicted Class: 0
Probability: 0.0
------------------------------
Model: knn
Predicted Class: 1
Probability: 1.0
------------------------------
