<a href="https://colab.research.google.com/github/Prabhash1107/SMS-Spam-Detection-NLP/blob/main/MLmodule.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

class SpamDetector:
    """Ensemble spam classifier built from five scikit-learn models.

    On construction the detector reads a CSV file with an ``Email`` text
    column and a ``Label`` column, fits a TF-IDF vectorizer on the training
    split, and trains Naive Bayes, SVM, Logistic Regression, k-NN and
    Random Forest classifiers on the resulting vectors.

    The held-out split is stored on the instance (``testing_data`` /
    ``testing_labels``) but is not evaluated anywhere in this class.
    """

    def __init__(self, data_path='Cleaned_Data.csv', random_state=10):
        """Load the dataset and train the vectorizer and all five models.

        Args:
            data_path: Path of the CSV file to read. It must provide the
                ``Email`` and ``Label`` columns.
            random_state: Seed shared by the train/test split and by the
                estimators that use randomness (Random Forest, and the SVC
                probability calibration), so repeated runs on the same data
                produce the same ensemble.
        """
        self.df = pd.read_csv(data_path)
        # Force every message to text; missing cells become the string 'nan'.
        self.df['Email'] = self.df.Email.astype(str)

        self.Data = self.df.Email
        self.Labels = self.df.Label

        # Split dataset: 80% for training, 20% held out.
        (
            self.training_data,
            self.testing_data,
            self.training_labels,
            self.testing_labels,
        ) = train_test_split(
            self.Data, self.Labels, test_size=0.2, random_state=random_state
        )

        # The vocabulary is learned from the training split only.
        self.vectorizer = TfidfVectorizer()
        self.training_vectors = self.vectorizer.fit_transform(self.training_data)

        # Model Training
        self.model_nb = MultinomialNB().fit(
            self.training_vectors, self.training_labels
        )
        # probability=True is required for SVC.predict_proba.
        self.model_svm = SVC(probability=True, random_state=random_state).fit(
            self.training_vectors, self.training_labels
        )
        self.model_lr = LogisticRegression().fit(
            self.training_vectors, self.training_labels
        )
        self.model_knn = KNeighborsClassifier(n_neighbors=9).fit(
            self.training_vectors, self.training_labels
        )
        self.model_rf = RandomForestClassifier(
            n_estimators=19, random_state=random_state
        ).fit(self.training_vectors, self.training_labels)

    def get_prediction(self, vector):
        """Classify one vectorized message by majority vote of the models.

        Args:
            vector: Feature vector for a single message, as produced by
                ``get_vector``.

        Returns:
            ``'Spam'`` when more than half of the models predict label ``1``,
            otherwise ``'Non-Spam'``.
        """
        # NOTE(review): assumes the Label column encodes spam as the integer 1;
        # with any other encoding the vote below never counts spam -- confirm.
        models = (
            self.model_nb,
            self.model_lr,
            self.model_rf,
            self.model_svm,
            self.model_knn,
        )
        votes = [model.predict(vector)[0] for model in models]
        spam_votes = votes.count(1)
        return 'Spam' if spam_votes > len(votes) / 2 else 'Non-Spam'

    def get_probabilities(self, vector):
        """Return each model's class probabilities for one message, in percent.

        Args:
            vector: Feature vector for a single message, as produced by
                ``get_vector``.

        Returns:
            A list of five arrays in the order Naive Bayes, Logistic
            Regression, Random Forest, k-NN, SVM. Each array is the first row
            of that model's ``predict_proba`` output multiplied by 100.
        """
        # This order (k-NN before SVM) differs from the voting order used in
        # get_prediction; it is kept as-is because callers may index the
        # result by position.
        models = (
            self.model_nb,
            self.model_lr,
            self.model_rf,
            self.model_knn,
            self.model_svm,
        )
        return [model.predict_proba(vector)[0] * 100 for model in models]

    def get_vector(self, text):
        """Convert one raw message into a TF-IDF feature vector.

        Args:
            text: The message to vectorize. It is converted with ``str`` so
                that non-string input is handled the same way the training
                column was (``astype(str)``).

        Returns:
            The result of the fitted vectorizer's ``transform`` on a
            one-element list containing the message.
        """
        return self.vectorizer.transform([str(text)])
