In [None]:
# Mount Google Drive at /content/drive/ so files stored there can be opened by path.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
import pandas as pd
import os

# Make the Drive mount point the current working directory.
# NOTE(review): the file paths visible later in this notebook are absolute, so this
# chdir may not be needed — confirm before relying on relative paths.
os.chdir('/content/drive/')


In [8]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.pipeline import Pipeline
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re

def download_nltk_resources():
    """Fetch the NLTK data packages used by the text-cleaning step.

    Requests the ``stopwords`` and ``wordnet`` packages via ``nltk.download``
    (quiet mode). A failure for one package is reported on stdout and does not
    prevent the remaining packages from being attempted.

    Returns:
        None.
    """
    resources = ['stopwords', 'wordnet']
    for resource in resources:
        try:
            # nltk.download() normally reports a failed download by returning
            # False instead of raising, so the return value must be checked.
            succeeded = nltk.download(resource, quiet=True)
        except Exception as e:
            print(f"Error downloading {resource}: {str(e)}")
            continue

        if not succeeded:
            print(f"Error downloading {resource}: nltk.download reported failure")

def clean_text(text):
    """Normalize raw text into a single-space-separated string of tokens.

    Steps, in order: convert to ``str`` and lowercase; replace every character
    that is neither a word character nor whitespace with a space; collapse
    whitespace runs and strip both ends; split on whitespace; drop NLTK English
    stop words; lemmatize each remaining token with ``WordNetLemmatizer``.

    Stop-word removal and lemmatization are best-effort: if either step raises
    an ordinary exception (e.g. the NLTK data cannot be loaded) that step is
    skipped and the tokens from the previous step are kept.

    Args:
        text: Value to clean; it is passed through ``str()`` first.

    Returns:
        The cleaned tokens joined by single spaces. If an unexpected error
        occurs, a message is printed and ``text`` is returned in whatever
        state it had reached when the error happened.
    """
    try:
        text = str(text).lower()

        # Punctuation is replaced by a space (not deleted) so "a,b" yields two tokens.
        text = re.sub(r'[^\w\s]', ' ', text)
        text = re.sub(r'\s+', ' ', text)
        text = text.strip()

        words = text.split()

        # The two optional steps catch Exception rather than using a bare
        # ``except`` so KeyboardInterrupt / SystemExit still propagate and a
        # long cleaning run can be interrupted.
        try:
            stop_words = set(stopwords.words('english'))
            words = [w for w in words if w not in stop_words]
        except Exception:
            # Best-effort: keep the unfiltered tokens.
            pass

        try:
            lemmatizer = WordNetLemmatizer()
            words = [lemmatizer.lemmatize(w) for w in words]
        except Exception:
            # Best-effort: keep the un-lemmatized tokens.
            pass

        return ' '.join(words)
    except Exception as e:
        print(f"Error in clean_text: {str(e)}")
        return text

def main():
    """Download the required NLTK resources, then print a status line.

    Returns:
        None.
    """
    download_nltk_resources()

    status_line = "Loading and preparing data..."
    print(status_line)


# Call main() only when this code runs as the top-level module.
if __name__ == "__main__":
    main()


import pickle

def load_model(model_path='/content/drive/MyDrive/data/spam_model.pkl',
               vectorizer_path='/content/drive/MyDrive/data/tfidf_vectorizer.pkl'):
    """Load the pickled classifier and its vectorizer from disk.

    Args:
        model_path: Path of the pickled model file. Defaults to the Drive
            location this notebook originally hard-coded.
        vectorizer_path: Path of the pickled vectorizer file. Defaults to the
            Drive location this notebook originally hard-coded.

    Returns:
        A ``(model, vectorizer)`` tuple with the two unpickled objects.

    Raises:
        OSError: If either file cannot be opened.
        pickle.UnpicklingError: If a file is not a valid pickle (other
            exceptions may also surface from unpickling).
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load pickles that come from a trusted source.
    with open(model_path, 'rb') as model_file:
        model = pickle.load(model_file)

    with open(vectorizer_path, 'rb') as vectorizer_file:
        vectorizer = pickle.load(vectorizer_file)

    return model, vectorizer

def predict_email(email_text, model, vectorizer):
    """Classify a single email with a fitted model and vectorizer.

    The text is run through ``clean_text``, turned into a one-row feature
    matrix with ``vectorizer.transform``, and passed to both ``model.predict``
    and ``model.predict_proba``.

    Args:
        email_text: Raw email text to classify.
        model: Object exposing ``predict`` and ``predict_proba``.
        vectorizer: Object exposing ``transform`` for a list of strings.

    Returns:
        A ``(label, probabilities)`` tuple: the first element of the
        ``predict`` result and the first row of the ``predict_proba`` result.
    """
    features = vectorizer.transform([clean_text(email_text)])

    labels = model.predict(features)
    class_probabilities = model.predict_proba(features)

    return labels[0], class_probabilities[0]


# Load the pickled model and vectorizer, then classify one sample message.
model, vectorizer = load_model()
new_email = """yo ur wom an ne eds an escapenumber in ch ma n b e th at ma n f or h er le arn h ow here tu rn of f not ific ati ons here escapelong dy international exports ltd st regina escapenumber belize city belize escapelong
"""
prediction, probability = predict_email(new_email, model, vectorizer)
print(f"Prediction: {prediction}")
# NOTE(review): this assumes probability[1] is the Spam column and probability[0]
# the Ham column, i.e. that the model's class order is [Ham, Spam] — TODO confirm
# against model.classes_.
print(f"Confidence: Spam: {probability[1]:.2%}, Ham: {probability[0]:.2%}")




Loading and preparing data...
Prediction: Spam
Confidence: Spam: 99.32%, Ham: 0.68%
