In [1]:
import mlflow
import mlflow.sklearn


# Point the MLflow client at the DagsHub-hosted tracking server for this project
TRACKING_URI = "https://dagshub.com/AhkkashK/ML-IN-PROD.mlflow"
mlflow.set_tracking_uri(TRACKING_URI)



In [2]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

# Build the stopword set and the stemmer once at module level so that
# transform_text does not recreate them on every call.
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    # The stopwords corpus is not installed in this environment (e.g. a fresh
    # kernel on a new machine): fetch it once, then retry the lookup.
    nltk.download('stopwords', quiet=True)
    stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

def transform_text(text):
    """Normalize a raw message into a space-separated string of stemmed tokens.

    The text is lowercased and split with ``nltk.word_tokenize``. Tokens that
    are not purely alphanumeric, or that appear in the module-level
    ``stop_words`` set, are discarded; every remaining token is stemmed with
    the module-level ``stemmer``.

    Args:
        text: The message to preprocess (a string).

    Returns:
        The surviving stems joined by single spaces; an empty string when no
        token survives the filtering.
    """
    kept_stems = []
    for token in nltk.word_tokenize(text.lower()):
        # Skip punctuation and any token containing non-alphanumeric characters
        if not token.isalnum():
            continue
        # Skip stopwords
        if token in stop_words:
            continue
        kept_stems.append(stemmer.stem(token))

    return " ".join(kept_stems)

In [3]:
import mlflow
import mlflow.sklearn


# Artifact locations in MLflow: both come from the same run ID
model_uri = "runs:/8815fb7d7d9f4085bd7c79bd812fafc2/random_forest_model"
tfidf_uri = "runs:/8815fb7d7d9f4085bd7c79bd812fafc2/tfidf_vectorizer"

# Load the random forest classifier first, then the TF-IDF vectorizer
model, tfidf = (
    mlflow.sklearn.load_model(artifact_uri)
    for artifact_uri in (model_uri, tfidf_uri)
)

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00,  6.03it/s]
Downloading artifacts: 100%|██████████| 5/5 [00:02<00:00,  2.37it/s]


In [4]:
# Sample messages to run through the classifier.
# The first four read as ordinary personal messages and the last six as
# typical spam/phishing texts -- presumably the expected "ham"/"spam" split,
# but no ground-truth labels are defined or checked in this notebook.
input_message = [
    "Hey, I booked a table for the two of us tonight at 7pm, does that work for you?",
    "Did you see that movie at the theater yesterday? It was amazing, we should go next time!",
    "I just sent you the details for the concert, did you check it out?",
    "Got any plans for tonight? We could meet for a drink if you’re up for it.",
    "Congratulations! You've won a free trip to Paris, reply to confirm!",
    "Your credit card has been compromised. Click here to secure it now.",
    "Claim your prize! You've won a 100€ gift voucher! Reply to get it.",
    "URGENT: Your bank account has been suspended. Click here to verify immediately.",
    "Win a free iPhone, just fill out this online form to claim it!",
    "We've noticed suspicious activity on your PayPal account. Log in to secure it."
]


In [5]:
# Preprocess every message with transform_text, vectorize, then classify
processed_input = list(map(transform_text, input_message))
X_input = tfidf.transform(processed_input)
prediction = model.predict(X_input)

# Show each original (unprocessed) message next to its predicted label;
# a predicted value of 1 is reported as spam, anything else as ham
for message, label in zip(input_message, prediction):
    result = "spam" if label == 1 else "ham"
    print(f"Message: {message}\nPrediction: {result}\n")


Message: Hey, I booked a table for the two of us tonight at 7pm, does that work for you?
Prediction: ham

Message: Did you see that movie at the theater yesterday? It was amazing, we should go next time!
Prediction: ham

Message: I just sent you the details for the concert, did you check it out?
Prediction: ham

Message: Got any plans for tonight? We could meet for a drink if you’re up for it.
Prediction: ham

Message: Congratulations! You've won a free trip to Paris, reply to confirm!
Prediction: ham

Message: Your credit card has been compromised. Click here to secure it now.
Prediction: ham

Message: Claim your prize! You've won a 100€ gift voucher! Reply to get it.
Prediction: spam

Message: URGENT: Your bank account has been suspended. Click here to verify immediately.
Prediction: ham

Message: Win a free iPhone, just fill out this online form to claim it!
Prediction: spam

Message: We've noticed suspicious activity on your PayPal account. Log in to secure it.
Prediction: ham

