In [1]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# Toy training corpus: three spam-like messages followed by three
# work-related (non-spam) messages.
spam_texts = [
    'buy cheap meds now',
    'cheap pills online',
    'get discount now',
]
ham_texts = [
    'meeting at office',
    'project discussion',
    'schedule a meeting',
]
texts = spam_texts + ham_texts

# One label per message, in the same order as `texts`.
labels = ['Spam'] * len(spam_texts) + ['Not Spam'] * len(ham_texts)

# Create a model pipeline: bag-of-words token counts feeding a multinomial
# Naive Bayes classifier.
# NOTE: alpha = 0.1 is additive smoothing with alpha < 1, i.e. Lidstone
# smoothing. Laplace smoothing would be alpha = 1.0. The value is left at 0.1
# so the predictions and probabilities recorded for this cell stay valid.
SMOOTHING_ALPHA = 0.1
model = make_pipeline(CountVectorizer(), MultinomialNB(alpha=SMOOTHING_ALPHA))

# Train the model on the raw strings; the vectorizer step tokenizes them.
model.fit(texts, labels)

# Test messages. Several are capitalized differently from the training texts;
# CountVectorizer lowercases input by default, so they still match the
# training vocabulary.
test_messages = [
    'Cheap meds now',
    'Schedule a meeting',
    'Project schedule',
    'Discount online pills',
    'Meeting now'
]

# Make predictions: hard class labels plus per-class probabilities.
predicted = model.predict(test_messages)
probs = model.predict_proba(test_messages)

# Columns of `probs` follow the order of model.classes_. Resolve the column
# of each class once, instead of repeating the lookup for every message.
class_names = model.classes_.tolist()
spam_col = class_names.index('Spam')
not_spam_col = class_names.index('Not Spam')

# Display results
for msg, label, prob in zip(test_messages, predicted, probs):
    print(f"Message: '{msg}'")
    print(f"Predicted Class: {label}")
    print(f"Probabilities: Spam={prob[spam_col]:.2f}, Not Spam={prob[not_spam_col]:.2f}\n")

Message: 'Cheap meds now'
Predicted Class: Spam
Probabilities: Spam=1.00, Not Spam=0.00

Message: 'Schedule a meeting'
Predicted Class: Not Spam
Probabilities: Spam=0.00, Not Spam=1.00

Message: 'Project schedule'
Predicted Class: Not Spam
Probabilities: Spam=0.00, Not Spam=1.00

Message: 'Discount online pills'
Predicted Class: Spam
Probabilities: Spam=1.00, Not Spam=0.00

Message: 'Meeting now'
Predicted Class: Not Spam
Probabilities: Spam=0.35, Not Spam=0.65



In [2]:
import numpy as np
import pandas as pd

In [3]:
# Load only the two feature columns and the target column from the CSV.
ads_columns = ['Age', 'EstimatedSalary', 'Purchased']
df = pd.read_csv('Social_Network_Ads.csv', usecols=ads_columns)

In [4]:
# Peek at the first two rows to confirm the selected columns loaded.
df.head(n=2)

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0


In [5]:
# Split the frame into the target column (y) and every remaining column (x).
target_column = 'Purchased'
y = df[target_column]
x = df.drop(columns=[target_column])

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
from sklearn.ensemble import RandomForestClassifier

In [9]:
# Random forests are stochastic (bootstrap samples and per-split feature
# sampling). Fix the seed so the fitted model and the reported accuracy are
# reproducible across kernel restarts; 42 matches the seed used for the
# train/test split.
rf = RandomForestClassifier(random_state=42)

In [10]:
# Fit the forest on the training split.
rf.fit(X=x_train, y=y_train)

In [11]:
# Predict class labels for the held-out rows.
y_pred = rf.predict(X=x_test)

In [12]:
from sklearn.metrics import accuracy_score

In [13]:
# Fraction of held-out rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9125