In [1]:
import pickle

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC


In [2]:
# Read the dataset; the path is relative to the current working directory.
ads = pd.read_csv(filepath_or_buffer="Social_Network_Ads.csv")

In [3]:
# Encode the 'Gender' labels as integers (Male -> 1, Female -> 0).
# Series.map turns every value missing from the mapping into NaN, so a second run
# of this cell (column already 0/1) or an unexpected label would silently wipe the
# column. Skip the work when it is already encoded and fail loudly otherwise.
gender_codes = {'Male': 1, 'Female': 0}
if not ads['Gender'].isin(list(gender_codes.values())).all():
    gender_encoded = ads['Gender'].map(gender_codes)
    if gender_encoded.isna().any():
        unexpected = ads.loc[gender_encoded.isna(), 'Gender'].unique().tolist()
        raise ValueError(f"Unexpected 'Gender' values, cannot encode: {unexpected}")
    ads['Gender'] = gender_encoded

In [4]:
# Target is the 'Purchased' column; the feature matrix is every remaining column
# except 'User ID'.
y = ads['Purchased']
X = ads.drop(columns=['User ID', 'Purchased'])


In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [6]:
# Learn the scaling statistics from the training split only, then apply that same
# fitted transform to both splits so no test-set information reaches the scaler.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
# Candidate classifiers, keyed by the display name used in the accuracy report.
# RandomForestClassifier draws random bootstrap samples and feature subsets, so it
# is seeded here; without a seed its score (and the pickled model) would change on
# every Restart & Run All. The seed matches the one used for the train/test split.
models = {
    "Logistic Regression": LogisticRegression(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Support Vector Classifier": SVC(),
    "Random Forest": RandomForestClassifier(random_state=2),
}

In [8]:
# Fit each candidate on the scaled training split and record its accuracy on the
# held-out split, keyed by the model's display name.
model_accuracies = {}

for model_name, model in models.items():
    # fit() returns the estimator itself, so training and prediction can be chained.
    y_pred = model.fit(X_train, y_train).predict(X_test)
    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
    model_accuracies[model_name] = accuracy

In [9]:
# Build the whole report first (header, then one "name: score" line per model with
# four decimals) and emit it with a single print call.
accuracy_report = ["Model Accuracies: "]
for model_name, accuracy in model_accuracies.items():
    accuracy_report.append(f"{model_name}: {accuracy:.4f}")
print("\n".join(accuracy_report))

Model Accuracies: 
Logistic Regression: 0.8125
K-Nearest Neighbors: 0.9375
Support Vector Classifier: 0.9125
Random Forest: 0.8750


In [10]:
# Select the classifier with the highest measured accuracy instead of hard-coding
# a name: in the recorded run K-Nearest Neighbors (0.9375) outscored Random Forest
# (0.8750), so a fixed "Random Forest" pick would persist a weaker model.
# On ties, max() keeps the first model in insertion order.
best_model_name = max(model_accuracies, key=model_accuracies.get)
best_model = models[best_model_name]


In [11]:
# Persist the selected classifier. It was fitted on StandardScaler-transformed
# features, so the fitted scaler is written next to it; inference code must apply
# scaler.transform() to raw inputs before calling the model's predict().
# 'model.pkl' still contains only the classifier, as before.
with open('model.pkl', 'wb') as f:
    pickle.dump(best_model, f)

with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)