In [None]:
import pandas as pd

In [None]:
# Read the raw ads dataset from the path relative to the notebook.
df = pd.read_csv(filepath_or_buffer="datasets/Social_Network_Ads.csv")

In [None]:
# Preview the first five rows.
df.head(n=5)

In [None]:
# Print a summary of the frame: columns, non-null counts and dtypes.
df.info()

In [None]:
# Count missing values per column.
df.isna().sum()

In [None]:
# Report columns by dtype. Selecting on "number" covers every numeric dtype
# (ints of any width and floats), so a float column is no longer listed as
# nominal the way it was with a hard-coded 'int64' filter.
numeric_columns = df.select_dtypes(include="number").columns
nominal_columns = df.select_dtypes(exclude="number").columns

print("Numeric features : ", numeric_columns)
print("Nominal features : ", nominal_columns)

In [None]:
# Encoding categorical variables.
#
# The encoded column is assigned back to the frame. Calling
# `df['Gender'].replace(..., inplace=True)` acts on an intermediate Series:
# pandas warns about that pattern, and with copy-on-write enabled it does
# not update `df` at all.
GENDER_CODES = {'Male': 1, 'Female': 0}

# The dtype guard keeps the cell safe to re-run: once the column is numeric
# there is nothing left to map, and mapping again would turn 0/1 into NaN.
if not pd.api.types.is_numeric_dtype(df['Gender']):
    # Labels missing from GENDER_CODES map to NaN, so the integer cast fails
    # loudly instead of letting an unencoded value through.
    df['Gender'] = df['Gender'].map(GENDER_CODES).astype('int64')

# A second pass through LabelEncoder is not needed: the column already holds
# the 0/1 codes, and LabelEncoder would map 0 -> 0 and 1 -> 1.
df.info()

In [None]:
# Split into features and target variables.

# Features: every column except the row identifier and the label.
X = df.drop(['User ID', 'Purchased'], axis=1)

# Target: the label column, kept as a one-column DataFrame.
Y = df.loc[:, ['Purchased']]

In [None]:
# Split into train and test.
from sklearn.model_selection import train_test_split

# Fixed seed so that Restart Kernel -> Run All reproduces the same partition
# (and therefore the same metrics) on every run.
RANDOM_STATE = 42

X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=RANDOM_STATE,
    stratify=Y,  # keep the class ratio the same in the train and test parts
)

In [None]:
# Feature scaling. StandardScaler standardizes a feature by removing the mean
# and scaling to unit variance, so the transformed training data has a mean
# of 0 and a standard deviation of 1.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# Work on explicit copies: the frames returned by train_test_split are
# derived from X, and assigning a column onto them directly can raise
# SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Learn the mean and variance from the training data only ...
X_train['EstimatedSalary'] = scaler.fit_transform(X_train[['EstimatedSalary']]).ravel()
# ... and reuse those statistics for the test data. Re-fitting on the test
# set would scale it differently from the data the model was trained on.
X_test['EstimatedSalary'] = scaler.transform(X_test[['EstimatedSalary']]).ravel()

In [None]:
# Training the model.
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()

# scikit-learn expects a 1-D target. Y_train is a one-column DataFrame, which
# fits but raises DataConversionWarning, so it is flattened first. The same
# expression also works if Y_train is already a Series.
model.fit(X_train, Y_train.to_numpy().ravel())

Y_pred = model.predict(X_test)

In [None]:
# Calculating metrics.
from sklearn.metrics import mean_squared_error, accuracy_score, confusion_matrix, classification_report


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return float(numerator) / float(denominator) if denominator else 0.0


mse = mean_squared_error(Y_test, Y_pred)
print("MSE : ", mse)
accuracy = accuracy_score(Y_test, Y_pred)
print("Accuracy : ", accuracy)

# Pin the label order so the matrix is always 2x2, even if one class is
# missing from the test labels and predictions; otherwise the four-way
# unpacking below fails.
# NOTE(review): assumes 'Purchased' is coded 0 (negative) / 1 (positive) - confirm.
conf_matrix = confusion_matrix(Y_test, Y_pred, labels=[0, 1])
print(conf_matrix)

# Row-major order of a binary confusion matrix: TN, FP, FN, TP.
TN, FP, FN, TP = conf_matrix.ravel()

# Guarded divisions: with no positive predictions (TP + FP == 0) or no
# positive samples (TP + FN == 0) the plain ratio would be nan with a
# RuntimeWarning; report 0.0 instead.
precision = _safe_ratio(TP, TP + FP)
recall = _safe_ratio(TP, TP + FN)
f1_score = _safe_ratio(2 * (precision * recall), precision + recall)
error_rate = _safe_ratio(FP + FN, TP + TN + FP + FN)

print("True negative : ", TN)
print("False positive : ", FP)
print("False negative : ", FN)
print("True positive : ", TP)
print("Precision : ", precision)
print("Recall : ", recall)
print("F1 Score : ", f1_score)
print("Error rate : ", error_rate)