In [301]:
import random

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

# Load dataset
df = pd.read_csv('Social_Network_Ads.csv')
Y = df[['Purchased']]

# One-hot encode the 'Gender' column. drop='first' discards the first category so a
# single 0/1 indicator column remains; it is stored under the name 'Male'.
# NOTE(review): assumes 'Gender' only holds 'Female'/'Male' so that the surviving
# column really is the male indicator -- confirm against the CSV.
encoder = OneHotEncoder(sparse_output=False, drop='first', dtype=int)
gender_encoded = encoder.fit_transform(df[['Gender']])

# Build the feature frame in one step (no in-place drop); column order is
# Age, EstimatedSalary, Male.
data = df[['Age', 'EstimatedSalary']].assign(Male=gender_encoded[:, 0])

# Split data into train and test sets (80/20, fixed seed for a repeatable split)
X_train, X_test, Y_train, Y_test = train_test_split(
    data, Y['Purchased'], test_size=0.2, random_state=42
)

# Feature scaling: the scaler is fitted on the training split only and then applied
# to both splits. New DataFrames are created instead of assigning into the frames
# returned by train_test_split, which avoids writing into a selection of `data`
# (the pattern that can trigger pandas' SettingWithCopyWarning).
scaler = StandardScaler()
scaler.fit(X_train)
X_train = pd.DataFrame(
    scaler.transform(X_train), columns=X_train.columns, index=X_train.index
)
X_test = pd.DataFrame(
    scaler.transform(X_test), columns=X_test.columns, index=X_test.index
)

# Sigmoid (logistic) function, guarded against overflow in np.exp
def sigmoid(z):
    """Evaluate the logistic function 1 / (1 + e^(-z)) element-wise.

    The input is first limited to the interval [-500, 500] so the exponent
    passed to ``np.exp`` stays within float64 range.

    Parameters
    ----------
    z : scalar or array-like
        Value(s) accepted by ``np.clip``.

    Returns
    -------
    The logistic function of the clipped input, with the same shape as ``z``.
    """
    bounded = np.clip(z, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

# Prediction function
def predict(A, B, C, D, data):
    """Return a list of hard 0/1 labels for every row of ``data``.

    Parameters
    ----------
    A, B, C : numbers
        Weights applied to the 'Age', 'Male' and 'EstimatedSalary' columns.
    D : number
        Bias term added to the weighted sum.
    data : mapping of column name -> sequence (e.g. a DataFrame)
        Must provide 'Age', 'Male' and 'EstimatedSalary'.

    Returns
    -------
    list of int
        1 where the sigmoid of the linear score is at least 0.5, otherwise 0.
    """
    age_term = A * data['Age']
    male_term = B * data['Male']
    salary_term = C * data['EstimatedSalary']
    scores = age_term + male_term + salary_term + D
    return [int(p >= 0.5) for p in sigmoid(scores)]

# Linear classifier trained with a mistake-driven (perceptron-style) stochastic update:
# the parameters only change when the sign of z disagrees with the sampled label.
A, B, C, D = 1, 1, 1, -2  # Initial weights (Age, Male, EstimatedSalary) and bias
learning_rate = 0.01
n_iterations = 10000

# Dedicated, seeded generator so the sampled rows -- and therefore the learned
# weights and the reported accuracy -- are identical on every run.
rng = random.Random(42)
n_samples = X_train.shape[0]  # loop-invariant, so computed once

# Training loop: one randomly drawn training row per iteration
for i in range(n_iterations):
    rand = rng.randrange(n_samples)  # Positional index of the sampled row
    X_current = X_train.iloc[rand]  # .iloc: positional lookup, independent of index labels
    target = Y_train.iloc[rand]

    # Linear score for the sampled row
    z = A * X_current['Age'] + B * X_current['Male'] + C * X_current['EstimatedSalary'] + D

    if target == 1 and z < 0:
        # Positive sample scored negative: move the parameters towards the sample
        A += X_current['Age'] * learning_rate
        B += X_current['Male'] * learning_rate
        C += X_current['EstimatedSalary'] * learning_rate
        D += 1 * learning_rate

    elif target == 0 and z >= 0:
        # Negative sample scored non-negative: move the parameters away from the sample
        A -= X_current['Age'] * learning_rate
        B -= X_current['Male'] * learning_rate
        C -= X_current['EstimatedSalary'] * learning_rate
        D -= 1 * learning_rate

# Accuracy of the hand-written classifier on the held-out test set
accuracy = accuracy_score(Y_test, predict(A, B, C, D, X_test))
print(f"Accuracy Score of Imitation: {accuracy}")

# Reference point: scikit-learn's LogisticRegression fitted on the same training data.
# The label below names the sklearn model so the two printed scores can be told apart.
model = LogisticRegression()
model.fit(X_train, Y_train)
log_accuracy = accuracy_score(Y_test, model.predict(X_test))
print(f"Accuracy Score of sklearn LogisticRegression: {log_accuracy}")


Accuracy Score of Imitation: 0.8375
Accuracy Score of Imitation: 0.8875
