In [311]:
import pandas as pd
import random
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import numpy as np
import math
# Read the ads dataset and separate the predictor columns from the label
df = pd.read_csv('Social_Network_Ads.csv')
data = df[['Gender', 'Age', 'EstimatedSalary']].copy()
Y = df[['Purchased']]

# Replace 'Gender' with a single integer indicator column called 'Male'.
# drop='first' removes the first category, so only one encoded column remains
# (presumably 1 for male rows - depends on the category values in the CSV).
encoder = OneHotEncoder(sparse_output=False, drop='first', dtype=int)
gender_encoded = encoder.fit(data[['Gender']]).transform(data[['Gender']])
data['Male'] = gender_encoded[:, 0]
data = data.drop(columns=['Gender'])

# Unscaled copy of the encoded features with the label attached
data_temp = data.copy()
data_temp['Purchased'] = Y

# Hold out 20% of the rows for testing (fixed seed for reproducibility)
X_train, X_test, Y_train, Y_test = train_test_split(
    data, Y['Purchased'], test_size=0.2, random_state=42
)

# Standardise the features; the scaler is fitted on the training split only
# and then applied to both splits
scaler = StandardScaler()
scaler.fit(X_train)
feature_columns = ['Age', 'EstimatedSalary', 'Male']
for split in (X_train, X_test):
    split[feature_columns] = scaler.transform(split[feature_columns])
# Logistic (sigmoid) function, guarded against overflow in np.exp
def sigmoid(z):
    """
    Apply the logistic function 1 / (1 + e^-z) to a scalar or array.

    The input is first limited to the range [-500, 500] so that the
    exponential cannot overflow.
    """
    bounded = np.clip(z, -500, 500)
    decay = np.exp(-bounded)
    return 1 / (1 + decay)

# Prediction function
def predict(A, B, C, D, data, threshold=0.47):
    """
    Classify every row of ``data`` with the linear model passed through a sigmoid.

    Parameters:
    A, B, C : float - Weights applied to the 'Age', 'Male' and 'EstimatedSalary' columns
    D : float - Bias term added to the weighted sum
    data : table-like object whose 'Age', 'Male' and 'EstimatedSalary' entries
           support elementwise arithmetic (e.g. a pandas DataFrame)
    threshold : float - Probability at or above which a row is labelled 1.
                Defaults to 0.47, the cut-off that used to be hard-coded here.

    Returns:
    list of int - 1 or 0 for each row, in row order
    """
    z = A * data['Age'] + B * data['Male'] + C * data['EstimatedSalary'] + D
    probabilities = sigmoid(z)
    return [1 if prob >= threshold else 0 for prob in probabilities]

def distance_to_plane(a, b, c, d, x1, y1, z1):
    """
    Return the perpendicular distance between a point and a plane.

    Parameters:
    a, b, c, d : float - Coefficients of the plane ax + by + cz + d = 0
    x1, y1, z1 : float - Coordinates of the point

    Returns:
    float - Non-negative distance from (x1, y1, z1) to the plane
    """
    # Value of the plane equation at the point; its magnitude is the
    # distance scaled by the length of the normal vector (a, b, c)
    plane_value = a * x1 + b * y1 + c * z1 + d
    normal_length = math.sqrt(sum(coef**2 for coef in (a, b, c)))
    return abs(plane_value) / normal_length

# Hand-written linear classifier (the "imitation" compared against sklearn below).
# The decision boundary is the plane A*Age + B*Male + C*EstimatedSalary + D = 0.
# Each epoch, every misclassified training row moves the weights by
# learning_rate * (distance of the row from the plane) * (feature value).
# NOTE(review): this is a distance-scaled misclassification update, not the
# gradient of the logistic loss - sigmoid() is only used inside predict().
A, B, C, D = 10, 10, 10, -2  # Starting weights (A, B, C) and bias (D)
learning_rate = 0.05
# Maximum number of passes over the training set
epoch = 10

for j in range(epoch):
    random_list = list(range(len(X_train)))
    random.shuffle(random_list)  # Visit the training rows in a random order
    # Corrections are accumulated in new_*; A, B, C, D stay fixed until the
    # end of the epoch, so every row is judged against the same plane.
    # NOTE(review): because of that, the shuffle changes only the order in
    # which corrections are summed, not which rows trigger a correction.
    new_A, new_B, new_C, new_D = A, B, C, D
    
    for i in random_list:
        X_current = X_train.iloc[i]
        target = Y_train.iloc[i]
        
        # Signed score of the row under the epoch-start weights;
        # z >= 0 is treated as class 1, z < 0 as class 0
        z = A * X_current['Age'] + B * X_current['Male'] + C * X_current['EstimatedSalary'] + D

        # Distance of the row from the current plane, i.e. |z| / ||(A, B, C)||;
        # used to scale the size of the correction
        distance = distance_to_plane(A, B, C, D,X_current['Age'] , X_current['Male'] ,X_current['EstimatedSalary'])

        # Only misclassified rows change the weights.
        # Positive row scored negative: move the weights towards the row.
        if target == 1 and z < 0:
            new_A += X_current['Age'] * learning_rate *distance
            new_B += X_current['Male'] * learning_rate *distance
            new_C += X_current['EstimatedSalary'] * learning_rate *distance
            new_D += 1 * learning_rate*distance

        # Negative row scored non-negative: move the weights away from the row.
        elif target == 0 and z >= 0:
            new_A -= X_current['Age'] * learning_rate*distance
            new_B -= X_current['Male'] * learning_rate*distance
            new_C -= X_current['EstimatedSalary'] * learning_rate*distance
            new_D -= 1 * learning_rate*distance

    # Report the accuracy of the updated weights on the test split, then stop
    # early if this epoch left the weights numerically unchanged
    accuracy = accuracy_score(Y_test, predict(new_A,new_B, new_C, new_D, X_test))
    print(f"Accuracy Score of Epoch {j}: {accuracy}")
    if np.all(np.isclose((A, B, C, D), (new_A, new_B, new_C, new_D))):
        print("Convergence reached.")
        break

    # Commit the accumulated corrections for the next epoch
    A, B, C, D = new_A, new_B, new_C, new_D

# Test-set accuracy of the final hand-trained weights
accuracy = accuracy_score(Y_test, predict(A, B, C, D, X_test))
print(f"Accuracy Score of Imitation: {accuracy}")

# Baseline for comparison: scikit-learn's LogisticRegression with default
# settings, trained and evaluated on the same scaled splits
model = LogisticRegression()
model.fit(X_train,Y_train)
log_accuracy = accuracy_score(Y_test, model.predict(X_test))
print(f"Accuracy Score of Logistic Regression: {log_accuracy}")

Accuracy Score of Epoch 0: 0.8
Accuracy Score of Epoch 1: 0.85
Accuracy Score of Epoch 2: 0.85
Accuracy Score of Epoch 3: 0.85
Accuracy Score of Epoch 4: 0.8375
Accuracy Score of Epoch 5: 0.85
Accuracy Score of Epoch 6: 0.8625
Accuracy Score of Epoch 7: 0.8625
Accuracy Score of Epoch 8: 0.8625
Accuracy Score of Epoch 9: 0.8875
Accuracy Score of Imitation: 0.8875
Accuracy Score of Logistic Regression: 0.8875
