In [1]:
import numpy as np

# Sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Logistic Regression using Stochastic Gradient Descent
def logistic_regression_sgd(X, y, alpha=0.3, epochs=10):
    # Initialize coefficients (theta_0, theta_1, theta_2)
    n_samples, n_features = X.shape
    theta = np.zeros(n_features)

    # Gradient Descent for each epoch
    for epoch in range(epochs):
        for i in range(n_samples):
            xi = X[i]  # i-th sample features (x1, x2)
            yi = y[i]  # i-th sample label (y)

            # Calculate prediction using the sigmoid function
            z = np.dot(theta, xi)
            y_hat = sigmoid(z)

            # Update coefficients using gradient descent formula
            error = yi - y_hat
            gradient = alpha * error * y_hat * (1 - y_hat)
            theta += gradient * xi

        # After updating coefficients, calculate accuracy for the epoch
        predictions = predict(X, theta)
        predicted_classes = [1 if p >= 0.5 else 0 for p in predictions]
        accuracy = np.mean(predicted_classes == y) * 100

        # Print progress after each epoch
        print(f'Epoch {epoch+1}, Coefficients: {theta}, Accuracy: {accuracy:.2f}%')

    return theta

# Predict using the sigmoid function
def predict(X, theta):
    z = np.dot(X, theta)
    return sigmoid(z)

# Example data (X1, X2, and adding a bias term)
X = np.array([[2.7810836, 2.550537003],
              [1.465489372, 2.362125076],
              [3.396561688, 4.400293529],
              [1.38807019, 1.850220317],
              [3.06407232, 3.005305973],
              [7.627531214, 2.759262235],
              [5.332441248, 2.088626775],
              [6.922596716, 1.77106367],
              [8.675418651, -0.2420686549],
              [7.673756466, 3.508563011]])

# Labels (Y)
y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

# Add bias term (theta_0) to the dataset by adding a column of 1s
X_with_bias = np.c_[np.ones(X.shape[0]), X]

# Train the logistic regression model
theta = logistic_regression_sgd(X_with_bias, y, alpha=0.3, epochs=10)

# Print the final coefficients
print("Final Coefficients (theta_0, theta_1, theta_2):", theta)

# Test predictions
predictions = predict(X_with_bias, theta)
predicted_classes = [1 if p >= 0.5 else 0 for p in predictions]
print("Predicted Classes:", predicted_classes)

# Calculate and display final accuracy
final_accuracy = np.mean(predicted_classes == y) * 100
print(f"Final Accuracy: {final_accuracy:.2f}%")

Epoch 1, Coefficients: [-0.01622585  0.33886677 -0.08247447], Accuracy: 50.00%
Epoch 2, Coefficients: [-0.0886254   0.45880357 -0.28874   ], Accuracy: 60.00%
Epoch 3, Coefficients: [-0.15123927  0.57192103 -0.45315515], Accuracy: 80.00%
Epoch 4, Coefficients: [-0.21063249  0.63442462 -0.60689365], Accuracy: 90.00%
Epoch 5, Coefficients: [-0.25967653  0.67949925 -0.73224193], Accuracy: 100.00%
Epoch 6, Coefficients: [-0.29905581  0.72113296 -0.83230529], Accuracy: 100.00%
Epoch 7, Coefficients: [-0.33188348  0.75894968 -0.91562008], Accuracy: 100.00%
Epoch 8, Coefficients: [-0.36003118  0.79318422 -0.98698904], Accuracy: 100.00%
Epoch 9, Coefficients: [-0.38467939  0.82424538 -1.04937889], Accuracy: 100.00%
Epoch 10, Coefficients: [-0.40660546  0.85257332 -1.10474626], Accuracy: 100.00%
Final Coefficients (theta_0, theta_1, theta_2): [-0.40660546  0.85257332 -1.10474626]
Predicted Classes: [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
Final Accuracy: 100.00%


# Using sklearn

In [2]:
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
from skimpy import skim

In [3]:
df=pd.read_csv('/Users/mac/Desktop/Programming_Stuff/Machine Learning/Datasets/User_Data.csv')

In [4]:
df.shape

(400, 5)

In [5]:
skim(df)

In [6]:
df.drop(['User ID'], axis=1, inplace=True)

In [7]:
df

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0
...,...,...,...,...
395,Female,46,41000,1
396,Male,51,23000,1
397,Female,50,20000,1
398,Male,36,33000,0


In [8]:
encoder=LabelEncoder()
Gender_encoded= encoder.fit_transform(df['Gender'])
df['Gender']=Gender_encoded

In [9]:
df

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0
...,...,...,...,...
395,0,46,41000,1
396,1,51,23000,1
397,0,50,20000,1
398,1,36,33000,0


In [10]:
X=df[['Age','EstimatedSalary']].values
y=df['Purchased'].values

In [11]:
X

array([[    19,  19000],
       [    35,  20000],
       [    26,  43000],
       [    27,  57000],
       [    19,  76000],
       [    27,  58000],
       [    27,  84000],
       [    32, 150000],
       [    25,  33000],
       [    35,  65000],
       [    26,  80000],
       [    26,  52000],
       [    20,  86000],
       [    32,  18000],
       [    18,  82000],
       [    29,  80000],
       [    47,  25000],
       [    45,  26000],
       [    46,  28000],
       [    48,  29000],
       [    45,  22000],
       [    47,  49000],
       [    48,  41000],
       [    45,  22000],
       [    46,  23000],
       [    47,  20000],
       [    49,  28000],
       [    47,  30000],
       [    29,  43000],
       [    31,  18000],
       [    31,  74000],
       [    27, 137000],
       [    21,  16000],
       [    28,  44000],
       [    27,  90000],
       [    35,  27000],
       [    33,  28000],
       [    30,  49000],
       [    26,  72000],
       [    27,  31000],


In [12]:
xtrain,xtest,ytrain,ytest=train_test_split(X,y,test_size=0.2,random_state=0)

In [13]:
sg_c=StandardScaler()
sg_c.fit_transform(xtrain)

array([[ 1.92295008e+00,  2.14601566e+00],
       [ 2.02016082e+00,  3.78719297e-01],
       [-1.38221530e+00, -4.32498705e-01],
       [-1.18779381e+00, -1.01194013e+00],
       [ 1.92295008e+00, -9.25023920e-01],
       [ 3.67578135e-01,  2.91803083e-01],
       [ 1.73156642e-01,  1.46942725e-01],
       [ 2.02016082e+00,  1.74040666e+00],
       [ 7.56421121e-01, -8.38107706e-01],
       [ 2.70367388e-01, -2.87638347e-01],
       [ 3.67578135e-01, -1.71750061e-01],
       [-1.18475597e-01,  2.20395980e+00],
       [-1.47942605e+00, -6.35303205e-01],
       [-1.28500455e+00, -1.06988428e+00],
       [-1.38221530e+00,  4.07691369e-01],
       [-1.09058306e+00,  7.55356227e-01],
       [-1.47942605e+00, -2.00722133e-01],
       [ 9.50842613e-01, -1.06988428e+00],
       [ 9.50842613e-01,  5.81523798e-01],
       [ 3.67578135e-01,  9.87132798e-01],
       [ 5.61999628e-01, -8.96051849e-01],
       [-6.04529329e-01,  1.45068594e+00],
       [-2.12648508e-02, -5.77359062e-01],
       [-6.

In [14]:
classifier=LogisticRegression(random_state=0,max_iter=10000)
classifier.fit(xtrain,ytrain)

In [15]:
ypred=classifier.predict(xtest)
cm=confusion_matrix(ytest,ypred)
accuarcy=accuracy_score(ytest,ypred)

In [16]:
print(cm)
print('the accuarcy is:',accuarcy)

[[56  2]
 [ 5 17]]
the accuarcy is: 0.9125


In [17]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

X_set, y_set = xtest, ytest
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Logistic Regression (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

: 

In [None]:
print('its been done')