In [1]:
import numpy as np
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from scipy.special import expit

In [2]:
def sigmoid(z):
    """Apply the logistic function ``1 / (1 + exp(-z))`` element-wise.

    Thin wrapper around ``scipy.special.expit``; accepts whatever ``expit``
    accepts (scalars or array-likes) and returns its result unchanged.
    """
    activation = expit(z)
    return activation

In [3]:
# def compute_loss(y_true, y_pred):
#     return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

In [4]:
def logistic_regression_sgd(X_train, y_train, learning_rate=0.1, n_iter=3,
                            random_state=42):
    """Fit a binary logistic-regression model with stochastic gradient descent.

    Each epoch performs ``n_samples`` single-example updates.  The example for
    every update is drawn uniformly *with replacement*, so within one epoch
    some rows may be used several times and others not at all.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
        Feature matrix.  Anything ``np.asarray`` can convert to a 2-D float
        array is accepted (ndarray, nested lists, DataFrame).
    y_train : array-like holding n_samples labels
        Targets for the logistic loss (expected to be 0/1).  Flattened to
        1-D, so an ``(n_samples, 1)`` column is accepted as well.
    learning_rate : float, default=0.1
        Step size applied to every gradient update.
    n_iter : int, default=3
        Number of epochs.
    random_state : int, None or numpy.random.RandomState, default=42
        Seed (or ready-made generator) used for the initial weights and for
        sampling.  The default reproduces the results previously obtained
        with ``np.random.seed(42)``.

    Returns
    -------
    weights : numpy.ndarray of shape (n_features,)
        Learned coefficients.
    bias : float
        Learned intercept.

    Raises
    ------
    ValueError
        If ``X_train`` is not 2-D or the number of labels differs from the
        number of rows in ``X_train``.
    """
    X = np.asarray(X_train, dtype=float)
    y = np.asarray(y_train, dtype=float).reshape(-1)
    if X.ndim != 2:
        raise ValueError(
            f"X_train must be 2-D (n_samples, n_features), got {X.ndim}-D input"
        )
    n_samples, n_features = X.shape
    if y.shape[0] != n_samples:
        raise ValueError(
            f"X_train has {n_samples} rows but y_train has {y.shape[0]} labels"
        )

    # Use a private legacy generator instead of np.random.seed(): it yields
    # the same number stream as the global seeding did, but calling this
    # function no longer resets the RNG state of the rest of the notebook.
    if isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    weights = rng.randn(n_features)
    bias = 0.0

    for _ in range(n_iter):
        for _ in range(n_samples):
            # Pick one training example at random (with replacement).
            idx = rng.randint(n_samples)
            x_i = X[idx]

            # expit is the logistic function (the same one sigmoid() wraps).
            error = expit(np.dot(x_i, weights) + bias) - y[idx]

            # Gradient of the per-sample log-loss: error * x_i for the
            # weights and error for the bias.
            weights -= learning_rate * (error * x_i)
            bias -= learning_rate * error

    return weights, bias

In [5]:
def predict(X, weights, bias):
    """Return hard 0/1 class labels for the rows of ``X``.

    The linear score ``np.dot(X, weights) + bias`` is passed through
    ``sigmoid``; a probability strictly greater than 0.5 maps to 1,
    anything else (including exactly 0.5) maps to 0.
    """
    probabilities = sigmoid(bias + np.dot(X, weights))
    is_positive = probabilities > 0.5
    return is_positive.astype(int)

In [6]:
import pandas as pd

# Feature matrices: read each CSV with pandas and convert straight to ndarray.
X_train = pd.read_csv('X_train1.csv').to_numpy()
X_test = pd.read_csv('X_test1.csv').to_numpy()

# Label files: same conversion, then flatten to 1-D vectors.
y_train = pd.read_csv('y_train1.csv').to_numpy().reshape(-1)
y_test = pd.read_csv('y_test1.csv').to_numpy().reshape(-1)

In [7]:
# Fit the hand-written SGD model using its default hyper-parameters
# (learning_rate=0.1, n_iter=3).
weights, bias = logistic_regression_sgd(X_train, y_train)

In [8]:
# Hard 0/1 predictions of the custom model for the test features.
y_pred = predict(X_test, weights, bias)

In [9]:
# Share of test labels the custom model predicted correctly.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.84


In [10]:
# Display the coefficient vector learned by logistic_regression_sgd.
weights

array([  1.89671415,  20.46173575,  -1.25231146,   1.82302986,
        11.26584664,   0.06586304,   1.57921282,   3.16743473,
        -0.46947439, -58.85743973,  56.68658243,  -5.41572967,
       -11.4950377 , -37.21328017,   6.47508225])

In [11]:
from sklearn.linear_model import LogisticRegression

# scikit-learn baseline to compare against the hand-written SGD model.
# The 'saga' solver shuffles the data, so random_state is pinned to keep the
# fit (and the accuracy / coefficients it produces) repeatable across kernel
# restarts.  max_iter=3 presumably mirrors the n_iter=3 default of
# logistic_regression_sgd; with so few passes the solver will likely stop
# before converging and emit a ConvergenceWarning.
model = LogisticRegression(solver='saga', max_iter=3, C=1.0, random_state=42)

# Training the model
model.fit(X_train, y_train)

# Making predictions (note: this rebinds y_pred from the custom model)
y_pred = model.predict(X_test)

# Evaluating the model (note: this rebinds accuracy from the custom model)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.84




In [12]:
# Display the first row of the scikit-learn model's coefficient matrix.
model.coef_[0]

array([ 1.71291970e-04,  3.30044054e-03, -7.58323576e-04,  3.94520503e-05,
        3.11199338e-03, -6.94705758e-06,  3.35778347e-05,  3.77176811e-04,
        1.15765227e-05, -1.37069739e-02,  1.20186866e-02, -5.60656951e-04,
       -3.99210351e-03, -4.93129490e-03,  2.55930179e-03])