# Created By
- Mohammed Essam Mohammed 20220299
- Amr Ehab Abdel Zahir 20221110

## Task 1

In [13]:
%pip install pandas scikit-learn numpy matplotlib
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

Note: you may need to restart the kernel to use updated packages.


In [14]:
np.random.seed(42)
X = 2 * np.random.rand(1000, 1)
y = 4 * X - 2 + np.random.randn(1000, 1) * 0.5  

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Train", X_train.shape)
print("Test", X_test.shape)

Train (800, 1)
Test (200, 1)


In [16]:
def BatchGradientDescent(X, y, learning_rate=0.01, epochs=1000):
    weights = np.random.rand(X.shape[1], 1)
    for _ in range(epochs):
        predictions = np.dot(X, weights)
        error = y - predictions
        weights += learning_rate * np.dot(X.T, error) / X.shape[0]
    return weights

In [17]:
def StochGradDesc(X, y, learning_rate=0.01, epochs=1000):
    weights = np.random.rand(X.shape[1])
    for _ in range(epochs):
        for i in range(X.shape[0]):
            prediction = np.dot(X[i], weights)
            error = y[i] - prediction
            weights += learning_rate * error * X[i]
    return weights

In [18]:
def MiniBatchGradDesc(X, y, learning_rate=0.01, epochs=1000, batch_size=10):
    weights = np.random.rand(X.shape[1], 1)
    for _ in range(epochs):
        for i in range(0, X.shape[0], batch_size):
            X_batch = X[i:i+batch_size]
            y_batch = y[i:i+batch_size]
            prediction = np.dot(X_batch, weights)
            error = y_batch - prediction
            weights += learning_rate * np.dot(X_batch.T, error) / batch_size
    return weights


In [19]:
from sklearn.metrics import mean_squared_error

lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
y_pred_sk = lin_reg.predict(X_test)
mse_sk = mean_squared_error(y_test, y_pred_sk)
print(f"LR MSE: {mse_sk}")

BGD_weights = BatchGradientDescent(X_train, y_train)
BGD_pred = np.dot(X_test, BGD_weights)
mse_bgd = mean_squared_error(y_test, BGD_pred)
print(f"BGD MSE: {mse_bgd}")

SGD_weights = StochGradDesc(X_train, y_train)
SGD_pred = np.dot(X_test, SGD_weights)
mse_sgd = mean_squared_error(y_test, SGD_pred)
print(f"SGD MSE: {mse_sgd}")

MB_weights = MiniBatchGradDesc(X_train, y_train)
MB_pred = np.dot(X_test, MB_weights)
mse_mbgd = mean_squared_error(y_test, MB_pred)
print(f"MB MSE: {mse_mbgd}")

LR MSE: 0.21434311664543843
BGD MSE: 1.2485313727443748
SGD MSE: 1.2487037809455008
MB MSE: 1.2483199573929982


## Task 2

In [20]:
%pip install pandas scikit-learn
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
# Generate binary classification data
X, y = make_classification(n_samples=1000, n_features=2, n_informative=2, n_redundant=0, n_clusters_per_class=1, random_state=42)

# Convert to DataFrame for easier handling
data = pd.DataFrame(X, columns=['Feature1', 'Feature2'])
data['Label'] = y

print(data.head())

Note: you may need to restart the kernel to use updated packages.
   Feature1  Feature2  Label
0  0.601034  1.535353      1
1  0.755945 -1.172352      0
2  1.354479 -0.948528      0
3  3.103090  0.233485      0
4  0.753178  0.787514      1


In [21]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Train", X_train.shape)
print("Test", X_test.shape)

Train (800, 2)
Test (200, 2)


In [22]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [23]:
def BatchGradientDescent(X, y, learning_rate=0.01, epochs=100):
    weights = np.random.rand(X.shape[1])
    for _ in range(epochs):
        predictions = sigmoid(np.dot(X, weights))
        error = y - predictions
        weights += learning_rate * np.dot(X.T, error)
    return weights

BGD_weights = BatchGradientDescent(X_train, y_train)
BGD_pred = sigmoid(np.dot(X_test, BGD_weights)) > 0.5

In [24]:
def StochasticGradientDescent(X, y, learning_rate=0.01, epochs=100):
    weights = np.random.rand(X.shape[1])
    for _ in range(epochs):
        for i in range(X.shape[0]):
            xi = X[i, :].reshape(1, -1)
            yi = y[i]
            prediction = sigmoid(np.dot(xi, weights))
            error = yi - prediction
            weights += learning_rate * np.dot(xi.T, error)
    return weights

SGD_weights = StochasticGradientDescent(X_train, y_train)
SGD_pred = sigmoid(np.dot(X_test, SGD_weights)) > 0.5

In [25]:
def MiniBatchGradientDescent(X, y, learning_rate=0.01, epochs=100, batch_size=32):
    weights = np.random.rand(X.shape[1])
    for _ in range(epochs):
        indices = np.random.permutation(X.shape[0])
        X_shuffled = X[indices]
        y_shuffled = y[indices]
        for start in range(0, X.shape[0], batch_size):
            end = start + batch_size
            X_batch = X_shuffled[start:end]
            y_batch = y_shuffled[start:end]
            predictions = sigmoid(np.dot(X_batch, weights))
            error = y_batch - predictions
            weights += learning_rate * np.dot(X_batch.T, error) / batch_size
    return weights

MBGD_weights = MiniBatchGradientDescent(X_train, y_train)
MBGD_pred = sigmoid(np.dot(X_test, MBGD_weights)) > 0.5

In [26]:
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

BGD_accuracy = accuracy_score(y_test, BGD_pred)
print(f"BGD: {BGD_accuracy}")

SGD_accuracy = accuracy_score(y_test, SGD_pred)
print(f"SGD: {SGD_accuracy}")

MBGD_accuracy = accuracy_score(y_test, MBGD_pred)
print(f"MBGD: {MBGD_accuracy}")

log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
y_pred_sk = log_reg.predict(X_test)

accuracy_sk = accuracy_score(y_test, y_pred_sk > 0.5)
print(f"LR: {accuracy_sk}")

BGD: 0.84
SGD: 0.895
MBGD: 0.895
LR: 0.9
