In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Column names for the header-less abalone data file, in file order.
# 'Rings' (last) is the regression target; everything else is a feature.
columns = [
    'Sex',
    'Length',
    'Diameter',
    'Height',
    'WholeWeight',
    'ShuckedWeight',
    'VisceraWeight',
    'ShellWeight',
    'Rings',
]

In [None]:
# The raw file has no header row, so the column names are supplied explicitly.
data = pd.read_csv(
    "./datasets/abalone/abalone.data",
    names=columns,
    header=None,
)

In [None]:
# Encode the categorical Sex column as integer codes (M=0, F=1, I=2).
# The mapping is guarded so that re-running this cell is a no-op: mapping an
# already-encoded column would look up 0/1/2 in the label dict and silently
# turn every value into NaN.
sex_codes = {'M': 0, 'F': 1, 'I': 2}
if not pd.api.types.is_numeric_dtype(data['Sex']):
    sex_encoded = data['Sex'].map(sex_codes)
    if sex_encoded.isna().any():
        # Labels outside the mapping (or missing values) would otherwise
        # propagate NaN through scaling and gradient descent unnoticed.
        unexpected = data.loc[sex_encoded.isna(), 'Sex'].unique().tolist()
        raise ValueError(f"Unexpected values in 'Sex' column: {unexpected}")
    data['Sex'] = sex_encoded

In [None]:
# Sanity-check the loaded frame: column dtypes, non-null counts and memory use.
data.info()

In [None]:
# Separate the regression target ('Rings') from the feature columns.
y = data['Rings']
X = data.drop('Rings', axis=1)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
def initialize_weights(dim):
    """Return the starting parameters for the linear model.

    Args:
        dim: Shape passed to ``np.zeros`` (the number of features for a
            1-D weight vector).

    Returns:
        A ``(weights, bias)`` tuple: an all-zero array of shape ``dim`` and
        the integer ``0``.
    """
    return np.zeros(dim), 0

In [None]:
def predict(X, weights, bias):
    """Evaluate the linear model ``X . weights + bias``.

    Args:
        X: Feature matrix (one row per sample).
        weights: Coefficient vector, one entry per feature column of ``X``.
        bias: Scalar intercept added to every prediction.

    Returns:
        The predictions, one per row of ``X``.
    """
    linear_term = np.dot(X, weights)
    return linear_term + bias


In [None]:
def compute_loss(y_true, y_pred):
    """Return the halved mean squared error between targets and predictions.

    The value is ``sum((y_pred - y_true)^2) / (2 * m)`` with ``m = len(y_true)``;
    note the extra factor of 1/2 compared with the plain MSE.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values, aligned element-wise with ``y_true``.

    Returns:
        The scalar loss.
    """
    residuals = y_pred - y_true
    n_samples = len(y_true)
    return (1 / (2 * n_samples)) * np.sum(residuals ** 2)


In [None]:
def compute_gradients(X, y_true, y_pred):
    """Return the gradients of the halved-MSE loss for the linear model.

    Args:
        X: Feature matrix (one row per sample).
        y_true: Observed target values.
        y_pred: Current predictions, aligned element-wise with ``y_true``.

    Returns:
        A ``(dw, db)`` tuple: the gradient with respect to the weights
        (``X.T . residuals / m``) and with respect to the bias
        (``sum(residuals) / m``), where ``m = len(y_true)``.
    """
    residuals = y_pred - y_true
    inv_m = 1 / len(y_true)
    dw = inv_m * np.dot(X.T, residuals)
    db = inv_m * np.sum(residuals)
    return dw, db

In [None]:
def update_parameters(weights, bias, dw, db, learning_rate):
    """Take one gradient-descent step and return the updated parameters.

    The inputs are left untouched: the update builds new values instead of
    using ``-=``, which would modify the caller's weight array in place (and
    fail outright for integer-dtype weights, since the float step cannot be
    cast back into an integer array).

    Args:
        weights: Current weight vector.
        bias: Current scalar bias.
        dw: Gradient of the loss with respect to ``weights``.
        db: Gradient of the loss with respect to ``bias``.
        learning_rate: Step size applied to both gradients.

    Returns:
        A ``(weights, bias)`` tuple holding the updated parameters.
    """
    new_weights = weights - learning_rate * dw
    new_bias = bias - learning_rate * db
    return new_weights, new_bias

In [None]:
def train_linear_regression(X, y, learning_rate, epochs, log_every=1):
    """Fit a linear model with batch gradient descent.

    Starting from the parameters returned by ``initialize_weights``, every
    epoch predicts on the full ``X``, computes the loss and its gradients,
    and applies one ``update_parameters`` step.

    Args:
        X: Feature matrix; ``X.shape[1]`` sets the number of weights.
        y: Target values aligned with the rows of ``X``.
        learning_rate: Step size for each gradient-descent update.
        epochs: Number of full passes (updates) to run.
        log_every: Print the loss every ``log_every`` epochs (the final epoch
            is always printed). The default of 1 prints every epoch; pass a
            larger value to avoid flooding the notebook output, or 0/None to
            disable logging.

    Returns:
        A ``(weights, bias)`` tuple with the learned parameters.
    """
    weights, bias = initialize_weights(X.shape[1])

    for epoch in range(epochs):
        y_pred = predict(X, weights, bias)

        # The loss is measured before this epoch's update is applied.
        loss = compute_loss(y, y_pred)

        dw, db = compute_gradients(X, y, y_pred)
        weights, bias = update_parameters(weights, bias, dw, db, learning_rate)

        if log_every and (epoch % log_every == 0 or epoch == epochs - 1):
            print(f"Epoch {epoch}: Loss = {loss:.4f}")
    return weights, bias

In [None]:
# Gradient-descent hyperparameters, then fit on the standardized training split.
learning_rate, epochs = 0.01, 1000
weights, bias = train_linear_regression(
    X_train,
    y_train,
    learning_rate,
    epochs,
)

In [None]:
def evaluate_model(X, y, weights, bias):
    """Return the mean squared error of the linear model on ``(X, y)``.

    Args:
        X: Feature matrix to predict on.
        y: True target values aligned with the rows of ``X``.
        weights: Learned weight vector.
        bias: Learned scalar bias.

    Returns:
        The mean of the squared residuals, as a Python float.
    """
    y_pred = predict(X, weights, bias)
    # compute_loss() is the *halved* MSE used as the training objective
    # (factor 1/(2m)), so reporting it as "MSE" understates the error by 2x.
    # Average the squared residuals directly instead.
    return float(np.mean((y_pred - y) ** 2))

In [None]:
# Score the trained parameters on the held-out test split and print the
# result to four decimal places.
mse_test = evaluate_model(X_test, y_test, weights, bias)
print(f"Test MSE: {mse_test:.4f}")

In [None]:
def show_regression_equation(weights, bias, feature_names=None):
    """Print the fitted linear model as a human-readable equation.

    The coefficients are printed as given, so they are expressed in whatever
    scale the model was trained on (standardized features in this notebook).

    Args:
        weights: Learned coefficients, one per feature.
        bias: Learned intercept.
        feature_names: Optional sequence of names, one per weight and in the
            same order. When omitted, the names are taken positionally from
            the notebook-level ``data.columns``, which is correct only while
            the feature columns precede the target column in ``data``.

    Raises:
        IndexError: If there are fewer feature names than weights.
    """
    if feature_names is None:
        # Backward-compatible fallback to the notebook's global frame.
        feature_names = data.columns

    terms = [f"{weight:.4f} * {feature_names[i]}" for i, weight in enumerate(weights)]
    terms.append(f"{bias:.4f}")
    eqn = "y = " + " + ".join(terms)
    print(f"Regression Equation: {eqn}")

In [None]:
# Print the learned model as an equation. The weights were fitted on the
# standardized training features, so the coefficients are in that scale.
show_regression_equation(weights, bias)

In [None]:
def plot_regression(X_test, y_test, weights, bias):
    """Scatter the model's predictions against the true targets.

    A dashed red diagonal spanning the range of ``y_test`` marks where
    perfect predictions would fall.

    Args:
        X_test: Feature matrix to predict on.
        y_test: True target values aligned with the rows of ``X_test``.
        weights: Learned weight vector.
        bias: Learned scalar bias.
    """
    predictions = predict(X_test, weights, bias)
    lowest, highest = y_test.min(), y_test.max()

    _, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(y_test, predictions, color='blue', alpha=0.5, label='Predicted vs Actual')
    ax.plot([lowest, highest], [lowest, highest], color='red', linestyle='--', label='Ideal Line')
    ax.set_xlabel('Actual Values')
    ax.set_ylabel('Predicted Values')
    ax.set_title('Regression: Predicted vs Actual Values')
    ax.legend()
    plt.show()

In [None]:
# Visual check on the test split: predicted vs actual Rings, with the
# dashed diagonal marking perfect predictions.
plot_regression(X_test, y_test, weights, bias)