In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the students CSV from the sibling data directory.
data = pd.read_csv(filepath_or_buffer="../data/2_students.csv")
# Show the first five rows as a quick look at what was loaded.
data.head(n=5)

Converting the string values of the 'Extracurricular Activities' column ('Yes'/'No') into binary (1/0)


In [None]:
# Encode 'Yes'/'No' as the integers 1/0 and give the column a real integer
# dtype. Writing the numbers through .loc leaves the column with object dtype,
# which pushes every later dot product onto slow object arithmetic.
# The 1 -> 1 and 0 -> 0 entries keep this cell safe to re-run; any other value
# maps to NaN and makes astype fail loudly instead of leaking into training.
data["Extracurricular Activities"] = (
    data["Extracurricular Activities"]
    .map({"Yes": 1, "No": 0, 1: 1, 0: 0})
    .astype("int64")
)

In [None]:
def zscore(data: pd.Series):
    """
    For Normalisation

    Standardise a numeric column to zero mean and unit sample standard
    deviation: ``(x - mean) / std`` with ``ddof=1``.

    Parameters
    ----------
    data : pd.Series
        Numeric column to scale.

    Returns
    -------
    pd.Series
        Scaled values on the same index. Missing values stay missing and are
        ignored when computing the mean and the standard deviation. A column
        with no spread (or fewer than two observed values) comes back as NaN.
    """
    mean = data.mean()
    # Series.std skips NaN in both the squared deviations and the divisor, so
    # only observed values are counted (dividing by data.size - 1 would count
    # missing entries too and under-estimate sigma).
    sigma = data.std(ddof=1)
    return (data - mean) / sigma

Z Score Normalisation


In [None]:
# Standardise every feature column; the 0/1 activity flag and the target
# ("Performance Index") are passed through unchanged.
data = data.apply(
    lambda column: column
    if column.name in ("Extracurricular Activities", "Performance Index")
    else zscore(column)
)
data.head()

Implementation


In [None]:
def cost_func(wv: pd.Series, xv: pd.DataFrame, b: float, y_actR: pd.Series):
    """
    Half mean squared error of the linear model ``xv.dot(wv) + b``.

    wv: weights, one entry per column of xv (labelled like xv's columns)
    xv: feature matrix, one row per sample
    b: intercept
    y_actR: observed targets, one per row of xv

    Returns sum((prediction - y_actR) ** 2) / (2 * m), where m is the number
    of rows (samples) in xv.
    """
    # np.size(xv) would count every cell (rows * columns); the cost must be
    # averaged over samples only, the same m that gradients() divides by.
    m = len(xv)
    y_predR = xv.dot(wv) + b
    return np.sum((y_predR - y_actR) ** 2) / (2 * m)

In [None]:
def gradients(wv: pd.Series, data: pd.DataFrame, b: float, yv_act: pd.Series):
    """
    Partial derivatives of the cost with respect to the weights and the bias.

    The feature matrix is `data` without its "Performance Index" column.

    Returns a tuple:

    df_dw: pd.Series, one entry per feature column
    df_db: float
    """
    features = data.drop(columns=["Performance Index"])
    n_samples = features.index.size
    # Prediction error per sample, shared by both derivatives.
    residuals = (features.dot(wv) + b) - yv_act
    weight_grad = pd.Series(
        np.dot(features.T, residuals) / n_samples,
        index=features.columns,
    )
    bias_grad = np.sum(residuals) / n_samples  # float
    return (weight_grad, bias_grad)

In [None]:
def descent(
    data: pd.DataFrame,
    yv_act: pd.Series,
    wv_init: pd.Series,
    b_init: float,
    alpha: float,
    max_iterations: int = 10000,
    tolerance: float = 1e-6,
):
    """
    The main func: batch gradient descent for the linear model.

    Parameters
    ----------
    data : pd.DataFrame
        Features plus the "Performance Index" target column.
    yv_act : pd.Series
        Observed target values.
    wv_init : pd.Series
        Starting weights, labelled like the feature columns.
    b_init : float
        Starting bias.
    alpha : float
        Learning rate.
    max_iterations : int, default 10000
        Upper bound on the number of recorded iterations.
    tolerance : float, default 1e-6
        The loop stops once both |df_db| and the norm of df_dw are at or
        below this value.

    Returns
    -------
    pd.DataFrame
        One row per recorded iteration with columns "wv" (list), "b",
        "cost_func", "wv_grad" (list) and "b_grad". The first update taken
        from (wv_init, b_init) is applied but not recorded.
    """
    xv = data.drop(columns=["Performance Index"])
    history_columns = ["wv", "b", "cost_func", "wv_grad", "b_grad"]

    # One unrecorded step from the initial guess, so the loop condition has a
    # gradient to test on its first check.
    df_dw, df_db = gradients(wv_init, data, b_init, yv_act)
    wv = wv_init - (alpha * df_dw)
    b = b_init - (alpha * df_db)

    # Collect plain records and build the frame once at the end; writing rows
    # one by one into a preallocated object-dtype frame is much slower.
    records = []
    iterations = 0
    while (
        abs(df_db) > tolerance or np.linalg.norm(df_dw) > tolerance
    ) and iterations < max_iterations:
        df_dw, df_db = gradients(wv, data, b, yv_act)
        wv = wv - (alpha * df_dw)
        b = b - (alpha * df_db)
        records.append(
            {
                "wv": wv.to_list(),
                "b": b,
                "cost_func": cost_func(wv, xv, b, yv_act),
                "wv_grad": df_dw.to_list(),
                "b_grad": df_db,
            }
        )
        iterations += 1
        if iterations % 1000 == 0:
            print(f"\r{iterations}/{max_iterations} Done!")

    history = pd.DataFrame(records, columns=history_columns)
    # Rows containing NaN are dropped, as before.
    return history.dropna()

Running the algorithm
(using two different learning rates, alpha, to compare their learning curves)

In [None]:
yv_act = data["Performance Index"]
# One zero weight per feature column: broadcasting the scalar over the feature
# index keeps the length in step with the frame instead of hard-coding five zeros.
wv_init = pd.Series(0, index=data.columns.drop("Performance Index"))
b_init = 0
alpha_1 = 0.03
alpha_2 = 0.001
newdata = descent(data, yv_act, wv_init, b_init, alpha_1)

In [None]:
# Repeat the run from the same starting point with the smaller learning rate.
newdata2 = descent(
    data=data, yv_act=yv_act, wv_init=wv_init, b_init=b_init, alpha=alpha_2
)

Learning Curve for 2 different Alphas

In [None]:
# Draw both cost histories on one explicit Axes so the two learning rates can
# be compared directly. Legend labels are built from alpha_1 / alpha_2 so they
# cannot drift from the values actually used for training.
fig, ax = plt.subplots(figsize=(16, 6))
sns.lineplot(
    data=newdata,
    x=newdata.index.set_names("Iterations"),
    y=newdata["cost_func"],
    color="red",
    label=f"Alpha: {alpha_1}",
    ax=ax,
)
sns.lineplot(
    data=newdata2,
    x=newdata2.index.set_names("Iterations"),
    y=newdata2["cost_func"],
    color="blue",
    label=f"Alpha: {alpha_2}",
    ax=ax,
)
# A title and readable axis labels let the figure stand alone; the trailing
# semicolon hides the return value in the cell output.
ax.set(title="Learning curve", xlabel="Iterations", ylabel="Cost");