# 03 — Regression

**When to use it:** to predict a continuous value (linear regression) or a probability / class label (logistic regression).

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the example dataset used throughout the linear-regression section.
csv_path = "../data/linear_regression.csv"
reg = pd.read_csv(csv_path)

# Preview the first rows as this cell's output.
reg.head()

## Fitting a simple line with numpy.polyfit

In [None]:
# Degree-1 least-squares fit of y on x. The coefficient array is unpacked
# in order as (slope, intercept).
coef = np.polyfit(reg["x"], reg["y"], deg=1)
slope, intercept = coef
(slope, intercept)

In [None]:
# Scatter the observations and overlay the line fitted with polyfit.
# Uses the explicit Figure/Axes interface instead of the implicit pyplot state.
fig, ax = plt.subplots()
ax.scatter(reg["x"], reg["y"], alpha=0.6, label="data")

# Evaluate the fitted line on an evenly spaced grid spanning the observed x-range.
xline = np.linspace(reg["x"].min(), reg["x"].max(), 100)
yline = slope*xline + intercept

# Contrasting colour plus a legend so the fit is identifiable against the points.
ax.plot(xline, yline, color="tab:red", label="fitted line")
ax.set_title("Linear Regression (polyfit)")
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.legend()
plt.show()

Note: For more advanced regression work (categorical predictors, model diagnostics, logistic models), you can use `statsmodels`.

In [None]:
# Simple logistic regression example with synthetic data
from scipy.special import expit

np.random.seed(1)
X = np.random.normal(size=400)
# True model: logit(p) = -0.5 + 2*X
p = expit(-0.5 + 2*X)
y = np.random.binomial(1, p)

# Minimal batch gradient descent on the mean negative log-likelihood (demonstration).
# (For a real project, statsmodels or scikit-learn is recommended.)
beta0, beta1 = 0.0, 0.0
lr = 0.5            # step size applied to the averaged gradient
max_iter = 20_000   # safety cap so the loop always terminates
tol = 1e-8          # stop once both gradient components are this small
for _ in range(max_iter):
    z = beta0 + beta1*X
    pred = expit(z)
    resid = pred - y
    # Gradient of the mean negative log-likelihood w.r.t. intercept and slope.
    g0 = np.mean(resid)
    g1 = np.mean(resid*X)
    beta0 -= lr*g0
    beta1 -= lr*g1
    # Run to convergence rather than stopping after a fixed, too-small budget.
    if max(abs(g0), abs(g1)) < tol:
        break

beta0, beta1