# Week 2 — From-Scratch ML: Code Pack Demo

In [1]:
import numpy as np
from src.linear_regression import LinearRegressionGD, LinearRegressionNormal
from src.logistic_regression import LogisticRegressionGD
from src.decision_tree import DecisionTreeClassifierScratch, DecisionTreeRegressorScratch
from src.random_forest import RandomForestClassifierScratch, RandomForestRegressorScratch
from src.svm import LinearSVMClassifierSGD
from src.metrics import mae, mse, rmse, r2, accuracy, precision, recall, f1
from src.utils import train_test_split, standardize, make_regression, make_classification

## Linear Regression (Normal Equation vs Gradient Descent)

In [2]:
# Build a synthetic regression set and hold out 25% of it for evaluation.
# The third value returned by make_regression (w_true) is not used in this cell.
X, y, w_true = make_regression(n_samples=400, n_features=3, noise=0.7, random_state=42)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_state=0)

# Closed-form solver: fit on the training split, score on the held-out split.
lin_ne = LinearRegressionNormal(fit_intercept=True).fit(X_tr, y_tr)
pred_ne = lin_ne.predict(X_te)
ne_scores = (rmse(y_te, pred_ne), r2(y_te, pred_ne))
print("NormalEq: RMSE=%.3f  R2=%.3f" % ne_scores)

# Gradient-descent solver on the same split, so the two score lines are directly comparable.
lin_gd = LinearRegressionGD(
    lr=0.1,
    n_epochs=500,
    l2=1e-4,
    fit_intercept=True,
    random_state=0,
).fit(X_tr, y_tr)
pred_gd = lin_gd.predict(X_te)
gd_scores = (rmse(y_te, pred_gd), r2(y_te, pred_gd))
print("GradDesc: RMSE=%.3f  R2=%.3f" % gd_scores)

NormalEq: RMSE=0.669  R2=0.330
GradDesc: RMSE=0.669  R2=0.330


## Logistic Regression (Binary)

In [3]:
# Generate the binary classification data and split BEFORE scaling, so the
# held-out rows never contribute to the standardization statistics
# (scaling the full dataset first leaks test information into training).
X, y = make_classification(n_samples=400, random_state=7)
X_tr_raw, X_te_raw, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=1)

# Fit the scaling on the training split only, then reuse its statistics for the test split.
# NOTE(review): assumes standardize returns the exact mean/std it scaled by,
# i.e. X_scaled == (X - mean) / std — confirm against src.utils.
X_tr, mean, std = standardize(X_tr_raw)
X_te = (X_te_raw - mean) / std

logit = LogisticRegressionGD(lr=0.2, n_epochs=2000, l2=1e-3, random_state=0).fit(X_tr, y_tr)
y_pred = logit.predict(X_te)

# Report the four classification metrics on the held-out split.
logit_scores = (
    accuracy(y_te, y_pred),
    precision(y_te, y_pred),
    recall(y_te, y_pred),
    f1(y_te, y_pred),
)
print("Accuracy=%.3f  Precision=%.3f  Recall=%.3f  F1=%.3f" % logit_scores)

Accuracy=1.000  Precision=1.000  Recall=1.000  F1=1.000


## Decision Trees (Classifier & Regressor)

In [4]:
# Classifier
# Split first, then standardize using statistics from the training rows only,
# so the held-out rows cannot influence the features the tree is trained on.
Xc, yc = make_classification(n_samples=300, random_state=3)
Xc_tr_raw, Xc_te_raw, yc_tr, yc_te = train_test_split(Xc, yc, test_size=0.3, random_state=3)

# NOTE(review): assumes standardize returns the exact mean/std it scaled by,
# i.e. X_scaled == (X - mean) / std — confirm against src.utils.
Xc_tr, mean, std = standardize(Xc_tr_raw)
Xc_te = (Xc_te_raw - mean) / std

dtc = DecisionTreeClassifierScratch(max_depth=4, random_state=0).fit(Xc_tr, yc_tr)
yc_pred = dtc.predict(Xc_te)
print("DT Classifier: Acc=%.3f  F1=%.3f" % (accuracy(yc_te, yc_pred), f1(yc_te, yc_pred)))

# Regressor
# Single-feature synthetic regression data; the third return value is discarded.
Xr, yr, _ = make_regression(n_samples=300, n_features=1, noise=2.0, random_state=5)
Xr_tr, Xr_te, yr_tr, yr_te = train_test_split(Xr, yr, test_size=0.3, random_state=4)
dtr = DecisionTreeRegressorScratch(max_depth=3, random_state=0).fit(Xr_tr, yr_tr)
yr_pred = dtr.predict(Xr_te)
# NOTE(review): the recorded run prints a negative R2 for this regressor. If r2 is the
# usual coefficient of determination, that is worse than predicting the mean — presumably
# the signal is weak relative to noise=2.0; verify before drawing conclusions.
print("DT Regressor: RMSE=%.3f  R2=%.3f" % (rmse(yr_te, yr_pred), r2(yr_te, yr_pred)))

DT Classifier: Acc=1.000  F1=1.000
DT Regressor: RMSE=2.088  R2=-0.059


## Random Forest (Classifier & Regressor)

In [5]:
# Classifier
# Trains on the Xc_*/yc_* classification split created in the decision-tree cell.
rfc = RandomForestClassifierScratch(
    n_estimators=50,
    max_depth=6,
    max_features=1,
    random_state=0,
).fit(Xc_tr, yc_tr)
yc_pred_rf = rfc.predict(Xc_te)
rfc_scores = (accuracy(yc_te, yc_pred_rf), f1(yc_te, yc_pred_rf))
print("RF Classifier: Acc=%.3f  F1=%.3f" % rfc_scores)

# Regressor
# Trains on the Xr_*/yr_* regression split created in the decision-tree cell.
rfr = RandomForestRegressorScratch(
    n_estimators=50,
    max_depth=5,
    max_features=1,
    random_state=0,
).fit(Xr_tr, yr_tr)
yr_pred_rf = rfr.predict(Xr_te)
rfr_scores = (rmse(yr_te, yr_pred_rf), r2(yr_te, yr_pred_rf))
print("RF Regressor: RMSE=%.3f  R2=%.3f" % rfr_scores)

RF Classifier: Acc=1.000  F1=1.000
RF Regressor: RMSE=2.065  R2=-0.036


## Linear SVM (Classifier via SGD)

In [6]:
# Generate the data and split BEFORE scaling, so the held-out rows never
# contribute to the standardization statistics used for training.
Xsvm, ysvm = make_classification(n_samples=400, random_state=11)
Xtr_raw, Xte_raw, ytr, yte = train_test_split(Xsvm, ysvm, test_size=0.25, random_state=2)

# Fit the scaling on the training split only, then reuse its statistics for the test split.
# NOTE(review): assumes standardize returns the exact mean/std it scaled by,
# i.e. X_scaled == (X - mean) / std — confirm against src.utils.
Xtr, mean, std = standardize(Xtr_raw)
Xte = (Xte_raw - mean) / std

svm = LinearSVMClassifierSGD(lr=0.05, n_epochs=30, lam=1e-3, random_state=0).fit(Xtr, ytr)
yp = svm.predict(Xte)
print("SVM (linear) Accuracy=%.3f  F1=%.3f" % (accuracy(yte, yp), f1(yte, yp)))

SVM (linear) Accuracy=1.000  F1=1.000
