<a href="https://colab.research.google.com/github/Kevin-White/TablePlusPlus/blob/main/HW2_swertnds.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [121]:
import numpy as np
import matplotlib.pyplot as plt

# Problem 1
## Dataset Generation

Write a function to **generate a training set** of size $m$
- randomly generate a weight vector $w \in \mathbb{R}^{10}$, normalize length
- generate a training set $\{(x_i , y_i)\}$ of size m
  - $x_i$: random vector in $\mathbb{R}^{10}$ from $\textbf{N}(0, I)$
  - $y_i$: $\{0, +1\}$ with $P[y = +1] = \sigma(w \cdot x_i)$ and $P[y = 0] = 1 - \sigma(w \cdot x_i)$

In [122]:
from re import X
import time
import random
from scipy.special import expit

#sigmoid function
def sigmoid(x):
  return 1/ (1 + np.exp(-x))


#number training function
def wstar_set(m):
  wstar = np.random.randn(10)
  wstar /= np.linalg.norm(wstar)
  return wstar

def X_set(m):
  X = np.random.randn(m, 10)
  return X

def Y_set(X, wstar):
  p = sigmoid(wstar.dot(X))
  if np.random.uniform() :
    n = 1
  else:
    n= 0
  Y = np.vecotrize(n)
  return Y


## Algorithm 1: logistic regression

The goal is to learn $w$.  Algorithm 1 is logistic
  regression (you may use the built-in method LogisticRegression for this. Use max_iter=1000).

In [123]:
from sklearn.linear_model import LogisticRegression

def logistic_reg (X,Y):
  lr = LogisticRegression(max_iter=1000)
  lr.fit(X, Y)
  w_lr = lr.coef_[0]
  return w_lr





## Algorithm 2: gradient descent with square loss

Define square loss as
$$L_i(w^{(t)}) = \frac{1}{2} \left( \sigma(w^{(t)} \cdot x) - y_i \right)^2$$

  Algorithm 2 is
  gradient descent with respect to square loss (code this
  up yourself -- run for 1000 iterations, use step size eta = 0.01).

In [124]:
def grad_desc (X,Y,w, learning_rate, num_itt):
  m = len(X)
  for _ in range(num_itt):
    p=sigmoid(np.dot(X,w))
    grad = np.dot(X.T, (p-Y))/m
    w_gd -= learning_rate*grad
  return w_gd

## Algorithm 3: stochastic gradient descent with square loss
Similar to gradient descent, except we use the gradient at a single random training point every iteration.

In [125]:
def stoch_grad (X,Y,w , learning_rate, num_itt):
  m = len(X)
  for _ in range(num_itt):
    random_index=random.randint(0, m-1)
    xi = X[random_index]
    yi = Y[random_index]
    p=sigmoid(np.dot(xi,w))
    grad = xi * (p-yi)
    w_sgd -= learning_rate*grad
  return w_sgd



## Evaluation

Measure error $\|w - \hat{w}\|_2$ for each method at different sample size. For any
  fixed value of $m$, choose many different $w$'s and average the
  values $\|w -
  \hat{w}\|_2$ for Algorithms 1, 2 and 3.  Plot the results
  for for each algorithm as you make $m$ large (use $m=50, 100, 150, 200, 250$).
  Also record, for each algorithm, the time taken to run the overall experiment.

In [126]:
#create lists to store avg and execution times to plot
avg_d_lr = []
avg_d_gd = []
avg_d_sgd = []
extime_lr = []
extime_gd = []
extime_sgd = []




#Parameters
mval = [50,100,150,200,250]
numexp = 10

#Create lists to store distances and times of results of tests
for m in mval:
  dist_lr = []
  dist_gd = []
  dist_sgd = []
  time_lr = []
  time_gd = []
  time_sgd = []

  #Calculations
  for _ in range(numexp):
    wstar=wstar_set(m)
    X=X_set(m)
    Y=Y_set(X, wstar)   #ran out of time to debug this line


    # Logistic Regression calc
    lr_start_time = time.time()
    w_lr = logistic_reg (X,Y)
    time_lr = time.time()-lr_start_time
    dist_lr = np.linalg.norm(wstar-w_lr)


    # Gradient Decent
    gd_start_time = time.time()
    w_gd = grad_desc (X,Y,np.zeros(10), learning_rate = 0.1, num_itt = 1000)
    time_gd = time.time()-gd_start_time
    dist_gd = np.linalg.norm(wstar-w_gd)


    # Stochastic Gradient Decent
    sgd_start_time = time.time()
    w_sgd = stoch_grad (X,Y,np.zeros(10) , learning_rate = 0.1, num_itt = 1000)
    time_sgd = time.time()-sgd_start_time
    dist_sgd = np.linalg.norm(wstar-w_gd)


    #append lists
    dist_lr.append(dist_lr)
    dist_gd.append(dist_gd)
    dist_sgd.append(dist_sgd)
    time_lr.append(time_lr)
    time_gd.append(time_gd)
    time_sgd.append(time_sgd)

  #Calculate avg distances and exe times
  avg_d_lr.append(np.mean(dist_lr))
  avg_d_gp.append(np.mean(dist_gd))
  avg_d_sgd.append(np.mean(dist_sgd))
  extime_lr.append(np.mean(time_lr))
  extime_gd.append(np.mean(time_gd))
  extime_sgd.append(np.mean(time_sgd))


# Print and analyze results
for i, m in enumerate(mval):
    print(f"Results for m = {m}:")
    print(f"Average Distance (Logistic Regression): {avg_d_lr[i]:.4f}")
    print(f"Average Distance (Gradient Descent): {avg_d_gd[i]:.4f}")
    print(f"Average Distance (Stochastic Gradient Descent): {avg_d_sgd[i]:.4f}")
    print(f"Average Execution Time (Logistic Regression): {extime_lr[i]:.4f} seconds")
    print(f"Average Execution Time (Gradient Descent): {extime_gd[i]:.4f} seconds")
    print(f"Average Execution Time (Stochastic Gradient Descent): {extime_sgd[i]:.4f} seconds")
    print("-" * 50)



# Plot the average distances
plt.figure(figsize=(10, 6))
plt.plot(mval, avg_d_lr, label='Logistic Regression', marker='o')
plt.plot(mval, avg_d_gd, label='Gradient Descent', marker='s')
plt.plot(mval, avg_d_sgd, label='Stochastic Gradient Descent', marker='^')

plt.xlabel('Training Set Size (m)')
plt.ylabel('Average Euclidean Distance')
plt.title('Comparison of Algorithms: Average Euclidean Distance vs. Training Set Size')
plt.legend()
plt.grid(True)
plt.show()





ValueError: shapes (10,) and (50,10) not aligned: 10 (dim 0) != 50 (dim 0)

# Problem 2

In [None]:
from sklearn import datasets

In [None]:
cancer = datasets.load_breast_cancer()

For each depth in $1, \dots, 5$, instantiate an AdaBoost classifier with the base learner set to be a decision tree of that depth (set `n_estimators=10` and `learning_rate=1`), and then record the 10-fold cross-validated error on the entire breast cancer data set. Plot the resulting curve of accuracy against base classifier depth. Use $101$ as your random state for both the base learner as well as the AdaBoost classifier every time.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score

x = cancer.data
y = cancer.target

depths = [1,2,3,4,5]
errors = []

for depth in depths:
    clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=depth, random_state=101),
                             n_estimators=10, learning_rate=1, random_state=101)
    scores = cross_val_score(clf, cancer.data, cancer.target, cv=10)
    errors.append(1 - np.mean(scores))

plt.plot(depths, errors, marker='o')
plt.xlabel('Base Classifier Depth')
plt.ylabel('Accuracy')
plt.title ('Accuracy vs. Base Classifier Depth for ADABoost')
plt.show()