In [None]:
import scipy.io
import numpy as np
import random
import matplotlib.pyplot as plt
import matplotlib as mpl

from visualize import visualize
from show_quantization import show_quantization

# Linear Regression (Continued)

In [None]:
# Load the diabetes dataset from the MATLAB file shipped with the lab.
# (Plain string literal: the previous f-string had no placeholders.)
df = scipy.io.loadmat("data/diabetes.mat")
X = df["X"]
# X is 2-D: one row per sample, one column per feature.
n_samples, n_feats = X.shape
t = df["t"]
# NOTE(review): despite the `t_` prefix these look like labels for the ten
# columns of X rather than for the target -- confirm against the data file.
t_names = ["age", "sex", "bmi", "blood_pressure", "serum_1", 
           "serum_2", "serum_3", "serum_4", "serum_5", "serum_6"]

In [None]:
# With rowvar=False every column is a variable; `t` is appended after the
# columns of X, so the last row of the matrix holds the correlations of the
# target with everything else.
corr_matrix = np.corrcoef(X, t, rowvar=False)
# Keep only the target-vs-feature entries (drop the trailing self-correlation).
corr_vec = corr_matrix[-1, :n_feats]
# Feature indices sorted by increasing |correlation|: strongest ones are last.
most_corr = np.argsort(np.abs(corr_vec))

In [None]:
from sklearn.base import BaseEstimator

class MyLinearRegressor(BaseEstimator):
    """Least-squares linear regressor solved with the Moore-Penrose pseudo-inverse.

    Parameters
    ----------
    add_bias : bool, default True
        When True a constant column of ones is appended to the inputs during
        ``fit`` so that an intercept is learned alongside the coefficients.

    Attributes (set by ``fit``)
    ---------------------------
    coeffs : ndarray of shape (n_targets, n_feats)
        One row of feature weights per target, so ``coeffs[0]`` is the weight
        vector of the first (usually only) target.
    bias : ndarray of shape (n_targets,)
        Intercept per target; all zeros when ``add_bias`` is False.
    """

    def __init__(self, add_bias=True):
        super().__init__()
        self.add_bias = add_bias
        
    def fit(self, X, y):
        """Fit the model on ``X`` (n_samples, n_feats) and ``y``; return ``self``."""
        if self.add_bias:
            # The bias is modelled as the weight of a trailing all-ones feature.
            X = np.concatenate((X, np.ones((X.shape[0], 1))), axis=-1)
        if len(y.shape) < 2:
            y = np.expand_dims(y, axis=-1)
        # Closed-form least-squares solution W = X^+ y,
        # of shape (n_feats [+ 1], n_targets).
        weights = np.linalg.pinv(X) @ y
        if self.add_bias:
            # Last row belongs to the ones column appended above.
            self.coeffs = weights[:-1].T
            self.bias = weights[-1]
        else:
            self.coeffs = weights.T
            self.bias = np.zeros(weights.shape[1])
        return self
    
    def predict(self, X):
        """Return predictions of shape (n_samples, n_targets) for ``X``.

        ``coeffs`` is promoted to 2-D so that subclasses storing a flat weight
        vector still produce column-shaped predictions.
        """
        coeffs = np.atleast_2d(self.coeffs)
        return X @ coeffs.T + self.bias
    
    def fit_predict(self, X, y):
        """Fit on ``(X, y)`` and return the predictions for that same ``X``."""
        return self.fit(X, y).predict(X)
    
    def score(self, X, y_true):
        """Return the root mean square error of the predictions on ``X``.

        Lower is better. This is an error measure, not scikit-learn's R^2;
        it matches what ``MyRBFN.score`` reports in this notebook.
        """
        y_true = np.asarray(y_true)
        if y_true.ndim < 2:
            # Align with the (n_samples, n_targets) layout used by predict().
            y_true = np.expand_dims(y_true, axis=-1)
        residuals = self.predict(X) - y_true
        return float(np.sqrt(np.mean(residuals ** 2)))

In [None]:
# 1.1.1  Bivariate Linear Regression

# Compute the Root Mean Square Error
def compute_rmse(predict, target):
    """Return the root mean square error between predictions and targets.

    Parameters
    ----------
    predict : array-like
        Predicted values.
    target : array-like
        Ground-truth values, holding the same number of elements.

    Returns
    -------
    float
        ``sqrt(mean((predict - target) ** 2))`` taken over every element.
    """
    predict = np.asarray(predict, dtype=float)
    target = np.asarray(target, dtype=float)
    # A flat (n,) prediction subtracted from an (n, 1) target would silently
    # broadcast to an (n, n) matrix; align the layouts when the sizes agree.
    if predict.shape != target.shape and predict.size == target.size:
        predict = predict.reshape(target.shape)
    return float(np.sqrt(np.mean((predict - target) ** 2)))


# Create one 1-var linear regressor and one 2-vars linear regressor, then
# predict the target values with both models.
# `most_corr` is sorted by increasing |correlation|, so the features most
# correlated with the target are taken from its end.
linreg1 = MyLinearRegressor()
linreg2 = MyLinearRegressor()

predict = linreg1.fit_predict(X[:, most_corr[-1:]], t)
predict2 = linreg2.fit_predict(X[:, most_corr[-2:]], t)

print(compute_rmse(predict, t))
print(compute_rmse(predict2, t))


In [None]:
# 1.1.2  Multivariate Linear Regression
###################
# INSERT CODE HERE
###################


In [None]:
# 1.1.3  Train-test Generalization
# Work on copies so the shuffling below never touches the original arrays.
X_copy, t_copy = X.copy(), t.copy()

def unison_shuffled_copies(a, b):
    """Shuffle two equally long arrays with one shared random permutation.

    Row ``k`` of the first output and row ``k`` of the second output come
    from the same original index, so paired samples stay paired. The
    permutation is drawn from NumPy's global random state.
    """
    n_rows = len(a)
    assert n_rows == len(b)
    order = np.random.permutation(n_rows)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

# Shuffle samples and targets together, then cut the data into two halves.
X_perm, t_perm = unison_shuffled_copies(X_copy, t_copy)
half_index = X_perm.shape[0]//2

# First half is used for fitting, second half is held out.
X1, X2 = X_perm[:half_index, :], X_perm[half_index:, :]
t1, t2 = t_perm[:half_index, :], t_perm[half_index:, :]

# Restrict both halves to the `i` features most correlated with the target.
i = 3
best_indices = most_corr[-i:]
xx1, xx2 = X1[:, best_indices], X2[:, best_indices]

# Fit on the first half only, then predict on both halves.
linreg = MyLinearRegressor()
linreg.fit(xx1, t1)
predict_X1 = linreg.predict(xx1)
predict_X2 = linreg.predict(xx2)

plt.figure(figsize=(10, 5), dpi=80)

# Left panel: predictions vs. targets on the half used for fitting.
plt.subplot(1,2,1)
plt.scatter(predict_X1, t1)
plt.title(f'(X1, t1) \n {i} best corrcoeff -> RMSE: {compute_rmse(predict_X1, t1)}')

# Right panel: same plot on the held-out half.
plt.subplot(1,2,2)
plt.scatter(predict_X2, t2)
plt.title(f'(X2, t2) \n {i} best corrcoeff -> RMSE: {compute_rmse(predict_X2, t2)}')




In [None]:
# 1.1.4 Stochastic Gradient Descent

class MySGDLinearRegressor(MyLinearRegressor):
    """Two-feature linear regressor trained by stochastic gradient descent.

    Parameters
    ----------
    n_epochs : int, default 100
        Number of full passes over the training samples.
    lr : float, default 1e-3
        Initial learning rate.
    lr_annealing : float, default 0.99
        Factor the learning rate is multiplied by after each epoch.
    **kwargs
        Forwarded to ``MyLinearRegressor.__init__``.
    """

    def __init__(self, n_epochs=100, lr=1e-3, lr_annealing=0.99, **kwargs):
        super().__init__(**kwargs)
        self.n_epochs = n_epochs
        self.lr = lr
        self.lr_annealing = lr_annealing
        
    def fit(self, X, y):
        """Run per-sample SGD on the squared error and return ``self``.

        ``X`` must have exactly two columns. Only the first target column of
        ``y`` is used. After fitting, ``coeffs`` is a flat array ``[w0, w1]``
        and ``bias`` is a scalar.
        """
        assert X.shape[-1] == 2, "Only 2 features are supported for now"
        if len(y.shape) < 2:
            y = np.expand_dims(y, axis=-1)
        
        # Fixed starting point for the two weights and the bias.
        w0 = 1.0
        w1 = 100.0
        b  = -10.0
        lr = self.lr
        ## Compute coeffs
        for _ in range(self.n_epochs):
            for x, ytrue in zip(X, y):
                # Compute Gradient of (prediction - target)^2 for this sample
                residual = (w0 * x[0] + w1 * x[1] + b) - ytrue[0]
                grad_w0 = 2.0 * residual * x[0]
                grad_w1 = 2.0 * residual * x[1]
                grad_b = 2.0 * residual

                # Update weights
                w0 -= lr * grad_w0
                w1 -= lr * grad_w1
                b -= lr * grad_b

            # Update lr once per epoch: decaying after every sample would
            # shrink the step size to ~0 before the first epoch completes.
            lr *= self.lr_annealing

        self.coeffs = np.array([w0, w1])
        self.bias = b
        return self
# Keep only the two features most correlated with the target.
best_indices = most_corr[-2:]
xx = X[:, best_indices]

# Closed-form reference model, fitted and scored on the same data.
linreg = MyLinearRegressor()
real_score = linreg.fit(xx, t).score(xx, t)

# SGD-trained model evaluated the same way.
sgdlinreg = MySGDLinearRegressor(n_epochs=100, lr=1e-3, lr_annealing=0.9)
sgd_score = sgdlinreg.fit(xx, t).score(xx, t)

# Side-by-side report of scores and learned parameters.
print(f"Pseudo-inverse method: {real_score:5.2f} (coeffs: {linreg.coeffs[0]}, bias={linreg.bias})")
print(f"SGD method:            {sgd_score:5.2f} (coeffs: {sgdlinreg.coeffs}, bias={sgdlinreg.bias})")

# Radial Basis Function Network


In [None]:
# Pick the dataset for the RBFN experiments (swap the comment to change it).
data_file = "data/bell_shaped.mat"
# data_file = "data/wave_shaped.mat"

# The .mat file stores the train and test splits under separate keys.
df = scipy.io.loadmat(data_file)
x_train, x_test = df['X_train'], df['X_test']
t_train, t_test = df['T_train'], df['T_test']

In [None]:
# Display the training split, then the test split, with the course helper.
# NOTE(review): `visualize` is imported from the local `visualize` module;
# what exactly it renders is not visible from this notebook.
visualize(x_train,t_train)
visualize(x_test,t_test)

In [None]:
# Put your competitive-learning code here
def comp_learning(X, Y, n_epochs=100, alpha=0.1, beta=0.99, min_epsilon=1e-3):
    """Placeholder for the competitive-learning routine (not implemented yet).

    As written the function does nothing and returns None.

    NOTE(review): MyRBFN.fit_centers passes the initial centroids as `Y` and
    unpacks the result into two values (`self.c, self.i`), with `self.c`
    expected to have shape (nb_centers, X.shape[1]). Until this is
    implemented that unpacking raises TypeError.
    NOTE(review): presumably `alpha` is the learning rate, `beta` its decay
    factor and `min_epsilon` a stopping threshold -- confirm with the lab
    statement before implementing.
    """
    ###################
    # INSERT CODE HERE
    ###################
    pass

# Centroid Initialization
def get_inits(X, Q, method = "sample"):
    """Return ``Q`` initial centroids for the samples in ``X``.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_feats)
        Data the centroids are initialised for.
    Q : int
        Number of centroids to create.
    method : {"sample", "random"}, default "sample"
        ``"sample"`` picks ``Q`` distinct rows of ``X`` (Python ``random``
        module). ``"random"`` draws each coordinate uniformly between the
        per-feature minimum and maximum of ``X`` (NumPy global random state).

    Returns
    -------
    ndarray of shape (Q, n_feats)

    Raises
    ------
    KeyError
        If ``method`` is not one of the supported names.
    ValueError
        From ``random.sample`` when ``method="sample"`` and ``Q`` exceeds the
        number of rows in ``X``.
    """
    # Only the requested initialisation is computed, so "random" keeps working
    # even when Q is larger than the number of samples.
    if method == "random":
        mini = np.min(X, axis=0)
        maxi = np.max(X, axis=0)
        # The per-feature bounds broadcast across the Q rows.
        return np.random.uniform(mini, maxi, size=(Q, X.shape[-1]))
    if method == "sample":
        return np.array(random.sample(X.tolist(), Q))
    raise KeyError(method)

In [None]:
class MyRBFN():
    """Skeleton of a radial basis function network.

    Training runs in three stages (see ``fit``): place the centers, set the
    widths, then fit a linear read-out held in ``self.linear_model``. The
    width, weight, transform and predict stages are still placeholders.

    Parameters
    ----------
    nb_centers : int
        Number of centers requested from the centroid initialisation.
    width_scaling : float
        Stored for the width-fitting stage.
        NOTE(review): presumably a multiplier applied to the basis-function
        widths -- confirm once fit_widths is implemented.
    """

    def __init__(self, nb_centers, width_scaling):
        super().__init__()
        self.nb_centers = nb_centers
        self.width_scaling = width_scaling

        self.linear_model = MyLinearRegressor(add_bias = True)
        
    def fit_centers(self,X):
        """Initialise the centroids from ``X`` and refine them with ``comp_learning``."""
        centroid_inits = get_inits(X,self.nb_centers)
        # c is of shape (nb_centers,X.shape[1])
        # NOTE(review): the meaning of the second returned value (self.i) is
        # defined by comp_learning and is not visible here.
        self.c, self.i = comp_learning(X, centroid_inits, n_epochs=100, alpha=0.1, beta=0.99, min_epsilon=1e-3)
    
    def fit_widths(self,X):
        """Placeholder: basis-function widths are not computed yet."""
        ###################
        # INSERT CODE HERE
        ###################
        pass
        
    def fit_weights(self,X,y):
        """Placeholder: the linear read-out is not fitted yet."""
        ###################
        # INSERT CODE HERE
        ###################
        pass
        
    def fit(self, X, y):
        """Run the three training stages in order and return ``self``.

        Returning the instance is what lets ``fit_predict`` chain
        ``.fit(...).predict(...)``.
        """
        self.fit_centers(X)
        self.fit_widths(X)
        self.fit_weights(X,y)
        return self
    
    def non_linear_transform(self,X):
        """Placeholder: the radial-basis feature map is not implemented yet."""
        ###################
        # INSERT CODE HERE
        ###################
        pass
    
    def predict(self, X):
        """Placeholder: currently returns None."""
        ###################
        # INSERT CODE HERE
        ###################
        pass
    
    def fit_predict(self, X, y):
        """Fit on ``(X, y)`` and return the predictions for that same ``X``."""
        return self.fit(X, y).predict(X)
    
    def score(self, X, y_true):
        """Return the RMSE between ``predict(X)`` and ``y_true`` (via ``compute_rmse``)."""
        y = self.predict(X)
        return compute_rmse(y, y_true)

In [None]:
rbfn = MyRBFN(nb_centers =40, width_scaling = 4.)
rbfn.fit(x_train,t_train)

In [None]:
# Try this after you implemented fit_centers() 
show_quantization(x_train,rbfn.c)

In [None]:
visualize(x_test,rbfn.predict(x_test))
visualize(x_test,t_test)
rbfn.score(x_test,t_test)