## Imports

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from implementations import *
%load_ext autoreload
%autoreload 2

## Checking the first sample

In [None]:
# Long running: loads the training set.
# load_data's result is unpacked as (y, x); row 0 of x and entry 0 of y are
# printed as a quick sanity check of what was loaded.
(y, x) = load_data(train=True)
print(f"First sample: {x[0,:]}", f"First sample label: {y[0]}", sep="\n")

## Tests

In [None]:
# Toy regression problem reused by the loss/gradient checks in the cells below:
# five samples with a single feature each, plus a fixed two-entry weight vector.
testing_x = np.arange(1, 6).reshape(-1, 1)
testing_y = np.array([1, 1, 2, 2, 4])
testing_w = np.array([-0.1, 0.7])

print(f"X:\n {testing_x}\n")

# Standardization: the call yields the transformed features plus two extra values
# (presumably the mean and std used - verify against implementations.py).
(testing_sx, testing_mean_x, testing_std_x) = standardize(testing_x)
print(f"X normalized:\n {testing_sx}\n")

# Adding the bias column to X; testing_tx is what the checks below pass as tx.
testing_tx = add_x_bias(testing_sx)
print(f"X with bias:\n {testing_tx}")


### MSE

In [None]:
# Eyeball check of compute_mse on the toy data: the "Got" line should agree
# with the "Expected" line up to rounding.
print("MSE TEST")
print(f"Got:{compute_mse(testing_y, testing_tx, testing_w)}")
print("Expected:", 4.71, sep="")

### MAE

In [None]:
# Eyeball check of compute_mae on the toy data: the "Got" line should agree
# with the "Expected" line up to rounding.
print("MAE TEST")
print(f"Got:{compute_mae(testing_y, testing_tx, testing_w)}")
print("Expected:", 2.1, sep="")

### MSE Gradient

In [None]:
# Eyeball check of compute_mse_gradient on the toy data: the "Got" line should
# agree with the "Expected" line up to rounding.
print("MSE Gradient TEST")
print(f"Got:{compute_mse_gradient(testing_y, testing_tx, testing_w)}")
print("Expected:", [-2.1, -0.29], sep="")

### MAE Gradient

In [None]:
# Eyeball check of compute_mae_gradient on the toy data: the "Got" line should
# agree with the "Expected" line up to rounding.
print("MAE Gradient TEST")
print(f"Got:{compute_mae_gradient(testing_y, testing_tx, testing_w)}")
print("Expected:", [-1, -0], sep="")

## Assignment

### Replacing -999 with the column mean (trial and error)

In [None]:
# OLD SOLUTION - FIXED.
# The previous version looped over range(n_columns + 1) but indexed x[i, :], i.e. rows,
# so it averaged along rows (and ran one index past the column count) instead of
# computing a per-column mean. This version walks the columns of x and replaces every
# -999 in place by the mean of the remaining entries of that column.
# Assumes x is a 2-D float NumPy array - TODO confirm against load_data.
for i in range(np.shape(x)[1]):
    is_missing = x[:, i] == -999  # True where this column holds the -999 placeholder
    # Skip columns with nothing to replace, and columns with no valid entry to average.
    if is_missing.any() and not is_missing.all():
        x[is_missing, i] = x[~is_missing, i].mean()

In [None]:
# NEW SOLUTION - same logic as replace_min_999_by_col_mean() in implementations.py,
# spelled out here on a small 3x3 example so the result can be checked by eye.
test_stuff = np.array([
    [1., 2., -999.],
    [4., -999., 9.],
    [1., -999., 0.],
])
print(test_stuff)

# The inline steps below stand in for: print(replace_min_999_by_col_mean(test_stuff))

# Integer mask: 1 at every -999 entry, 0 elsewhere (computed once, before any replacement).
missing = (test_stuff == -999).astype(int)
for col_idx, (column, column_mask) in enumerate(zip(test_stuff.T, missing.T)):
    # Mean of the column with the -999 entries masked out.
    valid_mean = np.ma.masked_array(column, column_mask).mean(axis=0)
    # Put the mean where -999 was, keep every other value.
    test_stuff[:, col_idx] = np.where(column == -999, valid_mean, column)

print(test_stuff)

### Pre-processing

In [None]:
# Load train data again so the pre-processing starts from the raw values.
(y, x) = load_data(train=True)

# Handle the invalid values first (by its name, the helper swaps -999 for column
# means - verify in implementations.py), then standardize; the two extra values
# returned by standardize are kept as mean_x / std_x.
x, mean_x, std_x = standardize(replace_min_999_by_col_mean(x))

# Add the bias term; tx is the matrix handed to the models in the cells below.
tx = add_x_bias(x)

### Linear regression using gradient descent

In [None]:
# Gradient-descent settings: start from all-zero weights, 100 iterations, step size 1e-10.
# The weight vector gets one entry per column of tx (rather than a hard-coded 31), so the
# cell keeps working if the number of features produced by the pre-processing changes.
initial_w, max_iters, gamma = np.zeros(tx.shape[1]), 100, 1e-10
w, loss = least_squares_GD(y, tx, initial_w, max_iters, gamma)
print(f"Loss: {loss}\n w: {w}")

### Linear regression using stochastic gradient descent

In [None]:
# Stochastic-gradient-descent settings: all-zero start, 100 iterations, step size 1e-10.
# The weight vector gets one entry per column of tx (rather than a hard-coded 31), so the
# cell keeps working if the number of features produced by the pre-processing changes.
# NOTE(review): no random seed is set in this notebook - confirm whether
# least_squares_SGD seeds its own sampling if reproducible output is needed.
initial_w, max_iters, gamma = np.zeros(tx.shape[1]), 100, 1e-10
w, loss = least_squares_SGD(y, tx, initial_w, max_iters, gamma)
print(f"Loss: {loss}\n w: {w}")

### Least squares

In [None]:
# Fit with least_squares on the pre-processed data; the call returns a pair that is
# unpacked into the weights and the loss, which are then reported.
least_squares_result = least_squares(y, tx)
w, loss = least_squares_result
print(f"Loss: {loss}\n w: {w}")

### Ridge regression

In [None]:
# Regularization strength passed to ridge_regression. The cell previously assigned
# `lambdas` but passed `lambda_`, which was never defined in this notebook.
lambda_ = 1
w, mse = ridge_regression(y, tx, lambda_)
# Report the value returned by this call (bound to `mse`), not the stale `loss`
# left over from the least-squares cell.
print(f"Loss: {mse}\n")

### Logistic regression using gradient descent or SGD (y ∈ {0, 1})

### Regularized logistic regression using gradient descent or SGD (y ∈ {0, 1}, with regularization term $\lambda \lVert w \rVert^2$)