In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Seed handed to np.random.seed so the random draws in this notebook are repeatable.
SEED = 3
# Number of (X, Y) points in the synthetic dataset.
no_of_samples = 100

In [None]:
# Fix NumPy's global random state for reproducible runs
np.random.seed(SEED)

$Y = 3.5*X + random\_noise$

### Generate data

In [None]:
def generate_data(num_samples: int):
    """Generate a noisy linear dataset following Y = 3.5 * X + noise.

    Args:
        num_samples: Number of (X, Y) points to generate.

    Returns:
        A tuple ``(X, Y)`` of 1-D arrays of length ``num_samples``. ``X`` holds
        the integers ``0 .. num_samples - 1``; ``Y`` is ``3.5 * X`` plus noise
        drawn uniformly from ``[-10, 20)`` using NumPy's global random state.
    """
    X = np.arange(num_samples)
    # Size the noise from the argument, not the notebook-level constant, so
    # the function works for any requested sample count.
    random_noise = np.random.uniform(-10, 20, size=num_samples)
    Y = 3.5 * X + random_noise
    return X, Y

In [None]:
# Draw the dataset and arrange it as rows of (X, Y) pairs
X, Y = generate_data(no_of_samples)
data = np.vstack((X, Y)).transpose()

In [None]:
# Tabular view of the samples; X and Y are re-read as 2-D column arrays
df = pd.DataFrame(data=data, columns=['X', 'Y'])
X, Y = df[['X']].values, df[['Y']].values
df.head(10)

### Scatter Plot

In [None]:
# Scatter of the raw samples, drawn through an explicit Axes handle
fig, ax = plt.subplots()
ax.set_title('Collected Data')
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_xticks(list(range(0, no_of_samples+1, 5)))
ax.scatter(x=df['X'], y=df['Y'])
plt.show()

### Split Data

Split data into
- Train data
- Validation data
- Test data

In [None]:
# Fractions of the dataset assigned to each split.
train_size = 0.6
validataion_size = 0.2
test_size = 0.2

# The test split is simply whatever remains after the train and validation
# slices, so test_size is never read when slicing. Fail fast here if the
# three fractions stop adding up to 1.
assert abs(train_size + validataion_size + test_size - 1.0) < 1e-9, \
    "train/validation/test fractions must sum to 1"

In [None]:
# Apply one shared random permutation so X/Y rows stay aligned
indices = [*range(no_of_samples)]
np.random.shuffle(indices)
X, Y = X[indices], Y[indices]

In [None]:
# Cumulative split boundaries: [0, train_end) train, [train_end, val_end) validation, rest test
val_end = int((train_size + validataion_size) * no_of_samples)
train_end = int(train_size * no_of_samples)

In [None]:
# Features for each split
train_X = X[:train_end]
val_X = X[train_end:val_end]
test_X = X[val_end:]

# Targets must be sliced from Y (not X) with the same boundaries, so every
# feature row stays paired with its own label.
train_Y = Y[:train_end]
val_Y = Y[train_end:val_end]
test_Y = Y[val_end:]

print("Shape of ")
print(f'X train: {train_X.shape}, Y train: {train_Y.shape}')
print(f'X val: {val_X.shape}, Y val: {val_Y.shape}')
print(f'X test: {test_X.shape}, Y test: {test_Y.shape}')

### Standardize Data
### $z = \frac{x_i - \mu}{\sigma}$

In [None]:
def stardize_data(data, mean, std_deviation):
    """Return the z-score of ``data``: subtract ``mean``, then divide by ``std_deviation``."""
    centered = data - mean
    z_scores = centered / std_deviation
    return z_scores

In [None]:
# Statistics come from the training split only; they are reused for val/test
mean_X, std_X = np.mean(train_X), np.std(train_X)
mean_Y, std_Y = np.mean(train_Y), np.std(train_Y)

In [None]:
# Standardize every feature split with the training-set X statistics
train_X, val_X, test_X = [
    stardize_data(split, mean=mean_X, std_deviation=std_X)
    for split in (train_X, val_X, test_X)
]

# Standardize every target split with the training-set Y statistics
train_Y, val_Y, test_Y = [
    stardize_data(split, mean=mean_Y, std_deviation=std_Y)
    for split in (train_Y, val_Y, test_Y)
]

In [None]:
# Sanity check: first-column mean and std of the standardized test split,
# for X and then Y. The splits were scaled with training-set statistics, so
# expect values near 0 and 1 rather than exactly 0 and 1.
print (f"mean: {np.mean(test_X, axis=0)[0]:.1f}, std: {np.std(test_X, axis=0)[0]:.1f}")
print (f"mean: {np.mean(test_Y, axis=0)[0]:.1f}, std: {np.std(test_Y, axis=0)[0]:.1f}")

In [None]:
# Number of feature columns and target columns
input_dim, output_dim = train_X.shape[1], train_Y.shape[1]

In [None]:
# Small random starting weights and a zero bias
W = np.random.randn(input_dim, output_dim) * 0.01
b = np.zeros(shape=(1, 1))

print (f"W: {W.shape}")
print (f"b: {b.shape}")

### Model

In [None]:
# Linear model: bias plus the matrix product of inputs and weights
y_pred = b + np.dot(train_X, W)

### MSE
$J(\theta) = MSE = \frac{1}{m}\sum_{i=1}^{m} \left(h_{\theta}(x^{(i)}) - y^{(i)}\right)^2$

In [None]:
# Mean squared error over the m training rows
m = train_Y.shape[0]
squared_errors = (train_Y - y_pred) ** 2
loss = 1/m * np.sum(squared_errors)
print(f"Loss: {loss}")

### Gradients

In [None]:
m = len(train_Y)
residuals = train_Y - y_pred

# dJ/dW = -(2/m) * X^T (y - y_pred). The matrix product keeps one gradient
# entry per weight, so dw has the same shape as W for any number of input
# features (a plain np.sum over everything is only right for one feature).
dw = -(2/m) * np.dot(train_X.T, residuals)

# dJ/db = -(2/m) * sum(y - y_pred); the single bias is shared by all rows.
db = -(2/m) * np.sum(residuals)

In [None]:
learning_rate = 1e-1
# One gradient-descent step, applied in place to both parameters
W -= learning_rate * dw
b -= learning_rate * db

In [None]:
NUM_EPOCHS = 100
# Re-initialise the parameters so training starts from fresh values.
W = 0.01 * np.random.randn(input_dim, output_dim)
b = np.zeros((1, ))
m = len(train_Y)
# Training loop
for epoch_num in range(NUM_EPOCHS):

    # Forward pass [NX1] · [1X1] = [NX1]
    y_pred = np.dot(train_X, W) + b
    # Computed once and reused by the loss and both gradients
    residuals = train_Y - y_pred

    # Loss (mean squared error over the m training rows)
    loss = (1/m) * np.sum(residuals**2)

    # Show progress
    if epoch_num%10 == 0:
        print (f"Epoch: {epoch_num}, loss: {loss:.3f}")

    # Backpropagation
    # dJ/dW = -(2/m) * X^T (y - y_pred): the matrix product yields a gradient
    # shaped like W, which stays correct for more than one input feature.
    dW = -(2/m) * np.dot(train_X.T, residuals)
    db = -(2/m) * np.sum(residuals)

    # Update weights
    W -= learning_rate * dW
    b -= learning_rate * db

In [None]:
# Predictions
# Use the same matrix product as the training forward pass. Element-wise
# `W * X` only coincides with the model when there is a single input feature.
pred_train = np.dot(train_X, W) + b
pred_test = np.dot(test_X, W) + b

In [None]:
# Mean squared error on the train and test splits
train_residuals = train_Y - pred_train
test_residuals = test_Y - pred_test
train_mse = np.mean(train_residuals ** 2)
test_mse = np.mean(test_residuals ** 2)
print (f"train_MSE: {train_mse:.2f}, test_MSE: {test_mse:.2f}")

In [None]:
# One wide figure with two side-by-side panels
plt.figure(figsize=(15,5))

# (title, inputs, targets, predictions, scatter label) for each panel
panels = [
    ("Train", train_X, train_Y, pred_train, "y_train"),
    ("Test", test_X, test_Y, pred_test, 'y_test'),
]

# Draw the data points and the fitted line for each split
for position, (panel_title, inputs, targets, fitted, points_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.title(panel_title)
    plt.scatter(inputs, targets, label=points_label)
    plt.plot(inputs, fitted, color="red", linewidth=1, linestyle="-", label="model")
    plt.legend(loc="lower right")

# Show plots
plt.show()