# Stochastic Gradient Descent From Scratch

## Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_squared_error

## California Housing Dataset

In [None]:
housing_data = fetch_california_housing()

In [None]:
housing_data

In [None]:
housing_data.data

In [None]:
_dict = {"name": "Ivan"}

In [None]:
_dict["name"]

In [None]:
housing_data.data

In [None]:
housing_data.feature_names

In [None]:
features_df = pd.DataFrame(housing_data.data, columns=housing_data.feature_names)
target_df = pd.DataFrame(housing_data.target, columns=['Target'])

In [None]:
features_df

In [None]:
target_df

In [None]:
df = features_df.join(target_df)

In [None]:
df.head()

In [None]:
df.corr()

## Preprocessing: Removing Outliers and Scaling

In [None]:
df[['MedInc', 'Target']].describe()[1:] #.style.highlight_max(axis=0)

In [None]:
# Outlier removal: keep only the rows whose Target is below 3.5 and whose
# MedInc is below 8. The second filter runs on the result of the first.
df = df[df.Target < 3.5]
df = df[df.MedInc < 8]

### Removed Outliers

In [None]:
df[['MedInc', 'Target']].describe()[1:]

In [None]:
def scale(x):
    '''
    Min-max scale the values of ``x`` into the range [0, 1].

    Parameters:
        x: one-dimensional numeric container exposing ``.min()`` and ``.max()``
           (for example a pandas Series or a numpy array).

    Returns:
        A new float pandas Series with a fresh 0..n-1 index (the index of
        ``x`` is not carried over). If every value of ``x`` is the same the
        result is all zeros rather than NaN.
    '''
    lowest = x.min()
    highest = x.max()
    span = highest - lowest

    # Work on a plain array so the arithmetic is vectorised and the result
    # gets a positional index, independent of whatever index x carried.
    values = np.asarray(x, dtype=float)

    if span == 0:
        # Constant input: (v - lowest) / span would be 0 / 0.
        return pd.Series(np.zeros(len(values)))

    return pd.Series((values - lowest) / span)

X = scale(df.MedInc)
y = scale(df.Target)

In [None]:
X

In [None]:
X.max(), y.max() # features are scaled now

In [None]:
X.min(), y.min()

## Correlation Between Price and Income

In [None]:
# Scatter plot of house price against income.
#
# matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*'
# and later releases dropped the old names, so use whichever name this
# installation provides and fall back to the default style otherwise.
whitegrid_style = next(
    (name for name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
     if name in plt.style.available),
    'default',
)
plt.style.use(whitegrid_style)
# rcParams are read when a figure is created, so the style and the DPI have to
# be set before plt.figure() for them to apply to this plot.
plt.rcParams['figure.dpi'] = 227
plt.figure(figsize=(16,6))
plt.scatter(X, y, label='Data', c='#388fd8', s=6)
plt.title('Positive Correlation Between Income and House Price', fontsize=15)
plt.xlabel('Income', fontsize=12)
plt.ylabel('House Price', fontsize=12)
plt.legend(frameon=True, loc=1, fontsize=10, borderpad=.6)
plt.tick_params(direction='out', length=6, color='#a0a0a0', width=1, grid_alpha=.6)
plt.show()

## Multiple Linear Regression with Least Squares

## $$ m =(A^TA)^{-1} A^Ty $$

### $$m \textrm{ – parameters}, \: A \textrm{ – data}, \: y \textrm{ – target}$$

In [None]:
# Design matrix: every column of df except the target. No constant column is
# appended here, so a model fitted on X as-is has no intercept term.
X = df.drop('Target', axis=1) # matrix A, or all the features
# Target vector, taken from the same (already filtered) DataFrame.
y = df.Target

In [None]:
class MultipleLinearRegression:
    '''
    Multiple linear regression fitted by ordinary least squares.

    Finds the coefficient vector m that minimises ||X m - y||. No intercept
    column is added; append a column of ones to X if an intercept is wanted.
    '''

    def __init__(self):
        # None until fit() has been called (or coeffs has been assigned).
        self._coeffs = None

    def fit(self, X, y):
        '''
        Estimate the coefficients from training data.

        Parameters:
            X: array-like of shape (n_samples, n_features).
            y: array-like of shape (n_samples,).
        '''
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        # lstsq solves the same problem as (X^T X)^-1 X^T y without forming an
        # explicit inverse: it is better conditioned and still returns a
        # (minimum-norm) solution when the features are collinear, where the
        # explicit inverse fails with a singular-matrix error.
        self._coeffs, *_ = np.linalg.lstsq(X, y, rcond=None)

    def predict(self, X):
        '''
        Return the predictions X @ coeffs as a 1-D array of length n_samples.

        Raises:
            RuntimeError: if no coefficients are available yet.
        '''
        if self._coeffs is None:
            raise RuntimeError('fit() must be called before predict()')
        X = np.asarray(X, dtype=float)
        # reshape keeps the output 1-D even when fit() was given the target as
        # a single-column 2-D array.
        return X.dot(np.asarray(self._coeffs)).reshape(len(X))

    # Exposed as a property so that `model.coeffs` keeps working as plain
    # attribute access; a method and an instance attribute of the same name
    # cannot coexist, because the attribute hides the method once it is set.
    @property
    def coeffs(self):
        '''Fitted coefficients, one per feature column (None before fit()).'''
        return self._coeffs

    @coeffs.setter
    def coeffs(self, value):
        self._coeffs = value

In [None]:
mlp = MultipleLinearRegression()

In [None]:
mlp.fit(X, y)

In [None]:
mlp.coeffs

In [None]:
y_pred = mlp.predict(X)

In [None]:
y_pred

In [None]:
y

In [None]:
mean_squared_error(y, y_pred)

# Gradient Descent

### $$ MSE = \frac{1}{n}\sum_{i=1}^{n} (y_i - \hat{y_i})^2 \quad \textrm{where} \quad \hat{y_i} = mx_i + b $$

### Partial Derivatives

### $$f(m,b)= \frac{1}{n}\sum_{i=1}^{n}(y_i - (mx_i+b))^2$$

### Partial Derivative With Respect to `m`

# $$ [f(g(x))]' = f'(g(x)) \cdot g'(x) \: - \textrm{chain rule}$$

# $$ (y - (mx + b))^2 $$

### $$\frac{\partial f}{\partial m} = \frac{1}{n}\sum_{i=1}^{n}-2x_i(y_i - (mx_i+b))$$

### Partial Derivative With Respect to `b`

### $$\frac{\partial f}{\partial b} = \frac{1}{n}\sum_{i=1}^{n}-2(y_i - (mx_i+b))$$

### Final Function

In [None]:
def gradient_descent(X, y, lr=0.05, epoch=10):
    '''
    Batch gradient descent for a single feature.

    Fits the line y = m*X + b by minimising the mean squared error, taking one
    full-data gradient step per epoch.

    Parameters:
        X: 1-D array-like of feature values (list, numpy array, pandas Series).
        y: 1-D array-like of targets, paired with X by position.
        lr: learning rate (step size).
        epoch: number of gradient steps.

    Returns:
        (m, b, log, mse) where log is the list of (m, b) pairs after each step
        and mse is the list of mean squared errors after each step.

    Raises:
        ValueError: if X is empty or X and y differ in length.
    '''
    X = np.asarray(X, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    N = len(X) # number of samples
    if N == 0:
        raise ValueError('X must contain at least one sample')
    if len(y) != N:
        raise ValueError('X and y must have the same length')

    m, b = 0.2, 0.2 # initial parameters
    log, mse = [], [] # lists to store learning process

    for _ in range(epoch):
        residual = y - (m * X + b)

        # Both gradients use the residual computed before the step, so m and b
        # are updated simultaneously.
        m -= lr * (-2 * X.dot(residual) / N)
        b -= lr * (-2 * residual.sum() / N)

        log.append((m, b))
        mse.append(np.mean((y - (m * X + b)) ** 2))

    return m, b, log, mse

### Predicting House Price With Gradient Descent

In [None]:
# Single-feature setup: the MedInc and Target columns of the filtered df,
# used directly (not passed through scale()).
X = df.MedInc
y = df.Target

# 100 gradient steps with learning rate 0.01; log and mse hold the per-step history.
m, b, log, mse = gradient_descent(X, y, lr=0.01, epoch=100)

# Predictions of the fitted line on the training inputs.
y_pred = m*X + b

In [None]:
y_pred

In [None]:
mean_squared_error(y, y_pred)

## Stochastic Gradient Descent

In [None]:
def SGD(X, y, lr=0.05, epoch=10, batch_size=1, seed=None):
    '''
    Mini-batch stochastic gradient descent for a single feature.

    Fits the line y = m*X + b by minimising the mean squared error. Every
    epoch shuffles the samples and walks through them in consecutive
    mini-batches, updating m and b from the gradient of each batch.

    Parameters:
        X: 1-D array-like of feature values (list, numpy array, pandas Series).
        y: 1-D array-like of targets, paired with X by position.
        lr: learning rate (step size).
        epoch: number of full passes over the data.
        batch_size: samples per update; 1 gives classic SGD, len(X) or more
            gives one full-batch step per epoch.
        seed: optional seed for the shuffling, for reproducible runs.

    Returns:
        (m, b, log, mse) where log is the list of (m, b) pairs at the end of
        each epoch and mse is the list of full-data mean squared errors at the
        end of each epoch.

    Raises:
        ValueError: if X is empty, X and y differ in length, or batch_size < 1.
    '''
    X = np.asarray(X, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    n_samples = len(X)
    if n_samples == 0:
        raise ValueError('X must contain at least one sample')
    if len(y) != n_samples:
        raise ValueError('X and y must have the same length')
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')

    rng = np.random.default_rng(seed)
    m, b = 0.2, 0.2 # initial parameters, same as gradient_descent
    log, mse = [], [] # lists to store learning process

    for _ in range(epoch):
        # A fresh random order each epoch, so every sample is used exactly once
        # per pass but the batches differ from epoch to epoch.
        order = rng.permutation(n_samples)

        for start in range(0, n_samples, batch_size):
            batch = order[start:start + batch_size]
            X_batch, y_batch = X[batch], y[batch]
            residual = y_batch - (m * X_batch + b)

            # The last batch of an epoch may be shorter, hence len(batch).
            m -= lr * (-2 * X_batch.dot(residual) / len(batch))
            b -= lr * (-2 * residual.sum() / len(batch))

        log.append((m, b))
        # Progress is tracked on the full data set, not just the last batch.
        mse.append(np.mean((y - (m * X + b)) ** 2))

    return m, b, log, mse