In [2]:
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn

In [3]:
def normalize(data):
        if(data.all() == 0):
              return data
        min = np.min(data)
        max = np.max(data)
        range = max-min
        if(range == 0):
            return np.zeros_like(data)
        data_norm = (data - min)/range
        return data_norm

In [4]:
data = pd.read_csv('https://github.com/ybifoundation/Dataset/raw/main/Salary%20Data.csv')

In [5]:
train, test = train_test_split(data, train_size=0.75, random_state=10)

In [6]:
x_train_raw = np.array(train['Experience Years'])
y_train_raw = np.array(train['Salary'])
x_test_raw = np.array(test['Experience Years'])
y_test_raw = np.array(test['Salary'])

x_train_raw = np.reshape(x_train_raw, (x_train_raw.shape[0], 1))
y_train_raw = np.reshape(y_train_raw, (y_train_raw.shape[0], 1))
x_test_raw = np.reshape(x_test_raw, (x_test_raw.shape[0], 1))
y_test_raw = np.reshape(y_test_raw, (y_test_raw.shape[0], 1))

In [7]:
x_train = normalize(x_train_raw)
y_train = normalize(y_train_raw)
x_test = normalize(x_test_raw)
y_test = normalize(y_test_raw)

In [8]:
def initialize_parameters():
    w = np.random.rand(1,1) * 0.1
    b = np.zeros((1,1))
    parameters = {'w':w, 'b':b}
    return parameters

In [76]:
def forward_propagation(x, parameters):
    w, b = parameters['w'], parameters['b']
    yhat = np.matmul(x, w) + b
    yhat = np.reshape(yhat, (yhat.shape[0], 1))
    return yhat

In [77]:
def calculate_cost(yhat, y):
    m = y.shape[0]
    error = yhat - y
    squared_error = error ** 2
    cost = np.sum(squared_error, 0)/m
    intermediate_values = {'error':error}
    return cost, intermediate_values

In [200]:
def back_propagation(x, intermediate_values):
    m = x.shape[0]
    error = intermediate_values['error']
    dw = x * (2 * error) * (1/m)
    dw = np.sum(dw, axis=0, keepdims=True)
    db = (2 * error) * (1/m)
    db = np.sum(db, axis=0, keepdims=True)
    grads = {'dw':dw, 'db':db}
    return grads

In [201]:
def update_parameters(parameters, grads, learning_rate=0.1):
    w, b = parameters['w'], parameters['b']
    dw, db = grads['dw'], grads['db']
    w = w - learning_rate * dw
    b = b - learning_rate * db
    parameters = {'w':w, 'b':b}
    return parameters

In [202]:
def save_parameters(parameters):
    for p in parameters:
        np.save(p, parameters[p])

In [222]:
def load_parameters():
    parameters = initialize_parameters()
    for p in parameters:
        try:
            parameters[p] = np.load(str(p + '.npy'))
        except FileNotFoundError:
            print('no saved parameters found, loading default')
    return parameters

In [251]:
def train_model(epochs=10, learning_rate=0.1):
    parameters = load_parameters()
    for epoch in range(epochs):
        yhat = forward_propagation(x_train, parameters)
        cost, intermediate_values = calculate_cost(yhat, y_train)
        print(f'epoch: {epoch}    cost: {cost}')
        grads = back_propagation(x_train, intermediate_values)
        parameters = update_parameters(parameters, grads, learning_rate)
    save_parameters(parameters)
    print(f'parameters saved: {parameters}    final cost: {cost}')

In [224]:
def predict(x):
    parameters = load_parameters()
    yhat = forward_propagation(x, parameters)
    print(f'prediction: {yhat}')

In [258]:
train_model(epochs=1000, learning_rate=0.1)

epoch: 0    cost: [0.00434362]
epoch: 1    cost: [0.00434362]
epoch: 2    cost: [0.00434362]
epoch: 3    cost: [0.00434362]
epoch: 4    cost: [0.00434362]
epoch: 5    cost: [0.00434362]
epoch: 6    cost: [0.00434362]
epoch: 7    cost: [0.00434362]
epoch: 8    cost: [0.00434362]
epoch: 9    cost: [0.00434362]
epoch: 10    cost: [0.00434362]
epoch: 11    cost: [0.00434362]
epoch: 12    cost: [0.00434362]
epoch: 13    cost: [0.00434362]
epoch: 14    cost: [0.00434362]
epoch: 15    cost: [0.00434362]
epoch: 16    cost: [0.00434362]
epoch: 17    cost: [0.00434362]
epoch: 18    cost: [0.00434362]
epoch: 19    cost: [0.00434362]
epoch: 20    cost: [0.00434362]
epoch: 21    cost: [0.00434362]
epoch: 22    cost: [0.00434362]
epoch: 23    cost: [0.00434362]
epoch: 24    cost: [0.00434362]
epoch: 25    cost: [0.00434362]
epoch: 26    cost: [0.00434362]
epoch: 27    cost: [0.00434362]
epoch: 28    cost: [0.00434362]
epoch: 29    cost: [0.00434362]
epoch: 30    cost: [0.00434362]
epoch: 31    cost:

In [259]:
x = np.array([[10]])
predict(x)

prediction: [[10.71734316]]
