## Challenge - Hard work pays off

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Data preparation
- download
- load
- normalise
- visualise

In [None]:
# Read the training inputs (X) and training targets (y) from their CSV files;
# both start out as pandas DataFrames.
X = pd.read_csv("Training Data_Hard/Linear_X_Train.csv")
y = pd.read_csv("Training Data_Hard/Linear_Y_Train.csv")

In [None]:
# Swap the input DataFrame for its underlying NumPy array
X = X.values

In [None]:

# Swap the target DataFrame for its underlying NumPy array
y = y.values

In [None]:
# Flatten both arrays to 1-D, then standardise X using its own mean and
# standard deviation (both are kept in `mean` / `std` for later use).
X = X.reshape(-1)
y = y.reshape(-1)
mean, std = X.mean(), X.std()
print(mean, std)
# A printed std close to 1 indicates the raw inputs were already nearly
# standardised; the rescaling below is applied regardless.
X = (X - mean) / std

In [None]:
# Matplotlib renamed its bundled "seaborn" style to "seaborn-v0_8" (the old name
# was deprecated in 3.6 and later removed), so prefer the new name and fall
# back to the old one only where the new one does not exist.
plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")
# Scatter the training inputs against their targets.
plt.scatter(X, y, label="Train", color="orange")
plt.title(" Hardwork vs Performance")
plt.xlabel("hardwork")
plt.ylabel("Performance")
plt.legend()
plt.show()

## Gradient descent
--> Gradient descent iteratively updates theta to find the value that minimises the error.

In [None]:
def hypothesis(x,theta):
    """Evaluate the linear model ``theta[0] + theta[1] * x``.

    ``theta`` is indexed as ``[intercept, slope]``. The arithmetic is applied
    to ``x`` exactly as given, so a NumPy array input yields elementwise
    predictions.
    """
    intercept, slope = theta[0], theta[1]
    return intercept + slope*x

def gradient(X,Y,theta):
    """Return the averaged gradient used for the theta update.

    The two components are ``mean(y_ - y)`` and ``mean((y_ - y) * x)``, where
    ``y_ = hypothesis(x, theta)``. This is the gradient of *half* the mean
    squared error, i.e. half the gradient of ``error``; the missing factor of
    two is effectively absorbed by the learning rate.

    Args:
        X: inputs; flattened to 1-D before use.
        Y: targets; flattened to 1-D before use.
        theta: ``[theta0, theta1]`` (intercept, slope).

    Returns:
        NumPy array of shape ``(2,)``: ``[d/d theta0, d/d theta1]``.

    Raises:
        ValueError: if ``X`` is empty (the average would be undefined).
    """
    # Flatten so column vectors and 1-D arrays behave the same and cannot
    # broadcast against each other into an (m, m) matrix.
    X = np.asarray(X, dtype=float).reshape(-1)
    Y = np.asarray(Y, dtype=float).reshape(-1)
    m = X.shape[0]
    if m == 0:
        raise ValueError("gradient() needs at least one sample")

    # One vectorised pass instead of a Python-level loop over every sample.
    residual = hypothesis(X, theta) - Y
    return np.array([residual.sum(), (residual * X).sum()]) / m

def error(X,Y,theta):
    """Return the mean squared error of the linear model on ``(X, Y)``.

    Args:
        X: inputs; flattened to 1-D before use.
        Y: targets; flattened to 1-D before use.
        theta: ``[theta0, theta1]`` (intercept, slope).

    Returns:
        ``sum((hypothesis(X, theta) - Y) ** 2) / m`` as a scalar.

    Raises:
        ValueError: if ``X`` is empty (previously this surfaced as a
            ``ZeroDivisionError`` from the final division).
    """
    # Flatten so column vectors and 1-D arrays behave the same.
    X = np.asarray(X, dtype=float).reshape(-1)
    Y = np.asarray(Y, dtype=float).reshape(-1)
    m = X.shape[0]
    if m == 0:
        raise ValueError("error() needs at least one sample")

    # Vectorised squared residuals replace the per-sample Python loop.
    residual = hypothesis(X, theta) - Y
    return np.sum(residual ** 2) / m

def gradient_descent(X,Y,max_steps=100,learning_rate = 0.1):
    """Fit ``theta = [theta0, theta1]`` by batch gradient descent from zeros.

    Args:
        X: training inputs.
        Y: training targets.
        max_steps: number of update steps to run.
        learning_rate: step size multiplied into the gradient.

    Returns:
        ``(theta, error_list, theta_list)`` where ``theta`` is the final
        parameter array, ``theta_list[i]`` is the ``(theta0, theta1)`` tuple
        after step ``i`` and ``error_list[i]`` is ``error`` evaluated at that
        same ``theta_list[i]``.
    """
    theta = np.zeros((2,))
    theta_list = []
    error_list = []
    for _ in range(max_steps):
        grad = gradient(X, Y, theta)
        # Both components move using the gradient computed before the step.
        theta = theta - learning_rate * grad

        theta_list.append((theta[0], theta[1]))
        # Record the error AFTER the update so each error belongs to the theta
        # stored alongside it; recording it before the update paired every
        # theta with the loss of the previous iterate.
        error_list.append(error(X, Y, theta))

    return theta,error_list,theta_list

In [None]:
# Fit the line with the default settings (100 steps, learning rate 0.1).
# Ot: final theta, el: error recorded at each step, tl: theta recorded at each step.
Ot, el, tl = gradient_descent(X,y)

In [None]:
# Print the fitted parameters, then plot the recorded error against the step index
print(Ot)
plt.plot(el)
plt.title('Error Function')
plt.show()

### Prediction and best line

In [None]:
# Predictions of the fitted line for every training input
y_ = hypothesis(X,Ot)

In [None]:
# Bare expression: shown as the cell's output when run in a notebook
y_

In [None]:
# Overlay the fitted line (orange) on the training points (red)
plt.scatter(X,y,color="red",label="Actual")
plt.plot(X,y_,color="orange",label="Predicted")
plt.xlabel("Hardwork")
plt.ylabel("Performance")
plt.legend()
plt.show()

In [None]:
# Load the test inputs and apply the SAME standardisation as the training
# inputs (the training `mean` and `std`): theta was fitted on standardised X,
# so feeding it raw test values would evaluate the line on the wrong scale.
X_test = pd.read_csv("Test Cases/Linear_X_Test.csv").values
X_test = (X_test - mean) / std
Y_test = hypothesis(X_test,Ot)
y_test = pd.DataFrame(data=Y_test,columns=["y"])

# Write the predictions as a single-column CSV without the row index.
y_test.to_csv("Ypredtest1.csv",index=False)


# Compute score
Score: R² (R-squared), also called the coefficient of determination.

In [None]:
def r2_score(y_, y):
    """Return the coefficient of determination (R^2) as a percentage.

    Args:
        y_: predicted values.
        y: true values.

    Returns:
        ``(1 - SS_res / SS_tot) * 100`` as a scalar, where ``SS_res`` is the
        sum of squared prediction errors and ``SS_tot`` is the sum of squared
        deviations of ``y`` from its mean. 100 is a perfect fit; 0 matches
        always predicting the mean of ``y``.
    """
    # Flatten both inputs so lists, 1-D arrays and column vectors all work and
    # an (n, 1) array can never broadcast against an (n,) array.
    y_ = np.asarray(y_, dtype=float).reshape(-1)
    y = np.asarray(y, dtype=float).reshape(-1)
    # np.sum reduces in native code and always yields a scalar; the builtin
    # sum() iterates element by element and returns an array for 2-D input.
    ss_res = np.sum((y_ - y) ** 2)
    ss_tot = np.sum((y - y.mean()) ** 2)
    return (1 - ss_res / ss_tot) * 100

In [None]:
# R^2 (as a percentage) of the predictions y_ against the training targets y
r2_score(y_,y)

## Visualising loss function, gradient descent and theta update

In [None]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np


## 1. Loss function

In [None]:
# Alias the fitted parameters as `theta`; the bare expression displays them
theta = Ot
theta

In [None]:
# Seeds NumPy's global RNG; nothing in this cell draws random numbers.
np.random.seed(1)
# Candidate intercepts (T0: -40..39) and slopes (T1: 40..119), one unit apart,
# expanded into a 2-D grid of (T0, T1) pairs.
T0 = np.arange(-40,40,1)
T1 = np.arange(40,120,1)
T0,T1 = np.meshgrid(T0,T1)
# J[i, j] is the mean squared error of the line with intercept T0[i, j] and
# slope T1[i, j] on the training data.
J = np.zeros(T0.shape)
for i in range(J.shape[0]):
    for j in range(J.shape[1]):
        # A dedicated name is used here on purpose: assigning to `y_` would
        # overwrite the fitted-line predictions that r2_score(y_, y) relies on.
        grid_pred = T1[i,j]*X + T0[i,j]
        J[i,j] = np.sum((y-grid_pred)**2)/y.shape[0]

In [None]:
# SURFACE PLOT
fig = plt.figure()
axes = fig.gca(projection = '3d')
axes.plot_surface(T0,T1,J,cmap="rainbow")
plt.show()

In [None]:
# CONTOUR PLOT
fig = plt.figure()
axes = fig.gca(projection = '3d')
axes.contour(T0,T1,J,cmap="rainbow")
plt.show()

## 2.Plot Gradient descent

## --> Plot Update theta

In [None]:
# Turn the recorded (theta0, theta1) tuples into a 2-column array:
# one row per step, column 0 = theta0, column 1 = theta1.
theta_list = np.array(tl)  # Stores how theta was changing every time
# Plot each parameter against the step index
plt.plot(theta_list[:,0],label="Theta0")
plt.plot(theta_list[:,1],label="Theta1")
plt.legend()
plt.show()

In [None]:
#Trajectory taced by theta updates in the loss function
fig = plt.figure()
axes = fig.gca(projection = '3d')
axes.plot_surface(T0,T1,J,cmap="rainbow")
axes.scatter(theta_list[:,0],theta_list[:,1],el)
# axes.scatter(theta_list[:,1],label="Theta1")
# plt.legend()
plt.show()

In [None]:
# Trajectory of the theta updates, drawn over the 3-D contour of the loss
fig = plt.figure()
# Figure.gca() stopped accepting keyword arguments (deprecated in Matplotlib
# 3.4 and later removed); add_subplot creates the 3-D axes on old and new
# versions alike.
axes = fig.add_subplot(111, projection='3d')
axes.contour(T0,T1,J,cmap="rainbow")
# Each point is (theta0, theta1) with the recorded error as its height.
axes.scatter(theta_list[:,0],theta_list[:,1],el)
plt.show()

In [None]:
# 2-D contour plot of the loss J with the recorded (theta0, theta1) points overlaid

plt.contour(T0,T1,J,cmap="rainbow")
plt.scatter(theta_list[:,0],theta_list[:,1])
plt.show()

In [None]:
# Persist the per-step theta array to disk in NumPy's .npy format
np.save("Thetalist.npy",theta_list)