<a href="https://colab.research.google.com/github/SusheelThapa/ML-From-Scratch/blob/linearRegression/linear_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Linear Regression

## Algorithm for Linear Regression

### Training
- Initialize *weight* as zero
- Initialize *bias* as zero
<br/>

**Given a data point**
- Predict result by using ***`y_hat = wx+b`***
- Calculate ***mean_squared_error***
- Use ***gradient descent*** to figure out new weight and bias values
- Repeat n times

### Updating the parameters

***`w = w - learning_rate * dj_dw`***

***`b = b - learning_rate * dj_db`***

### Testing

Given a data point
- Put in the values from the data points into the equation ***`y_hat = wx+b`***



## Coding the above algorithms

### Importing necessary modules

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Writing the fit function

In [None]:
def fit(X,y,learning_rate,number_of_iterations):
    """Fit a linear model ``y_hat = X @ w + b`` with batch gradient descent.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training features.
    y : array-like holding n_samples target values
        Accepted as shape (n_samples,) or (n_samples, 1); it is flattened
        before use.
    learning_rate : float
        Step size applied to each gradient update.
    number_of_iterations : int
        Number of gradient-descent updates to perform.

    Returns
    -------
    weights : numpy.ndarray of shape (n_features,)
    bias : float

    Raises
    ------
    ValueError
        If X is not 2-D, contains no samples, or the number of targets does
        not match the number of samples.
    """
    X = np.asarray(X, dtype=float)
    # Flatten the targets: a column vector (n, 1) would otherwise broadcast
    # against the (n,) predictions into an (n, n) matrix and silently
    # corrupt the gradients.
    y = np.asarray(y, dtype=float).ravel()

    # Extracting the number of samples and features
    n_samples, n_features = X.shape
    if n_samples == 0:
        raise ValueError("X must contain at least one sample")
    if y.shape[0] != n_samples:
        raise ValueError(
            f"X has {n_samples} samples but y has {y.shape[0]} targets"
        )

    # Initialize weights and bias with zeros
    weights = np.zeros(n_features)
    bias = 0.0

    # Repeating for n times
    for _ in range(number_of_iterations):
        # Residual of the current prediction: (w1*x1 + ... + wn*xn + b) - y
        error = np.dot(X, weights) + bias - y

        # Partial derivatives of the cost function with respect to w and b
        dj_dw = np.dot(X.T, error) / n_samples
        dj_db = np.sum(error) / n_samples

        # Updating the value of w and b
        weights = weights - learning_rate * dj_dw
        bias = bias - learning_rate * dj_db

    return weights, bias


### Writing the predict function

In [None]:
def predict(X,W,b):
    """Return the linear-model output ``X . W + b`` for the given inputs.

    X holds the samples, W the weights and b the bias; the result is the
    dot product of X and W with b added to every entry.
    """
    return np.dot(X, W) + b

### Feature Scaling of data 

In [None]:
def scaleData(X):
    """Scale every feature column by dividing it by that column's maximum.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)

    Returns
    -------
    numpy.ndarray
        A new float array of the same shape. The input is left unmodified.

    Notes
    -----
    - The result is computed on a float copy, so integer input is no longer
      truncated when the scaled values are stored, and the caller's array
      (or any array it is a view of) is not overwritten.
    - A column whose maximum is 0 is returned unchanged instead of being
      divided by zero.
    """
    # np.array(...) copies, so the caller's data is never modified
    scaled = np.array(X, dtype=float)
    n_samples, n_features = scaled.shape  # enforces 2-D input

    column_max = scaled.max(axis=0)
    # Replace zero maxima with 1 so those columns pass through untouched
    divisor = np.where(column_max == 0, 1.0, column_max)

    return scaled / divisor

### Calculating mean square error

In [None]:
def mse(y_pred, y):
    """Return the mean of the squared differences between y and y_pred."""
    residual = y - y_pred
    return np.mean(np.square(residual))

## Testing the model with one feature dataset

### Loading the datasets

In [None]:
from sklearn.model_selection import train_test_split
from sklearn import datasets

# Synthetic single-feature regression problem with a fixed seed
X, y = datasets.make_regression(
    n_samples=100,
    n_features=1,
    noise=20,
    random_state=4,
)

# Hold out 30% of the samples for testing
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1234,
)

### Plotting the dataset without feature scaling

In [None]:
# Scatter the single unscaled feature against the target
plt.scatter(
    X[:, 0],
    y,
    color='r',
    marker='X',
    s=3,
)
plt.show()

### Scaling the data

In [None]:
# Scale both splits with the maxima of the TRAINING split only. Scaling the
# test split by its own maxima would put it on a different scale from the
# data the model was fitted on, so the learned weights would not apply.
train_max = X_train.max(axis=0)  # captured before X_train is rescaled
train_max = np.where(train_max == 0, 1, train_max)  # avoid division by zero

X_train = scaleData(X_train)
X_test = X_test / train_max

### Plotting the data after feature scaling

In [None]:
# Scatter the scaled training feature against its targets
plt.scatter(
    X_train[:, 0],
    y_train,
    color='r',
    marker='X',
    s=3,
)
plt.show()

### Training the model

In [None]:
# Gradient descent on the training split: step size 0.1, 10000 updates
W, b = fit(
    X_train,
    y_train,
    learning_rate=0.1,
    number_of_iterations=10000,
)

### Testing the model

In [None]:
# Apply the learned weight and bias to the held-out samples
y_pred = predict(X=X_test, W=W, b=b)

### Plotting the line

In [None]:
# Fitted line, drawn through the predictions for the test samples
plt.plot(
    X_test,
    y_pred,
    linewidth=3,
    color='black',
    label='Prediction',
)

# Training samples (red crosses) and testing samples (blue dots)
plt.scatter(
    X_train,
    y_train,
    color='r',
    marker='X',
    s=10,
    label='Training Data',
)
plt.scatter(
    X_test,
    y_test,
    color='b',
    marker='o',
    s=10,
    label="Testing Data",
)

plt.legend()
plt.show()

### Calculating **mean squared error**

In [None]:
# Mean squared error of the predictions on the test split
mean_squared_error = mse(y_pred=y_pred, y=y_test)
print(mean_squared_error)

## Testing the model with more than one feature dataset

### Loading the datasets


In [None]:
data = pd.read_excel('house_price.xlsx')

# Separating the X and y from the dataset
X = data.loc[:,[
    'X1 transaction date',
    'X2 house age',
    'X3 distance to the nearest MRT station',
    'X4 number of convenience stores',
    'X5 latitude',
    'X6 longitude']]

y = data.loc[:,'Y house price of unit area']

# Converting the data into numpy array
X = X.to_numpy()
y = y.to_numpy()

# Creating testing and training data: the first 350 rows are used for
# training and every remaining row for testing. Both slices share one
# boundary so that no row is dropped between the two splits.
# NOTE: basic slices are views into X / y, not copies.
split_index = 350
X_train = X[:split_index]
X_test = X[split_index:]
y_train = y[:split_index]
y_test = y[split_index:]

### Scaling the data

In [None]:
# Scale both splits with the maxima of the TRAINING split only. Scaling the
# test split by its own maxima would put each feature on a different scale
# from the data the model was fitted on, so the learned weights would not
# apply to it.
train_max = X_train.max(axis=0)  # captured before X_train is rescaled
train_max = np.where(train_max == 0, 1, train_max)  # avoid division by zero

X_train = scaleData(X_train)
X_test = X_test / train_max

### Plotting the dataset with individual features

In [None]:
n_samples, n_features = X_train.shape

# Per-feature plot settings; position k describes column k of X_train
customize = {
    "title": [
        'Transaction Date',
        'House age',
        'Distance to the nearest MRT',
        'Number of convenience store',
        'Latitude',
        'Longitude',
    ],
    "color": ['red', "blue", 'orange', 'lightgreen', "coral", "brown"],
}

# One scatter plot per feature column against the training targets
for i in range(n_features):
    plt.scatter(
        X_train[:, i],
        y_train,
        color=customize["color"][i],
        marker='o',
        s=20,
    )
    plt.title(customize["title"][i])
    plt.show()


### Training the model

In [None]:
# Gradient descent on the house-price training split: step size 0.01, 10000 updates
W, b = fit(
    X_train,
    y_train,
    learning_rate=0.01,
    number_of_iterations=10000,
)

### Predicting the price of house

In [None]:
# Predict the price per unit area for the held-out rows
y_pred = predict(X=X_test, W=W, b=b)

### Calculating mean square error

In [None]:
# Mean squared error of the house-price predictions on the test split
mean_squared_error = mse(y_pred=y_pred, y=y_test)
print(mean_squared_error)