## Linear Regression

In [29]:
import random
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [30]:
# generating the dataset
label = []
ftr =  [[],[],[]]

In [31]:
# Generate 1000 liner samples
for i in range(1000):
    X1 = random.random()
    ftr[0].append(X1)
    X2 = random.random()
    ftr[1].append(X2)
    X3 = random.random()
    ftr[2].append(X3)
    Y = 5 * X1 + 3 * X2 + 1.5 * X3 + 6
    label.append(Y) 

In [32]:
# Creating DataFram to hold the data
allData = {'feature1': ftr[0], 'feature2': ftr[1], 'feature3': ftr[2], 'labels': label}
dataFram = pd.DataFrame(allData)
dataFram

Unnamed: 0,feature1,feature2,feature3,labels
0,0.198631,0.038648,0.090877,7.245417
1,0.955702,0.003128,0.782456,11.961578
2,0.715478,0.324892,0.027087,10.592695
3,0.435368,0.983851,0.991842,12.616155
4,0.380890,0.866892,0.408399,11.117724
...,...,...,...,...
995,0.346809,0.440573,0.947150,10.476487
996,0.126149,0.294068,0.000672,7.513957
997,0.396123,0.619179,0.473675,10.548662
998,0.814915,0.778199,0.190843,12.695438


In [33]:
# Separating features and labels
Xs = dataFram[['feature1', 'feature2', 'feature3']]
y = dataFram['labels']

In [34]:
# spliting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(Xs, y, test_size=0.2, random_state=42)

In [35]:
# feature scaling using standerScaler
# scaling the features to prevent overflow issues in computations
# this part avoid feautres bias range of feature (0->1)
sclr = StandardScaler()
X_train_scled = sclr.fit_transform(X_train)
X_test_scled = sclr.transform(X_test)

In [36]:
# Adding bias term to X_train for intercept term in linear regression
X_bias_train = np.column_stack((np.ones(len(X_train_scled)), X_train))

In [37]:
weights = np.array([5, 3, 1.5,6])

In [38]:
# Cost function
def costFn(weights,x,y):
    m = len(y)
    predict = x.dot(weights)
    sub = np.subtract(predict,y)
    squaredErrors = np.square(sub)
    cost = 1 / (2 * m) * np.sum(squaredErrors)
    return cost

In [39]:
# Gradient descent function
def gradientDescent(X, Y, weights, LR, iterations):
    m = len(Y)
    cost_history = []
    theta_history = [weights]

    for _ in range(iterations):
        pred = np.dot(X, weights)
        error = pred - Y
        cost = costFn(weights,X,Y)
        cost_history.append(cost)
        # Update weights
        weights = weights - (LR * (1 / m) * np.dot(X.T, error))
        theta_history.append(weights)

    return weights, cost_history, theta_history

In [40]:
# Running gradient descent
final_weights, cost_history, theta_history = gradientDescent(X_bias_train, y_train, weights, LR=0.01, iterations=1000)

In [41]:
print("Updated weights:", final_weights)
# calculating loss using the final weights and training data
loss = costFn(final_weights, X_bias_train, y_train)
print("Loss:", loss)

Updated weights: [5.58461032 4.15087742 2.54514193 3.68939931]
Loss: 0.22392165752454404
