# Linear Regression

## Seeing Linear Regression in action on a small dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Read the comma-separated sample file into a NumPy array
data = np.loadtxt("lin_reg.txt", delimiter = ",")
# Show the array dimensions as the cell output
data.shape

In [None]:
# Plot the raw samples as blue circles: first column on the x-axis, second column on the y-axis
plt.plot(data[:, 0], data[:, 1], "bo")

In [None]:
# Split the table into the input feature (first column) and the target
# (second column, kept as an (m, 1) column vector); m is the sample count
X = data[:, 0]
m = len(X)
Y = data[:, 1].reshape(-1, 1)

In [None]:
# Display the target values (an (m, 1) column vector)
Y

In [None]:
# Display the input feature values (a 1-D array at this point)
X

In [None]:
# Prepend a column of ones to X so that the constant (intercept) parameter
# theta_0 can be learned together with the slope.
# NOTE: X is rebuilt from itself, so running this cell a second time adds
# yet another column of ones.

X = np.column_stack((np.ones(m), X))
X

In [None]:
# Settings for the gradient-descent run and the starting parameter vector

iterations = 1500  # number of update steps; can be changed to whatever you want
alpha = 0.01  # learning rate
theta = np.zeros(shape=(2, 1))  # parameters to be learned, initialised to zero

In [None]:
# Display the current parameter vector
theta

In [None]:
# defining the cost function

def cost(X, y, theta):
    """Return the squared-error cost J(theta) of a linear model.

    Computes ``1 / (2 * n) * sum((X . theta - y) ** 2)`` where ``n`` is the
    number of rows of ``X``.

    Args:
        X: Design matrix with one row per training example.
        y: Target values, shaped so that they line up with ``X . theta``.
        theta: Model parameters (array or nested list).

    Returns:
        The scalar cost.
    """
    # Take the sample count from the data actually passed in instead of the
    # notebook-level global ``m``, which goes stale as soon as X is rebound.
    n_samples = np.shape(X)[0]
    residuals = np.dot(X, theta) - y

    return 1/(2*n_samples) * np.sum(residuals**2)

In [None]:
# Sanity-check the cost function on a hand-picked theta
# (try other values and watch the cost change)
cost(X, Y, np.array([[-3], [8]]))

In [None]:
# Batch gradient descent: every step moves theta against the gradient of the
# cost, and the cost after each step is recorded in `history`.

history = []

for epoch in range(iterations):
    residuals = X.dot(theta) - Y
    theta = theta - alpha / m * X.T.dot(residuals)
    history.append(cost(X, Y, theta))

In [None]:
# Display the parameters after the gradient-descent loop has run
theta

In [None]:
# Plot the samples (red circles) together with the fitted line X . theta

fig, ax = plt.subplots(figsize=(8, 8))
ax.plot(X[:, 1], Y, "ro")
ax.plot(X[:, 1], X.dot(theta))

## Predicting Housing Prices in Mumbai (INR) based on their size in Square Feet
Data obtained from: https://www.kaggle.com/ruchi798/housing-prices-in-metropolitan-areas-of-india?select=Mumbai.csv

In [None]:
# Load the housing dataset from a CSV file in the working directory.
# NOTE(review): the section heading and source link refer to Mumbai.csv, but
# the file read here is Bangalore.csv — confirm which city is intended.

dataset = pd.read_csv('Bangalore.csv')
# Display the table as the cell output
dataset

In [None]:
# The table's first column is the regression target; its second column is the
# single input feature, kept as an (n, 1) array.
# presumably column 0 is the price and column 1 the area — verify against the CSV
Y_data = dataset.iloc[:, 0].values
X_data = dataset.iloc[:, 1].values[:, np.newaxis]

In [None]:
# Check the shape of the input array (a single column after the reshape)
X_data.shape

In [None]:
# Scatter the unfiltered data: input column on the x-axis, target on the y-axis
plt.scatter(X_data[:,0], Y_data)

In [None]:
# Keep only the rows whose input value is below 6000, applying the same row
# selection to inputs and targets so they stay aligned
X_orig = X_data[X_data[:, 0] < 6000]
Y_orig = Y_data[X_data[:, 0] < 6000]

# Scatter the filtered data
plt.scatter(X_orig.ravel(), Y_orig)

In [None]:
# Standardize inputs and targets. Both fitted scalers are kept so the same
# transform can be applied to new inputs and undone on predictions.
# NOTE: this rebinds X and Y, replacing the arrays of the first section.
# NOTE(review): the scalers are fitted on all samples, before the train/test
# split in the next cell — confirm this is acceptable for the evaluation.
from sklearn.preprocessing import StandardScaler

xscaler = StandardScaler()
yscaler = StandardScaler()

# inputs are already an (n, 1) column
X = xscaler.fit_transform(X_orig)

# targets are 1-D, so they are turned into a column first
Y = yscaler.fit_transform(Y_orig[:, np.newaxis])

In [None]:
# Split the scaled data: one third is held out for testing, and the fixed
# random_state makes the split reproducible

from sklearn.model_selection import train_test_split

X_Train, X_Test, Y_Train, Y_Test = train_test_split(
    X,
    Y,
    test_size=1/3,
    random_state=0,
)

In [None]:
# Fit a scikit-learn linear regression on the training split

from sklearn.linear_model import LinearRegression

regressor = LinearRegression()
regressor.fit(X=X_Train, y=Y_Train)

In [None]:
# Predict on the held-out inputs; the model was fitted on scaled targets, so
# these predictions are in the scaled target space as well
Y_Pred = regressor.predict(X_Test)

In [None]:
# Training-set view: samples in red, fitted line in blue.
# NOTE: the plotted values are the standardized ones, not raw sq. feet / price.

plt.title('Price vs Sq. Feet Size  (Training Set)')
plt.xlabel('Size (sq.feet)')
plt.ylabel('Price')
plt.scatter(X_Train.ravel(), Y_Train, color='red')
plt.plot(X_Train.ravel(), regressor.predict(X_Train), color='blue')
plt.show()

In [None]:
# Test-set view: held-out samples in red. The blue line is drawn from the
# training inputs; it is the same fitted line the model uses everywhere.
# NOTE: the plotted values are the standardized ones, not raw sq. feet / price.

plt.title('Price vs Sq. Feet Size  (Test Set)')
plt.xlabel('Size (sq.feet)')
plt.ylabel('Price')
plt.scatter(X_Test.ravel(), Y_Test, color='red')
plt.plot(X_Train.ravel(), regressor.predict(X_Train), color='blue')
plt.show()

In [None]:
#predict function to return price of house given its size in sq. feet

def predict_price(size):
    """Predict the price of a house from its size in square feet.

    The regressor was fitted on standardized inputs and targets, so the raw
    size is first passed through ``xscaler`` and the model output is mapped
    back through ``yscaler``; otherwise the result would be a standardized
    score rather than a price.

    Args:
        size: House size in square feet.

    Returns:
        The first row of the prediction, expressed in the original price
        units.
    """
    arr = np.array([size]).reshape(-1,1)
    # Apply the same scaling the training inputs went through.
    scaled = xscaler.transform(arr)
    pred_scaled = regressor.predict(scaled)
    # Undo the target scaling to get back to price units.
    pred = yscaler.inverse_transform(pred_scaled.reshape(-1,1))
    return pred[0]

In [None]:
# Polynomial features: the scaled input column followed by its element-wise square
X_poly = np.c_[X, X**2]