# Machine Learning

## - Linear Regression

### Steps: 
1. Import all the modules
2. Load the dataset
3. Separate data into independent and dependent features
4. Split data into train and test
5. Model training
6. Model Prediction
7. Model Evaluation


In [68]:
!pip install scikit-learn




[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


### Step 1

In [69]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [70]:
import warnings
# Install a process-wide "ignore" filter so no warnings clutter the cell outputs.
# NOTE(review): with no category given, this hides every warning raised by the
# libraries used below as well — confirm that blanket suppression is intended.
warnings.filterwarnings('ignore')

### Step 2

In [71]:
# Load the dataset from a CSV file; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer="csv/EDA_Housing.csv")

### Step 3

In [72]:
df.columns

Index(['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'mainroad',
       'guestroom', 'basement', 'hotwaterheating', 'airconditioning',
       'parking', 'prefarea', 'furnishingstatus'],
      dtype='object')

In [73]:
# Independent features: the listed columns of `df` (everything but `price`),
# selected by label and kept in the order given here.
X = df.loc[
    :,
    [
        'area',
        'bedrooms',
        'bathrooms',
        'stories',
        'mainroad',
        'guestroom',
        'basement',
        'hotwaterheating',
        'airconditioning',
        'parking',
        'prefarea',
        'furnishingstatus',
    ],
]

In [74]:
X

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,7420,4,2,3,1,0,0,0,1,2,1,2
1,8960,4,4,4,1,0,0,0,1,3,0,2
2,9960,3,2,2,1,0,1,0,0,2,1,1
3,7500,4,2,2,1,0,1,0,1,3,1,2
4,7420,4,1,2,1,1,1,0,1,2,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...
524,3000,2,1,1,1,0,1,0,0,2,0,0
525,2400,3,1,1,0,0,0,0,0,0,0,1
526,3620,2,1,1,1,0,0,0,0,0,0,0
527,2910,3,1,1,0,0,0,0,0,0,0,2


In [75]:
# Dependent feature: `price`, selected with a list so `y` stays a one-column
# DataFrame rather than a Series.
y = df.loc[:, ['price']]

In [76]:
y


Unnamed: 0,price
0,13300000
1,12250000
2,12250000
3,12215000
4,11410000
...,...
524,1820000
525,1767150
526,1750000
527,1750000


### Step 4

In [77]:
# Hold out 20% of the rows as the test set; the fixed random_state makes the
# shuffle (and therefore the split) the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [78]:
X_train

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
137,6360,3,1,3,1,0,0,0,0,0,1,1
525,2400,3,1,1,0,0,0,0,0,0,0,1
416,6060,3,1,1,1,1,1,0,0,0,0,2
371,3850,3,1,1,1,0,0,0,0,2,0,0
69,4260,4,2,2,1,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
71,6500,3,2,3,1,0,0,0,1,0,1,2
106,9620,3,1,1,1,0,1,0,0,2,1,2
270,6650,3,1,2,1,1,0,0,0,0,0,1
435,6750,2,1,1,1,0,0,0,0,0,0,1


In [79]:
X_test

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
140,4400,4,1,2,1,0,0,0,1,2,1,1
397,1950,3,2,2,1,0,1,0,0,0,1,0
6,8580,4,3,4,1,0,0,0,1,2,1,1
334,4820,3,1,2,1,0,0,0,0,0,0,1
322,2145,4,2,1,1,0,1,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...
340,8250,3,1,1,1,0,0,0,0,2,0,2
457,8050,2,1,1,1,0,0,0,0,0,0,0
131,5800,3,2,4,1,0,0,0,1,0,0,0
338,2856,3,1,3,1,0,0,0,0,0,1,2


In [80]:
y_train

Unnamed: 0,price
137,5600000
525,1767150
416,3290000
371,3535000
69,6650000
...,...
71,6650000
106,6083000
270,4235000
435,3150000


In [81]:
y_test

Unnamed: 0,price
140,5565000
397,3430000
6,10150000
334,3780000
322,3920000
...,...
340,3773000
457,3003000
131,5740000
338,3780000


### Step 5

In [82]:
X.shape

(529, 12)

In [83]:
# Create the linear regression estimator and fit it on the training split only.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

### Step 6

In [85]:
# Predict on the training split first, then the test split, so both sets of
# predictions are available for evaluation.
y_pred_train, y_pred_test = map(model.predict, (X_train, X_test))

In [88]:
X_train[:3]

Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
137,6360,3,1,3,1,0,0,0,0,0,1,1
525,2400,3,1,1,0,0,0,0,0,0,0,1
416,6060,3,1,1,1,1,1,0,0,0,0,2


In [89]:
y_train[:3]

Unnamed: 0,price
137,5600000
525,1767150
416,3290000


In [93]:
y_pred_train[:3]

array([[5238737.71169141],
       [2329845.65503401],
       [4628988.83068627]])

In [94]:
y_test[:3]

Unnamed: 0,price
140,5565000
397,3430000
6,10150000


In [95]:
y_pred_test[:3]

array([[5520323.49677721],
       [4724855.91500771],
       [9681730.44604017]])

### Step 7


In [108]:
def model_evaluation(actual, predicted):
    """Print MSE, RMSE, MAE and R2 score for a set of predictions.

    Parameters
    ----------
    actual : array-like
        Ground-truth target values.
    predicted : array-like
        Predicted values, aligned element-wise with ``actual``.

    Returns
    -------
    None
        The four metrics are printed, one per line; nothing is returned.
    """
    mse = mean_squared_error(actual, predicted)
    # RMSE is the square root of the MSE computed above; reuse it instead of
    # calling mean_squared_error a second time.
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)

    print(f'MSE : {mse}')
    print(f'RMSE : {rmse}')
    print(f'MAE : {mae}')
    print(f'R2_Score : {r2}')

In [109]:
model_evaluation(y_train, y_pred_train)

MSE : 962080473352.9672
RMSE : 980857.0096364542
MAE : 723422.0276479106
R2_Score : 0.6841775957471263


In [111]:
model_evaluation(y_test, y_pred_test)

MSE : 1838995332478.42
RMSE : 1356095.6206987838
MAE : 941235.0045185983
R2_Score : 0.6278218700789799


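`model.predict()` can also be used on new, user-supplied input: pass the feature values in the same column order as `X` to get a predicted price.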