# Part-1: Working with models


In [None]:
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.linear_model import LinearRegression as LR
from sklearn.preprocessing import OneHotEncoder, StandardScaler 
from sklearn.model_selection import train_test_split as TTS
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, mean_absolute_error, mean_squared_error, r2_score
import numpy as np

In [None]:
# Load the Part 1 dataset and preview its first three rows.
Data = pd.read_csv("./Part_1.csv")
display(Data.head(3))

# Replace missing values in these two columns with the placeholder 'NDF'.
# The filled Series is assigned back to the frame instead of calling
# fillna(inplace=True) on a column selection: that chained in-place form is
# deprecated in pandas 2.x and does not update the DataFrame under
# copy-on-write, which would silently leave the NaNs in place.
Data['CouncilArea'] = Data['CouncilArea'].fillna('NDF')
Data['Regionname'] = Data['Regionname'].fillna('NDF')


## First, perform scaling and encoding for better accuracy

In [None]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Work on a copy so the original `Data` frame is left untouched.
New_Data = Data.copy()

# Split the column names by dtype: numeric columns are standardised,
# object (string) columns are one-hot encoded.
Numerics = New_Data.select_dtypes(include=["int64", "float64"]).columns.tolist()
Categorical = New_Data.select_dtypes(include=["object"]).columns.tolist()

# Standardise each numeric column independently (zero mean, unit variance).
# NOTE(review): every int64/float64 column is scaled, which includes the
# "Price" target if it is numeric, so downstream error metrics would then be
# in scaled units -- confirm this is intended.
Scalar = StandardScaler()
for col in Numerics:
    New_Data[col] = Scalar.fit_transform(New_Data[col].values.reshape(-1, 1))

# One-hot encode the categorical columns.
# OneHotEncoder returns one indicator column per category, so its output
# cannot be stored back into the single source column. Instead, build a frame
# of indicator columns (named "<column>_<category>") and swap it in for the
# original categorical columns.
Encoder = OneHotEncoder(sparse_output=False)
if Categorical:  # fitting on zero columns would raise, so skip when there are none
    Encoded = Encoder.fit_transform(New_Data[Categorical])
    Encoded_Frame = pd.DataFrame(
        Encoded,
        columns=Encoder.get_feature_names_out(),
        index=New_Data.index,  # keep row alignment with New_Data for the concat
    )
    New_Data = pd.concat(
        [New_Data.drop(columns=Categorical), Encoded_Frame], axis=1
    )

display(New_Data)

#### 1) Create the target data and feature data where target data is price

#### 2) Create a linear regression model for Target and feature data

#### 3) Check if the model is overfitting or underfitting or it is accurate

#### 4) If the model is overfitting then apply ridge and lasso regression algorithm

In [None]:
# "Price" is the regression target; every other column is used as a feature.
Y = New_Data["Price"]
X = New_Data.drop("Price", axis=1)

# 70% of the rows go to training, the rest to testing; random_state makes
# the split reproducible.
X_train, X_test, Y_train, Y_test = TTS(X, Y, train_size=0.7, random_state=100)

# Fit an ordinary least-squares linear regression on the training split.
Model = LR().fit(X_train, Y_train)

# Predictions for the held-out rows.
Y_Pred = Model.predict(X_test)
print("\nNew Prediction = ", Y_Pred)

# Model.score returns R^2; report it as a percentage for each split so the
# training and testing fit can be compared.
Train = Model.score(X_train, Y_train) * 100
Test = Model.score(X_test, Y_test) * 100
print("\nTraining Score = ", Train)
print("\nTesting Score = ", Test)


#### 5) Extract slope and intercept value from the model

#### 6) Display Mean Squared Error

#### 7) Display Mean Absolute Error

#### 8) Display Root mean Squared error

#### 9) Display R2 score

In [None]:
import sklearn.metrics as metrics

# Report the fitted coefficients and the standard regression error metrics
# for the test-set predictions.

# The fitted estimator is bound to `Model` (capital M); the lowercase name
# `model` is never defined and raised NameError here.
# coef_ holds one coefficient per feature; only the first one is reported
# as the "slope", as in the original cell.
slope = Model.coef_[0]
intercept = Model.intercept_
print(f"Slope: {slope} and Intercept: {intercept}")
print()

mse = mean_squared_error(Y_test, Y_Pred)
print(f"Mean Squared Error: {mse}")
print()

mae = mean_absolute_error(Y_test, Y_Pred)
print(f"Mean Absolute Error: {mae}")
print()

# RMSE is the square root of the MSE computed above.
rmse = np.sqrt(mse)
print(f"Root Mean Squared Error: {rmse}")
print()

# Stored as `r2` so the result does not overwrite the `r2_score` function
# imported from sklearn.metrics at the top of the file.
r2 = metrics.r2_score(Y_test, Y_Pred)
print(f"R2 Score: {r2}")
print()

