In [None]:
# Importing Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,mean_absolute_error
import pickle
from sklearn import preprocessing
import matplotlib.pyplot as plt
# Use the seaborn look. matplotlib 3.6 renamed the bundled style to
# 'seaborn-v0_8' and 3.8 removed the old 'seaborn' name, so pick whichever
# name this installation actually provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

In [None]:
# Load the real-estate CSV into a DataFrame.
# The path is relative: the file is expected one level above the working directory.
df = pd.read_csv('../Real estate.csv')

In [None]:
# Column labels of the loaded frame (the later cells select columns by these names).
df.columns

In [None]:
# Preview the first rows of the raw data.
df.head()

In [None]:
# Normalizing the dataset.
# preprocessing.normalize defaults to axis=1, which rescales every *row* to unit
# L2 norm; that blends unrelated columns (including the target column) into each
# feature value. axis=0 scales every column independently instead, so a feature
# no longer depends on the other columns of the same row.
df_norm = pd.DataFrame(preprocessing.normalize(df, axis=0), columns=df.columns)

In [None]:
# Preview the first rows of the normalized frame.
df_norm.head()

In [None]:
# Feature matrix: house age and distance to the nearest MRT station (in that column order).
X = df_norm[["X2 house age", "X3 distance to the nearest MRT station"]].values
# Target vector: house price of unit area.
y = df_norm["Y house price of unit area"].values

In [None]:
# Shapes of the feature matrix X and the target vector y (displayed as a pair).
X.shape,y.shape

In [None]:
# Scatter each feature against the target on shared axes.
# Column 0 of X is house age, column 1 is distance to the nearest MRT station.
for _col, _marker, _label in (
    (0, '*', "house age"),
    (1, '.', "distance to the nearest MRT station"),
):
    plt.scatter(X[:, _col], y, marker=_marker, label=_label)
plt.title("X1 vs y & X2 vs y")
plt.legend()
plt.show()

In [None]:
# Hold out 25% of the rows for testing; the fixed random_state makes the split repeatable.
_split = train_test_split(X, y, test_size=0.25, random_state=42)
X_train, X_test, y_train, y_test = _split

In [None]:
# Create the LinearRegression estimator (not fitted yet).
model = LinearRegression()

In [None]:
# Fit the model on the training split only; the test split stays unseen.
model.fit(X_train,y_train)

In [None]:
# Persist the fitted model to disk with pickle.
# Plain "wb" is sufficient: this handle is only written to, never read back.
_model = "model.sav"
with open(_model, "wb") as f:
    pickle.dump(model, f)

In [None]:
# Load the model back from the pickle file.
# Opened read-only ("rb"): loading needs no write access, and "rb+" would fail
# on a read-only file.
# NOTE: pickle.load can execute arbitrary code, so only unpickle trusted files.
with open(_model, "rb") as f:
    model = pickle.load(f)

In [None]:
# Predict on both splits with the fitted model (training split first, then test).
y_train_predicted, y_test_predicted = map(model.predict, (X_train, X_test))

In [None]:
# Side-by-side actual vs. predicted targets for the test split ...
yTestDF = pd.DataFrame(dict(y_test_actual=y_test, y_test_predicted=y_test_predicted))
yTestDF.to_csv('y_test.csv', index=False)

# ... and for the training split; each frame is written to CSV without the index.
yTrainDF = pd.DataFrame(dict(y_train_actual=y_train, y_train_predicted=y_train_predicted))
yTrainDF.to_csv('y_train.csv', index=False)

In [None]:
# Fitted coefficients (one per feature column of X) and the intercept, shown as a pair.
(model.coef_, model.intercept_)

#### Linear Regression
**y = m1 * x1 + m2 * x2 + c**
```
Here,
x1 = House age

x2 = Distance to the nearest MRT station

y = House price of unit area


m1 = -0.230986

m2 = -0.00739114

c = 50.512646973853776
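```

*Note: the numbers above look like values copied from an earlier run; the preceding cell (`model.coef_, model.intercept_`) shows the values for the current fit.*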

In [None]:
# Generating a best-fit line: evaluate the fitted equation along a path that
# sweeps both features from their minimum to their maximum at the same time.
(m1, m2), c = model.coef_, model.intercept_

# X.T yields one row per feature column; sample 100 evenly spaced points per feature.
_X1, _X2 = (np.linspace(min(_col), max(_col), 100) for _col in X.T)
_y = m1 * _X1 + m2 * _X2 + c

In [None]:
# Plot the predicted values along the sampled path against each feature in turn.
for _xs, _label in (
    (_X1, "house age vs house price of unit area"),
    (_X2, "distance to the nearest MRT station vs house price of unit area"),
):
    plt.plot(_xs, _y, label=_label)
plt.title("Best Fit Line")
plt.legend()
plt.show()

In [None]:
# Regression error metrics
def calculateError(y_actual,y_pred):
    """Compute error metrics for a set of predictions.

    Parameters
    ----------
    y_actual : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_actual``.

    Returns
    -------
    tuple
        ``(MSE, MAE, RMSE)``, where RMSE is the square root of the MSE.
    """
    squared = mean_squared_error(y_actual, y_pred)
    absolute = mean_absolute_error(y_actual, y_pred)
    return squared, absolute, np.sqrt(squared)

In [None]:
# Metric names, in the same order calculateError returns its values (MSE, MAE, RMSE).
_metric_names = ("Mean Square Error", "Mean Absolute Error", "Root Mean Square Error")

# Training-split errors, keyed by metric name.
err = calculateError(y_train, y_train_predicted)
trainError = dict(zip(_metric_names, err))

# Test-split errors, keyed the same way.
err = calculateError(y_test, y_test_predicted)
testError = dict(zip(_metric_names, err))

In [None]:
# Display the error metrics computed on the training split.
trainError

In [None]:
# Display the error metrics computed on the held-out test split.
testError