In [44]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor



In [13]:
# Read the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='Boston.csv')

# Preview the first five rows (pass another count, e.g. df.head(10), for more)
df.head(n=5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [14]:
# Preview the last five rows (pass another count, e.g. df.tail(10), for more)
df.tail(n=5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0,0.573,6.12,76.7,2.2875,1,273,21.0,396.9,9.08,20.6
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.9,5.64,23.9
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0
505,0.04741,0.0,11.93,0,0.573,6.03,80.8,2.505,1,273,21.0,396.9,7.88,11.9


In [15]:
# Count the missing entries in each column
df.isna().sum()

CRIM       0
ZN         0
INDUS      0
CHAS       0
NOX        0
RM         0
AGE        0
DIS        0
RAD        0
TAX        0
PTRATIO    0
B          0
LSTAT      0
PRICE      0
dtype: int64

In [27]:
# Features (independent variables): every column except the PRICE target
X = df.drop(columns='PRICE')

# Target (dependent variable): the PRICE column
y = df['PRICE']

In [28]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [33]:
# Instantiate the linear regression estimator
model = LinearRegression()

# Fit it on the training split
model.fit(X=X_train, y=y_train)
                        

In [34]:
# Predict the target for the held-out test rows and display the result
y_pred = model.predict(X=X_test)
y_pred

array([28.99672362, 36.02556534, 14.81694405, 25.03197915, 18.76987992,
       23.25442929, 17.66253818, 14.34119   , 23.01320703, 20.63245597,
       24.90850512, 18.63883645, -6.08842184, 21.75834668, 19.23922576,
       26.19319733, 20.64773313,  5.79472718, 40.50033966, 17.61289074,
       27.24909479, 30.06625441, 11.34179277, 24.16077616, 17.86058499,
       15.83609765, 22.78148106, 14.57704449, 22.43626052, 19.19631835,
       22.43383455, 25.21979081, 25.93909562, 17.70162434, 16.76911711,
       16.95125411, 31.23340153, 20.13246729, 23.76579011, 24.6322925 ,
       13.94204955, 32.25576301, 42.67251161, 17.32745046, 27.27618614,
       16.99310991, 14.07009109, 25.90341861, 20.29485982, 29.95339638,
       21.28860173, 34.34451856, 16.04739105, 26.22562412, 39.53939798,
       22.57950697, 18.84531367, 32.72531661, 25.0673037 , 12.88628956,
       22.68221908, 30.48287757, 31.52626806, 15.90148607, 20.22094826,
       16.71089812, 20.52384893, 25.96356264, 30.61607978, 11.59

In [35]:
# Score the test-set predictions: mean squared error and R-squared
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f'mean squared error is {mse}')
print(f'R-Squared is {r2}')

mean squared error is 24.291119474973538
R-Squared is 0.6687594935356317


In [38]:
# Fit a decision tree regressor on the training split.
# random_state is pinned (same seed as the train/test split) because
# scikit-learn randomly permutes the candidate features at every split, so an
# unseeded tree can differ from run to run and the metrics would not be
# reproducible after a kernel restart.
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)


In [39]:
# Predict the target for the test rows with the model fitted above
y_pred = model.predict(X=X_test)


In [42]:
def compute_mse(y_true, y_pred):
    """Return the mean squared error between observed and predicted values.

    Both inputs are converted to float NumPy arrays before subtracting, so
    plain Python lists are accepted and two pandas Series are compared by
    position instead of being aligned on their index labels (label alignment
    yields NaN wherever the indexes do not match).

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values, in the same order as ``y_true``.

    Returns
    -------
    numpy.float64
        Mean of the squared element-wise differences.
    """
    errors = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
    return np.mean(errors ** 2)

def compute_rse(y_true, y_pred, n_predictors=1):
    """Return the residual standard error, sqrt(RSS / (n - n_predictors - 1)).

    Inputs are converted to float NumPy arrays before subtracting, so plain
    lists are accepted and pandas Series are compared by position rather than
    by index label.

    Parameters
    ----------
    y_true : array-like
        Observed target values; ``len(y_true)`` is used as the number of
        observations ``n``.
    y_pred : array-like
        Predicted target values, in the same order as ``y_true``.
    n_predictors : int, optional
        Number of predictors assumed when computing the degrees of freedom.
        The default of 1 reproduces the ``n - 2`` divisor used for a simple
        (single-feature) linear model.

    Returns
    -------
    numpy.float64
        Square root of the residual sum of squares divided by the degrees of
        freedom.

    Raises
    ------
    ValueError
        If ``len(y_true) - n_predictors - 1`` is not positive, since the
        statistic is undefined (division by zero or a negative divisor).
    """
    residuals = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
    rss = np.sum(residuals ** 2)  # Residual Sum of Squares

    # One degree of freedom per predictor plus one for the intercept.
    degrees_of_freedom = len(y_true) - n_predictors - 1
    if degrees_of_freedom <= 0:
        raise ValueError(
            "compute_rse needs more than n_predictors + 1 observations "
            f"(got {len(y_true)} with n_predictors={n_predictors})"
        )
    return np.sqrt(rss / degrees_of_freedom)


In [43]:
# Evaluate the current predictions with the hand-written metric helpers
mse_value = compute_mse(y_true=y_test, y_pred=y_pred)
rse_value = compute_rse(y_true=y_test, y_pred=y_pred)

# Report both metrics
print("MSE:", mse_value)
print("RSE:", rse_value)

MSE: 11.108529411764707
RSE: 3.3661105151197876


In [45]:

# Build a 100-tree random forest with a fixed seed, then fit it on the training split
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)
model.fit(X=X_train, y=y_train)


In [46]:
# Predict the target for the test rows with the model fitted above
y_pred = model.predict(X=X_test)

In [47]:
# Evaluate the current predictions with the hand-written metric helpers
mse_value, rse_value = (
    compute_mse(y_test, y_pred),
    compute_rse(y_test, y_pred),
)

# Report both metrics
print("MSE:", mse_value)
print("RSE:", rse_value)


MSE: 7.901513892156864
RSE: 2.8389336325458547
