### **Importing Libraries**

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import warnings
warnings.filterwarnings('ignore')


from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
from sklearn.model_selection import train_test_split

### **Reading Data**

In [None]:
# Load the India rainfall CSV (1901-2015) and preview its first rows
csv_path = '/content/rainfall in india 1901-2015.csv'
data = pd.read_csv(csv_path)
data.head()

Unnamed: 0,DIVISION,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANNUAL,Jan-Mar,Apr-Jun,Jul-Sep,Oct-Dec
0,ANDAMAN & NICOBAR ISLANDS,1901,49.2,87.1,29.2,2.3,528.8,517.5,365.1,481.1,332.6,388.5,558.2,33.6,3373.2,136.3,560.3,1696.3,980.3
1,ANDAMAN & NICOBAR ISLANDS,1902,0.0,159.8,12.2,0.0,446.1,537.1,228.9,753.7,666.2,197.2,359.0,160.5,3520.7,159.8,458.3,2185.9,716.7
2,ANDAMAN & NICOBAR ISLANDS,1903,12.7,144.0,0.0,1.0,235.1,479.9,728.4,326.7,339.0,181.2,284.4,225.0,2957.4,156.7,236.1,1874.0,690.6
3,ANDAMAN & NICOBAR ISLANDS,1904,9.4,14.7,0.0,202.4,304.5,495.1,502.0,160.1,820.4,222.2,308.7,40.1,3079.6,24.1,506.9,1977.6,571.0
4,ANDAMAN & NICOBAR ISLANDS,1905,1.3,0.0,3.3,26.9,279.5,628.7,368.7,330.5,297.0,260.7,25.4,344.7,2566.7,1.3,309.7,1624.9,630.8


### **Data Exploration and Preprocessing**

In [None]:
# Summarise each column's dtype and non-null count to spot missing values
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4116 entries, 0 to 4115
Data columns (total 19 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   DIVISION  4116 non-null   object 
 1   YEAR      4116 non-null   int64  
 2   JAN       4112 non-null   float64
 3   FEB       4113 non-null   float64
 4   MAR       4110 non-null   float64
 5   APR       4112 non-null   float64
 6   MAY       4113 non-null   float64
 7   JUN       4111 non-null   float64
 8   JUL       4109 non-null   float64
 9   AUG       4112 non-null   float64
 10  SEP       4110 non-null   float64
 11  OCT       4109 non-null   float64
 12  NOV       4105 non-null   float64
 13  DEC       4106 non-null   float64
 14  ANNUAL    4090 non-null   float64
 15  Jan-Mar   4110 non-null   float64
 16  Apr-Jun   4107 non-null   float64
 17  Jul-Sep   4106 non-null   float64
 18  Oct-Dec   4103 non-null   float64
dtypes: float64(17), int64(1), object(1)
memory usage: 611.1+ KB


In [None]:
# Dimensions of the DataFrame as (rows, columns)
data.shape

(4116, 19)

In [None]:
# Count the missing (NaN) values in each column
data.isnull().sum()

DIVISION     0
YEAR         0
JAN          4
FEB          3
MAR          6
APR          4
MAY          3
JUN          5
JUL          7
AUG          4
SEP          6
OCT          7
NOV         11
DEC         10
ANNUAL      26
Jan-Mar      6
Apr-Jun      9
Jul-Sep     10
Oct-Dec     13
dtype: int64

In [None]:
# Count the rows that are exact duplicates of an earlier row
data.duplicated().sum()

0

In [None]:
# Replace every missing value with the mean of its own numeric column.
# numeric_only=True skips the text column DIVISION; without it, pandas >= 2.0
# raises a TypeError when asked to average strings.
# NOTE(review): this also fills the target column ANNUAL with the global mean;
# consider dropping those rows or rebuilding ANNUAL from the monthly columns.
data = data.fillna(data.mean(numeric_only=True))
data

Unnamed: 0,DIVISION,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANNUAL,Jan-Mar,Apr-Jun,Jul-Sep,Oct-Dec
0,ANDAMAN & NICOBAR ISLANDS,1901,49.2,87.1,29.2,2.3,528.8,517.5,365.1,481.1,332.6,388.5,558.2,33.6,3373.2,136.3,560.3,1696.3,980.3
1,ANDAMAN & NICOBAR ISLANDS,1902,0.0,159.8,12.2,0.0,446.1,537.1,228.9,753.7,666.2,197.2,359.0,160.5,3520.7,159.8,458.3,2185.9,716.7
2,ANDAMAN & NICOBAR ISLANDS,1903,12.7,144.0,0.0,1.0,235.1,479.9,728.4,326.7,339.0,181.2,284.4,225.0,2957.4,156.7,236.1,1874.0,690.6
3,ANDAMAN & NICOBAR ISLANDS,1904,9.4,14.7,0.0,202.4,304.5,495.1,502.0,160.1,820.4,222.2,308.7,40.1,3079.6,24.1,506.9,1977.6,571.0
4,ANDAMAN & NICOBAR ISLANDS,1905,1.3,0.0,3.3,26.9,279.5,628.7,368.7,330.5,297.0,260.7,25.4,344.7,2566.7,1.3,309.7,1624.9,630.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4111,LAKSHADWEEP,2011,5.1,2.8,3.1,85.9,107.2,153.6,350.2,254.0,255.2,117.4,184.3,14.9,1533.7,7.9,196.2,1013.0,316.6
4112,LAKSHADWEEP,2012,19.2,0.1,1.6,76.8,21.2,327.0,231.5,381.2,179.8,145.9,12.4,8.8,1405.5,19.3,99.6,1119.5,167.1
4113,LAKSHADWEEP,2013,26.2,34.4,37.5,5.3,88.3,426.2,296.4,154.4,180.0,72.8,78.1,26.7,1426.3,60.6,131.1,1057.0,177.6
4114,LAKSHADWEEP,2014,53.2,16.1,4.4,14.9,57.4,244.1,116.1,466.1,132.2,169.2,59.0,62.3,1395.0,69.3,76.7,958.5,290.5


In [None]:
# Re-count missing values per column to confirm the imputation left none
data.isnull().sum()

DIVISION    0
YEAR        0
JAN         0
FEB         0
MAR         0
APR         0
MAY         0
JUN         0
JUL         0
AUG         0
SEP         0
OCT         0
NOV         0
DEC         0
ANNUAL      0
Jan-Mar     0
Apr-Jun     0
Jul-Sep     0
Oct-Dec     0
dtype: int64

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
data.describe()

Unnamed: 0,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANNUAL,Jan-Mar,Apr-Jun,Jul-Sep,Oct-Dec
count,4116.0,4116.0,4116.0,4116.0,4116.0,4116.0,4116.0,4116.0,4116.0,4116.0,4116.0,4116.0,4116.0,4116.0,4116.0,4116.0,4116.0,4116.0
mean,1958.218659,18.95732,21.805325,27.359197,43.127432,85.745417,230.234444,347.214334,290.263497,197.361922,95.507009,39.866163,18.87058,1411.0089,40.747786,155.901753,1064.724769,154.100487
std,33.140898,33.569044,35.896396,46.925176,67.798192,123.189974,234.56812,269.310313,188.678707,135.309591,99.434452,68.593545,42.318098,900.986632,59.265023,201.096692,706.881054,166.678751
min,1901.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.1,0.0,0.0,0.0,62.3,0.0,0.0,57.4,0.0
25%,1930.0,0.6,0.6,1.0,3.0,8.6,70.475,175.9,156.15,100.6,14.6,0.7,0.1,806.45,4.1,24.2,574.375,34.2
50%,1958.0,6.0,6.7,7.9,15.7,36.7,138.9,284.9,259.5,174.1,65.75,9.7,3.1,1125.45,19.3,75.2,882.25,98.8
75%,1987.0,22.125,26.8,31.225,49.825,96.825,304.95,418.225,377.725,265.725,148.3,45.825,17.7,1635.1,50.3,196.9,1287.55,212.6
max,2015.0,583.7,403.5,605.6,595.1,1168.6,1609.9,2362.8,1664.6,1222.0,948.3,648.9,617.5,6331.1,699.5,1745.8,4536.9,1252.5


In [None]:
# Visualization showing the difference of annual rainfall in different states.
# Total ANNUAL rainfall per division, summed over all years in the dataset.
annual_div = data[['DIVISION','ANNUAL']].groupby('DIVISION').sum()

# Plot the aggregated totals (one bar per division). Previously the raw
# data['ANNUAL'] / data['DIVISION'] series were passed, so annual_div was
# ignored and every (division, year) row was drawn as its own stacked segment.
(
    px.bar(
        annual_div.reset_index(),  # bring DIVISION back as a column so it can be referenced by name
        x = 'ANNUAL',
        y = 'DIVISION',
        color = 'DIVISION',
        orientation = 'h',
        width = 1100,
        height = 1000,
    )
    .update_layout(
        title = 'Annual Rainfall  V/S  Division',
        xaxis_title = 'Rainfall in MM',
        yaxis_title = 'Division',
        legend = dict(title = 'States'),
    )
    .update_yaxes(categoryorder = 'total descending')
)

### The above plot shows the total rainfall from 1901 to 2015 in each division. From this plot we can say:


*   West Rajasthan, Haryana, Punjab, Kutch, Delhi have received the least rainfall.
*   Coastal Karnataka, Goa, Kerala, Arunachal Pradesh, Meghalaya, Assam, Tripura and Andaman received the most rainfall.


In [None]:
# Visualization showing the annual rainfall over years from 1901 to 2015.
# Select ANNUAL before aggregating so that only that column is summed;
# summing the whole frame also aggregated the text column DIVISION for nothing.
annual_year = data.groupby('YEAR')['ANNUAL'].sum()

(
    px.line(annual_year, orientation = 'v', width = 1100, height = 1000, markers = True)
    .update_layout(
        title = 'Annual Rainfall over Years',
        xaxis_title = "Year",
        yaxis_title = "Rainfall in MM",
        showlegend = False,
    )
)

### The above plot shows the total rainfall (summed over all divisions) for each year from 1901 to 2015. From this plot we can say:

*   The highest rainfall totals were recorded from the 1950s onward.



In [None]:
# Visualization showing the rainfall of each month in every year
# (monthly columns summed across all rows that share a YEAR)
month_cols = ['JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC']
year_annual_month = data[['YEAR'] + month_cols].groupby('YEAR').sum()

# One line per month column, with the year index on the x-axis
(
    px.line(year_annual_month, orientation = 'v', width = 1100, height = 1000)
    .update_layout(
        title = 'Year V/S Rainfall in each Month',
        xaxis_title = "Year",
        yaxis_title = "Rainfall in MM",
        legend = dict(title = 'Months'),
    )
)

### The above plot shows the rainfall in each month for every year. From this plot we can say:


*   July experiences the heaviest rainfall.
*   February experiences the least rainfall.



In [None]:
# Visualization comparing the quarterly rainfall for each year
# (quarter columns summed across all rows that share a YEAR)
quarter_cols = ['Jan-Mar','Apr-Jun','Jul-Sep','Oct-Dec']
annual_year_quaterly = data[['YEAR', *quarter_cols]].groupby('YEAR').sum()

# One line per quarter column, with the year index on the x-axis
(
    px.line(annual_year_quaterly, orientation = 'v', width = 1100, height = 1000)
    .update_layout(
        title = 'Quaterly Rainfall V/S Year',
        xaxis_title = "Year",
        yaxis_title = "Rainfall in MM",
        legend = dict(title = 'Quarters'),
    )
)

### From this plot we can say:


*   The July–September quarter receives the most rainfall.
*   The January–March quarter receives the least rainfall.



In [None]:
# Visualization showing the divisions V/S rainfall in every month
# (monthly columns summed across all rows of each DIVISION)
monthly_cols = ['JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC']
div_annual_monthly = data[['DIVISION', *monthly_cols]].groupby('DIVISION').sum()

# Horizontal bars: one row per division, one coloured segment per month
(
    px.bar(div_annual_monthly, orientation = 'h', width = 1100, height = 1000)
    .update_layout(
        title = 'Division V/S Rainfall in each Month',
        xaxis_title = 'Rainfall in MM',
        yaxis_title = 'Division',
        legend = dict(title = 'Months'),
    )
)

In [None]:
# Visualization showing the divisions V/S rainfall in each quarter
# (quarter columns summed across all rows of each DIVISION)
div_annual_quaterly = data[['DIVISION','Jan-Mar','Apr-Jun','Jul-Sep','Oct-Dec']].groupby('DIVISION').sum()

# Title corrected: this chart is quarterly, but the copied title said "in each Month"
(
    px.bar(div_annual_quaterly, orientation = 'h', width = 1100, height = 1000)
    .update_layout(
        title = 'Division V/S Rainfall in each Quarter',
        xaxis_title = 'Rainfall in MM',
        yaxis_title = 'Division',
        legend = dict(title = 'Quarters'),
    )
)

In [None]:
# Visualization showing how the monthly columns and ANNUAL correlate with each other
corr_cols = ['JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC', 'ANNUAL']
# Pairwise correlation matrix, rounded to two decimals for readable cell labels
heat_vis = data[corr_cols].corr().round(2)
px.imshow(heat_vis, text_auto=True, aspect ='auto')

In [None]:
# Preview the first rows of the DataFrame after imputation
data.head()

Unnamed: 0,DIVISION,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANNUAL,Jan-Mar,Apr-Jun,Jul-Sep,Oct-Dec
0,ANDAMAN & NICOBAR ISLANDS,1901,49.2,87.1,29.2,2.3,528.8,517.5,365.1,481.1,332.6,388.5,558.2,33.6,3373.2,136.3,560.3,1696.3,980.3
1,ANDAMAN & NICOBAR ISLANDS,1902,0.0,159.8,12.2,0.0,446.1,537.1,228.9,753.7,666.2,197.2,359.0,160.5,3520.7,159.8,458.3,2185.9,716.7
2,ANDAMAN & NICOBAR ISLANDS,1903,12.7,144.0,0.0,1.0,235.1,479.9,728.4,326.7,339.0,181.2,284.4,225.0,2957.4,156.7,236.1,1874.0,690.6
3,ANDAMAN & NICOBAR ISLANDS,1904,9.4,14.7,0.0,202.4,304.5,495.1,502.0,160.1,820.4,222.2,308.7,40.1,3079.6,24.1,506.9,1977.6,571.0
4,ANDAMAN & NICOBAR ISLANDS,1905,1.3,0.0,3.3,26.9,279.5,628.7,368.7,330.5,297.0,260.7,25.4,344.7,2566.7,1.3,309.7,1624.9,630.8


In [None]:
# Split the DataFrame into two 2-D NumPy arrays:
#   X - columns 0..13 (DIVISION, YEAR and the twelve monthly columns)
#   Y - column 14 (ANNUAL), kept as a single-column array
feature_positions = list(range(14))
target_positions = [14]
X = data.iloc[:, feature_positions].values
Y = data.iloc[:, target_positions].values

In [None]:
# Display the feature array X
X

array([['ANDAMAN & NICOBAR ISLANDS', 1901, 49.2, ..., 388.5, 558.2, 33.6],
       ['ANDAMAN & NICOBAR ISLANDS', 1902, 0.0, ..., 197.2, 359.0, 160.5],
       ['ANDAMAN & NICOBAR ISLANDS', 1903, 12.7, ..., 181.2, 284.4,
        225.0],
       ...,
       ['LAKSHADWEEP', 2013, 26.2, ..., 72.8, 78.1, 26.7],
       ['LAKSHADWEEP', 2014, 53.2, ..., 169.2, 59.0, 62.3],
       ['LAKSHADWEEP', 2015, 2.2, ..., 165.4, 231.0, 159.0]], dtype=object)

In [None]:
# Display the target array Y
Y

array([[3373.2],
       [3520.7],
       [2957.4],
       ...,
       [1426.3],
       [1395. ],
       [1642.9]])

## Encoding Dataset

In [None]:
# Label-encode every column of X and the target Y, keeping one fitted encoder
# per column: LE1..LE14 for the 14 columns of X, LE15 for Y.
# NOTE(review): only column 0 (DIVISION) is categorical. Encoding the numeric
# columns and the target replaces each value by the rank of its distinct value,
# discarding the real magnitudes -- confirm this is intended before relying on
# the models trained on it.
encoders = []
for col in range(X.shape[1]):
    encoder = LabelEncoder()
    X[:, col] = encoder.fit_transform(X[:, col])
    encoders.append(encoder)

# Expose the encoders under their original names. Column 13 now gets its own
# encoder LE14; previously LE13 was re-fitted on it, which left LE14 unfitted
# and overwrote LE13's mapping for column 12.
(LE1, LE2, LE3, LE4, LE5, LE6, LE7,
 LE8, LE9, LE10, LE11, LE12, LE13, LE14) = encoders

LE15 = LabelEncoder()
# LabelEncoder expects a 1-D array; flatten Y explicitly instead of relying on
# scikit-learn's implicit (and warning-emitting) conversion.
Y = LE15.fit_transform(Y.ravel())

In [None]:
# Show the encoded feature array followed by its number of dimensions
print(X, X.ndim, sep="\n")

[[0 0 446 ... 1882 1237 323]
 [0 1 0 ... 1417 1219 730]
 [0 2 127 ... 1350 1168 766]
 ...
 [18 112 261 ... 632 645 265]
 [18 113 475 ... 1283 515 513]
 [18 114 22 ... 1260 1109 727]]
2


In [None]:
# Show the encoded target array followed by its number of dimensions
print(Y, Y.ndim, sep="\n")

[3549 3595 3354 ... 2426 2371 2712]
1


In [None]:
# Turn Y into a single-column 2-D array, then show it and its number of dimensions
Y = np.reshape(Y, (-1, 1))
print(Y, Y.ndim, sep="\n")

[[3549]
 [3595]
 [3354]
 ...
 [2426]
 [2371]
 [2712]]
2


## Feature Scaling

In [None]:
# Standardise X and Y, each with its own StandardScaler.
# Previously one scaler was fitted on X and then re-fitted on Y, which threw
# away the statistics learned from X (so X could no longer be transformed or
# inverse-transformed consistently).
# NOTE(review): both scalers are fitted on the full dataset before the
# train/test split, so test-set statistics leak into training; consider
# fitting them on the training split only.
FS_X = StandardScaler()
X = FS_X.fit_transform(X)

FS_Y = StandardScaler()
Y = FS_Y.fit_transform(Y)

# Backward compatibility: FS used to end up fitted on Y, so it keeps that meaning
FS = FS_Y

In [None]:
# Show the feature array after standard scaling
print(X)

[[-1.70101042 -1.72673688  1.49008972 ...  2.07214959  2.83993465
   0.96225756]
 [-1.70101042 -1.69655901 -0.76388594 ...  1.26613398  2.78727116
   3.01534725]
 [-1.70101042 -1.66638115 -0.12205879 ...  1.1499984   2.63805794
   3.19694732]
 ...
 [ 0.03932239  1.65318383  0.55514466 ... -0.09455904  1.10789093
   0.66967967]
 [ 0.03932239  1.6833617   1.63664868 ...  1.03386282  0.72754349
   1.92070238]
 [ 0.03932239  1.71353956 -0.65270329 ...  0.99399538  2.46543871
   3.00021391]]


In [None]:
# Show the target array after standard scaling
print(Y)

[[1.64040159]
 [1.68426451]
 [1.45446095]
 ...
 [0.56957418]
 [0.51712938]
 [0.84228713]]


## Splitting the Dataset into Training and Test Set

In [None]:
# Hold out 30% of the rows as a test set; random_state = 0 makes the split repeatable.
#   X_train / Y_train - independent / dependent variables of the training data
#   X_test  / Y_test  - independent / dependent variables of the test data
split_parts = train_test_split(X, Y, test_size = 0.3, random_state = 0)
X_train, X_test, Y_train, Y_test = split_parts

In [None]:
# Show the training features
print(X_train)

[[ 1.58628488 -0.27819943 -0.67291832 ... -1.16924652 -0.77921752
  -0.65700973]
 [ 0.32937786 -0.91193456 -0.54657439 ...  1.75147671 -0.2408796
  -0.66205418]
 [ 0.03932239 -1.66638115  1.87922902 ... -0.01309079  0.32964156
   0.2812573 ]
 ...
 [-0.44410339  0.26500211  0.78256373 ... -0.59376871 -0.08874063
   2.71772491]
 [-0.05736277  1.29104947 -0.48087555 ...  0.14464559 -0.68851928
  -0.66709862]
 [ 0.13600754  1.59282811 -0.76388594 ... -0.22802829 -0.77921752
  -0.66709862]]


In [None]:
# Show the training targets
print(Y_train)

[[-1.70653004]
 [ 1.158863  ]
 [ 0.54764272]
 ...
 [ 0.75742191]
 [ 1.12072132]
 [-0.74726701]]


In [None]:
# Show the test features
print(X_test)

[[ 1.1028591  -0.97229029  2.45535733 ...  0.83972572  1.67256059
   0.63941299]
 [-0.34741824 -1.5456697   2.62213131 ... -0.95084229 -0.68851928
  -0.62674305]
 [-0.73415886 -1.00246815 -0.22313394 ...  1.17253217 -0.68851928
  -0.66709862]
 ...
 [ 1.58628488  0.62713648 -0.61732699 ... -1.0080434  -0.5597863
  -0.58134303]
 [-1.12089948  0.89873725 -0.76388594 ...  0.51731948 -0.70899953
  -0.10212063]
 [-0.44410339 -1.5456697   2.90514171 ... -0.93350862 -0.77921752
  -0.66709862]]


In [None]:
# Show the test targets
print(Y_test)

[[-0.83213223]
 [-0.15893174]
 [ 0.30162894]
 ...
 [-1.65217816]
 [ 1.66614722]
 [-0.68147263]]


## Training Models



#### 1.  **Random Forest**





In [None]:
# Create and fit the Random Forest regressor (100 trees, fixed seed for repeatability).
# Y_train is a single-column 2-D array; ravel() passes the 1-D target the
# estimator expects instead of relying on its implicit, warning-emitting conversion.
RF = RandomForestRegressor(n_estimators = 100, random_state = 0).fit(X_train, Y_train.ravel())

In [None]:
# Random Forest predictions for the test split and the training split
Y_test_pred, Y_train_pred = (RF.predict(part) for part in (X_test, X_train))

In [None]:
# Report R2, MAE, MSE and RMSE of the Random Forest on the training and test sets.
# The *_score variables stay notebook globals; the comparison charts read them.
print('Random Forest')
print("-"*50)

# Training-set performance
print("Model Performance for Training Set")
train_acc_score = r2_score(Y_train, Y_train_pred)
train_mae_score = mean_absolute_error(Y_train, Y_train_pred)
train_mse_score = mean_squared_error(Y_train, Y_train_pred)
# RMSE is the square root of the MSE (it was previously computed from the MAE)
train_rmse_score = np.sqrt(train_mse_score)
print(" r2-Score : {}".format(round(train_acc_score, 2)))
print(" Mean Absolute Error : {}".format(round(train_mae_score, 2)))
print(" Mean Squared Error : {}".format(round(train_mse_score, 2)))
print(" Root Mean Squared Error : {}".format(round(train_rmse_score, 2)))
print("-"*50)

# Test-set performance
print("Model Performance for Test Set")
test_acc_score = r2_score(Y_test, Y_test_pred)
test_mae_score = mean_absolute_error(Y_test, Y_test_pred)
test_mse_score = mean_squared_error(Y_test, Y_test_pred)
# RMSE is the square root of the MSE (it was previously computed from the MAE)
test_rmse_score = np.sqrt(test_mse_score)
print(" r2-Score : {}".format(round(test_acc_score, 2)))
print(" Mean Absolute Error : {}".format(round(test_mae_score, 2)))
print(" Mean Squared Error : {}".format(round(test_mse_score, 2)))
print(" Root Mean Squared Error : {}".format(round(test_rmse_score, 2)))
print("-"*50)

Random Forest
--------------------------------------------------
Model Performance for Training Set
 r2-Score : 0.99
 Mean Absolute Error : 0.05
 Mean Squared Error : 0.01
 Root Mean Squared Error : 0.23
--------------------------------------------------
Model Performance for Test Set
 r2-Score : 0.97
 Mean Absolute Error : 0.13
 Mean Squared Error : 0.03
 Root Mean Squared Error : 0.36
--------------------------------------------------


#### 2.  **Decision Tree**

In [None]:
# Create and fit the Decision Tree regressor.
# random_state is fixed so that re-running the notebook reproduces the same
# tree and scores; without it the fitted tree can differ between runs.
DTR = DecisionTreeRegressor(random_state = 0).fit(X_train, Y_train)

In [None]:
# Decision Tree predictions for the training split and the test split
Y_tra_pred, Y_tes_pred = (DTR.predict(part) for part in (X_train, X_test))

In [None]:
# Report R2, MAE, MSE and RMSE of the Decision Tree on the training and test sets.
# The *_score variables stay notebook globals; the comparison charts read them.
print('Decision Tree')
print("-"*50)

# Training-set performance
print("Model Performance for Training Set")
tra_acc_score = r2_score(Y_train, Y_tra_pred)
tra_mae_score = mean_absolute_error(Y_train, Y_tra_pred)
tra_mse_score = mean_squared_error(Y_train, Y_tra_pred)
# RMSE is the square root of the MSE (it was previously computed from the MAE)
tra_rmse_score = np.sqrt(tra_mse_score)
print(" r2-Score : {}".format(round(tra_acc_score, 2)))
print(" Mean Absolute Error : {}".format(round(tra_mae_score, 2)))
print(" Mean Squared Error : {}".format(round(tra_mse_score, 2)))
print(" Root Mean Squared Error : {}".format(round(tra_rmse_score, 2)))
print("-"*50)

# Test-set performance
print("Model Performance for Test Set")
tes_acc_score = r2_score(Y_test, Y_tes_pred)
tes_mae_score = mean_absolute_error(Y_test, Y_tes_pred)
# Use the Decision Tree's own test predictions (Y_tes_pred); this line
# previously scored Y_test_pred, i.e. the Random Forest predictions.
tes_mse_score = mean_squared_error(Y_test, Y_tes_pred)
# RMSE is the square root of the MSE (it was previously computed from the MAE)
tes_rmse_score = np.sqrt(tes_mse_score)
print(" r2-Score : {}".format(round(tes_acc_score, 2)))
print(" Mean Absolute Error : {}".format(round(tes_mae_score, 2)))
print(" Mean Squared Error : {}".format(round(tes_mse_score, 2)))
print(" Root Mean Squared Error : {}".format(round(tes_rmse_score, 2)))
print("-"*50)

Decision Tree
--------------------------------------------------
Model Performance for Training Set
 r2-Score : 1.0
 Mean Absolute Error : 0.0
 Mean Squared Error : 0.0
 Root Mean Squared Error : 0.0
--------------------------------------------------
Model Performance for Test Set
 r2-Score : 0.89
 Mean Absolute Error : 0.24
 Mean Squared Error : 0.03
 Root Mean Squared Error : 0.49
--------------------------------------------------


#### 3.  **SVR**

In [None]:
# Create and fit a Support Vector Regressor with its default settings.
# Y_train is a single-column 2-D array; ravel() passes the 1-D target SVR
# expects instead of relying on its implicit, warning-emitting conversion.
SV_reg = SVR().fit(X_train, Y_train.ravel())

In [None]:
# SVR predictions for the training split and the test split
Y_tr_pred, Y_te_pred = (SV_reg.predict(part) for part in (X_train, X_test))

In [None]:
# Report R2, MAE, MSE and RMSE of the SVR on the training and test sets.
# The *_score variables stay notebook globals; the comparison charts read them.
print('Support Vector Regressor')
print("-"*50)

# Training-set performance
print("Model Performance for Training Set")
tr_acc_score = r2_score(Y_train, Y_tr_pred)
tr_mae_score = mean_absolute_error(Y_train, Y_tr_pred)
tr_mse_score = mean_squared_error(Y_train, Y_tr_pred)
# RMSE is the square root of the MSE (it was previously computed from the MAE)
tr_rmse_score = np.sqrt(tr_mse_score)
print(" r2-Score : {}".format(round(tr_acc_score, 2)))
print(" Mean Absolute Error : {}".format(round(tr_mae_score, 2)))
print(" Mean Squared Error : {}".format(round(tr_mse_score, 2)))
print(" Root Mean Squared Error : {}".format(round(tr_rmse_score, 2)))
print("-"*50)

# Test-set performance
print("Model Performance for Test Set")
te_acc_score = r2_score(Y_test, Y_te_pred)
te_mae_score = mean_absolute_error(Y_test, Y_te_pred)
te_mse_score = mean_squared_error(Y_test, Y_te_pred)
# RMSE is the square root of the MSE (it was previously computed from the MAE)
te_rmse_score = np.sqrt(te_mse_score)
print(" r2-Score : {}".format(round(te_acc_score, 2)))
print(" Mean Absolute Error : {}".format(round(te_mae_score, 2)))
print(" Mean Squared Error : {}".format(round(te_mse_score, 2)))
print(" Root Mean Squared Error : {}".format(round(te_rmse_score, 2)))
print("-"*50)

Support Vector Regressor
--------------------------------------------------
Model Performance for Training Set
 r2-Score : 0.99
 Mean Absolute Error : 0.06
 Mean Squared Error : 0.01
 Root Mean Squared Error : 0.24
--------------------------------------------------
Model Performance for Test Set
 r2-Score : 0.99
 Mean Absolute Error : 0.07
 Mean Squared Error : 0.01
 Root Mean Squared Error : 0.27
--------------------------------------------------


#### 4. **KNN**

In [None]:
# Build a K-nearest-neighbours regressor that uses 4 neighbours, then fit it
# on the training split (fit returns the estimator itself)
KNR = KNeighborsRegressor(n_neighbors = 4)
KNR = KNR.fit(X_train, Y_train)

In [None]:
# KNN predictions for the training split and the test split
Yktr_pred, Ykte_pred = (KNR.predict(part) for part in (X_train, X_test))

In [None]:
# Report R2, MAE, MSE and RMSE of the KNN regressor on the training and test sets.
# The *_score variables stay notebook globals; the comparison charts read them.
print('K Nearest Neighbors')
print("-"*50)

# Training-set performance
print("Model Performance for Training Set")
Ktr_acc_score = r2_score(Y_train, Yktr_pred)
Ktr_mae_score = mean_absolute_error(Y_train, Yktr_pred)
Ktr_mse_score = mean_squared_error(Y_train, Yktr_pred)
# RMSE is the square root of the MSE (it was previously computed from the MAE)
Ktr_rmse_score = np.sqrt(Ktr_mse_score)
print(" r2-Score : {}".format(round(Ktr_acc_score, 2)))
print(" Mean Absolute Error : {}".format(round(Ktr_mae_score, 2)))
print(" Mean Squared Error : {}".format(round(Ktr_mse_score, 2)))
print(" Root Mean Squared Error : {}".format(round(Ktr_rmse_score, 2)))
print("-"*50)

# Test-set performance
print("Model Performance for Test Set")
Kte_acc_score = r2_score(Y_test, Ykte_pred)
Kte_mae_score = mean_absolute_error(Y_test, Ykte_pred)
Kte_mse_score = mean_squared_error(Y_test, Ykte_pred)
# RMSE is the square root of the MSE (it was previously computed from the MAE)
Kte_rmse_score = np.sqrt(Kte_mse_score)
print(" r2-Score : {}".format(round(Kte_acc_score, 2)))
print(" Mean Absolute Error : {}".format(round(Kte_mae_score, 2)))
print(" Mean Squared Error : {}".format(round(Kte_mse_score, 2)))
print(" Root Mean Squared Error : {}".format(round(Kte_rmse_score, 2)))
print("-"*50)

K Nearest Neighbors
--------------------------------------------------
Model Performance for Training Set
 r2-Score : 0.97
 Mean Absolute Error : 0.12
 Mean Squared Error : 0.03
 Root Mean Squared Error : 0.35
--------------------------------------------------
Model Performance for Test Set
 r2-Score : 0.96
 Mean Absolute Error : 0.15
 Mean Squared Error : 0.04
 Root Mean Squared Error : 0.39
--------------------------------------------------


 ### **Chart Comparison of Algorithms**

In [None]:
# Heatmap comparing the training-set scores of the four regressors:
# one row per model (y1), one column per metric (x1).
y1 = ['Random Forest Regressor','Decision Tree Regressor','Support Vector Regressor','KNeighbors Regressor']
x1 = ['R2 Score','MAE','MSE','RMSE']

# Raw scores in the same row/column order as y1/x1
train_score_rows = (
    (train_acc_score, train_mae_score, train_mse_score, train_rmse_score),
    (tra_acc_score, tra_mae_score, tra_mse_score, tra_rmse_score),
    (tr_acc_score, tr_mae_score, tr_mse_score, tr_rmse_score),
    (Ktr_acc_score, Ktr_mae_score, Ktr_mse_score, Ktr_rmse_score),
)
# Round to two decimals so the cell labels stay readable
z1 = [[round(score, 2) for score in row] for row in train_score_rows]

px.imshow(z1, x=x1, y=y1, aspect="auto", title = "Scores for Training Dataset", text_auto = True).update_xaxes(side="top")

In [None]:
# Heatmap comparing the test-set scores of the four regressors:
# one row per model (y2), one column per metric (x2).
y2 = ['Random Forest Regressor','Decision Tree Regressor','Support Vector Regressor','KNeighbors Regressor']
x2 = ['R2 Score','MAE','MSE','RMSE']

# Raw scores in the same row/column order as y2/x2
test_score_rows = (
    (test_acc_score, test_mae_score, test_mse_score, test_rmse_score),
    (tes_acc_score, tes_mae_score, tes_mse_score, tes_rmse_score),
    (te_acc_score, te_mae_score, te_mse_score, te_rmse_score),
    (Kte_acc_score, Kte_mae_score, Kte_mse_score, Kte_rmse_score),
)
# Round to two decimals so the cell labels stay readable
z2 = [[round(score, 2) for score in row] for row in test_score_rows]

px.imshow(z2, x=x2, y=y2, aspect="auto", title = "Scores for Test Dataset", text_auto = True).update_xaxes(side="top")