<a href="https://colab.research.google.com/github/SandySingh72/DATA_Analytics/blob/main/AI_Example_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [None]:
# Read the data set from the notebook's working directory.
conc = pd.read_csv('Concrete_Data.csv')

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
train, test = train_test_split(
    conc,
    test_size=0.3,
    random_state=25,
)

In [None]:
# (rows, columns) of the training and test partitions, shown as the cell output.
(train.shape, test.shape)

((721, 9), (309, 9))

In [None]:
# 'Strength' is the regression target; every other column is used as a predictor.
x_train, y_train = train.drop(columns='Strength'), train['Strength']
x_test, y_test = test.drop(columns='Strength'), test['Strength']

In [None]:
# Sanity check: features and target should have matching row counts in each split.
tuple(part.shape for part in (x_train, y_train, x_test, y_test))

((721, 8), (721,), (309, 8), (309,))

# **Linear Regression**

In [None]:
# Baseline model: linear regression fitted on all training predictors.
lr = LinearRegression()
lr.fit(x_train, y_train)
y_pred = lr.predict(x_test)

# Preview only the first few predictions; echoing the whole array floods the
# notebook with hundreds of numbers and buries the narrative.
y_pred[:10]

array([24.63744908, 39.90407578, 70.01775407, 20.40523647, 32.59633754,
       29.29390819, 44.96089815, 36.94349917, 59.56573569, 40.28756844,
       28.7858216 , 25.8387483 , 24.62666675, 22.82237388, 26.34396966,
       22.94394241, 47.87730111, 64.22769893, 35.15075036, 48.63092068,
       22.54088407, 49.67979217, 56.01961194, 43.10580997, 27.15995207,
       18.4706122 , 21.60258711, 26.09174258, 51.29184153, 27.51093117,
       13.00769019, 22.64677381, 46.39998826, 10.76923133, 25.83391714,
       38.19227805, 36.57670305, 19.57037283, 27.7534638 , 13.3512502 ,
       33.45938173, 29.27657092, 16.91270404, 23.54596594, 55.58346892,
       43.69280218, 22.48786911, 35.45353514, 22.39237411, 30.52314489,
       25.55395984, 55.20785886, 49.88908525, 28.73733324, 55.15223318,
       31.60392915, 32.24741192, 37.36413005, 23.78647336, 32.58828066,
       25.85259258, 18.41438378, 19.10081516, 32.5501967 , 22.32961108,
       43.87798892, 33.2838014 , 24.58206306, 42.47344244, 24.63

In [None]:
# Fit the linear model, predict the held-out rows, and report the test-set
# mean squared error as the cell output.
lr = LinearRegression().fit(x_train, y_train)
y_pred = lr.predict(x_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
mse

99.36941323003587

# **Decision Tree Regression**

In [None]:
# Single decision tree with default settings; the seed fixes the tree that is grown.
dtr = DecisionTreeRegressor(random_state=25).fit(x_train, y_train)
y_pred = dtr.predict(x_test)

# Test-set mean squared error, displayed as the cell output.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
mse

50.996473139158574

# **Random Forest Regressor**

In [None]:
# Random forest with default settings; the seed makes the ensemble reproducible.
rf = RandomForestRegressor(random_state=25).fit(x_train, y_train)
y_pred = rf.predict(x_test)

# Test-set mean squared error, displayed as the cell output.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
mse

31.04131578144407

# **Gradient Boosting Regressor**

In [None]:
# Gradient boosting with default settings; the seed makes the fit reproducible.
gbm = GradientBoostingRegressor(random_state=25).fit(x_train, y_train)
y_pred = gbm.predict(x_test)

# Test-set mean squared error, displayed as the cell output.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
mse

30.302423463163525

# **Hyper-Parameter Tuning**

In [None]:
# Sweep max_features for the random forest and print the error for each candidate.
# NOTE(review): the candidates are ranked on the test set, so the winning error is
# an optimistic estimate; consider cross-validating on the training data instead.
features = [2, 3, 4, 5, 6]
for f in features:
    rf = RandomForestRegressor(max_features=f, random_state=25).fit(x_train, y_train)
    y_pred = rf.predict(x_test)
    print("max_features =", f, "Error =", mean_squared_error(y_test, y_pred))

max_features = 2 Error = 33.43890638973153
max_features = 3 Error = 32.19815697438136
max_features = 4 Error = 31.11775123798851
max_features = 5 Error = 30.469584562007235
max_features = 6 Error = 30.547323052994635


The error is lowest at max_features = 5, so max_features = 5 is the best value of this hyper-parameter for the random forest among the values tried.

In [None]:
# Exhaustive grid over learning rate and tree depth for gradient boosting.
# Each entry of `scores` is [learning_rate, max_depth, test-set MSE].
# NOTE(review): the grid is scored on the test set, so the best MSE is an
# optimistic estimate; consider cross-validating on the training data instead.
rates = [0.1, 0.3, 0.5, 0.8]
depth = [2, 3, 4, 5]
scores = []
for r in rates:
    for d in depth:
        gbm = GradientBoostingRegressor(
            learning_rate=r,
            max_depth=d,
            random_state=25,
        ).fit(x_train, y_train)
        y_pred = gbm.predict(x_test)
        scores.append([r, d, mean_squared_error(y_test, y_pred)])

In [None]:
# Tabulate the grid-search results, one row per (learning_rate, max_depth) pair.
df_scores = pd.DataFrame(
    scores,
    columns=['learning_rate', 'max_depth', 'mse'],
)

# Display the combinations from lowest to highest error (ascending is the default).
df_scores.sort_values('mse')

Unnamed: 0,learning_rate,max_depth,mse
5,0.3,3,22.301056
3,0.1,5,23.838663
7,0.3,5,24.112086
8,0.5,2,24.84623
2,0.1,4,25.505446
9,0.5,3,25.806327
6,0.3,4,26.102322
4,0.3,2,26.377228
13,0.8,3,28.524357
12,0.8,2,28.969493


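Inferencing steps:
1. Refit the gradient boosting model with the best-scoring combination from the table above (learning_rate = 0.3, max_depth = 3) on the training data, then predict on the test set.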

# **Inferencing**

In [None]:
# Refit the best grid-search configuration. The hyper-parameters are read from
# the lowest-error row of `df_scores` instead of being hard-coded, so this cell
# stays in sync with the search if the grid is ever changed.
best_row = df_scores.loc[df_scores['mse'].idxmin()]
best_model = GradientBoostingRegressor(
    random_state=25,
    learning_rate=float(best_row['learning_rate']),
    # A row taken from the mixed int/float frame is float-typed; the estimator
    # requires an integer depth.
    max_depth=int(best_row['max_depth']),
)
best_model.fit(x_train, y_train)

# Predictions of the selected model on the held-out rows.
y_pred = best_model.predict(x_test)