In [None]:
## https://www.kaggle.com/datasets/shubhambathwal/flight-price-prediction

<img src="https://i.postimg.cc/ydZcb9cY/Hello.jpg">

<img src="https://i.postimg.cc/W4208kV2/inkpx-word-art-1.jpg">

<img src='https://i.postimg.cc/3wzWvF4d/3101522.jpg'>

<div style="border:4px solid navy; border-radius:30px; padding: 15px; background-size: cover">
<p><h2 style="color:navy; font-size:250%"><b>INTRODUCTION</b></h2></p>

<p><h5 style="font-size:130%; font-family:Arial ">The objective of the study is to analyse the flight booking dataset obtained from the “Ease My Trip” website and to conduct various statistical hypothesis tests in order to get meaningful information from it. The 'Linear Regression' statistical algorithm would be used to train the dataset and predict a continuous target variable. 'Easemytrip' is an internet platform for booking flight tickets, and hence a platform that potential passengers use to buy tickets. A thorough study of the data will aid in the discovery of valuable insights that will be of enormous value to passengers.</h5></p>

# <b><p style="background-image: url(https://i.postimg.cc/MpTLFhtf/wp9806504-network-desktop-wallpapers.jpg);background-size: cover;font-family:tahoma;font-size:120%;color:white;text-align:center;border-radius:15px 50px; padding:7px; border:solid 2px #09375b; box-shadow: 10px 10px 10px #042b4c">Table Of Content</p></b>

<a id="content"></a>    
<div style="border-radius:20px; padding: 15px; font-size:110%; text-align:left; background-image: url(https://i.postimg.cc/sXwGWcwC/download.jpg); background-size: cover">

<h2><span style="text-align:center; font-weight:bolder; color:navy; font-size:130%">Table of Contents:</span></h2>

 * **[Step 1 | Python Libraries](#setup)**
    -  [1.1 | Import Libraries](#import)
    -  [1.2 | Library configurations](#config)
 *  **[Step 2 | Data](#data)**
    -  [2.1 | Importing Data](#load_data)
    -  [2.2 | Data Informations](#info)
    -  [2.3 | Features](#features)
 *  **[Step 3 | visualizations](#visualizations)**
    -  [3.1 | Plots](#plots)
 *  **[Step 4 | Preprocessing](#prep)**
    -  [4.1 | Value](#value)
    -  [4.2 | Correlations](#corr)
    -  [4.3 | Define X ,y](#xy)
    -  [4.4 | Normalization](#norm)
 *  **[Step 5 | Modeling](#ml)**
    -  [5.1 | LinearRegression](#lr)
    -  [5.2 | DecisionTreeRegressor](#tree)
    -  [5.3 | RandomForestRegressor](#rf)
    -  [5.4 | Result](#result)
 *  **[Author](#author)**

<a id="setup"></a>
# <b><p style="background-image: url(https://i.postimg.cc/MpTLFhtf/wp9806504-network-desktop-wallpapers.jpg);background-size: cover;font-family:tahoma;font-size:120%;color:white;text-align:center;border-radius:15px 50px; padding:7px; border:solid 2px #09375b; box-shadow: 10px 10px 10px #042b4c">Step 1 | Python Libraries</p></b>

[🏠 Table of Contents](#content)

<a id="import"></a>
## <b><span style="color:navy">✈️ Step 1.1 | </span><span style="color:red">Import Libraries</span></b>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from termcolor import colored
import warnings

from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from sklearn.model_selection import train_test_split, GridSearchCV

# Confirmation banner; only reached when every import above succeeded.
print(colored('\nAll libraries imported successfully', 'green'))

<a id="config"></a>
## <b><span style="color:navy">✈️ Step 1.2 | </span><span style="color:red">Library configurations</span></b>

In [None]:
# Silence every warning for the rest of the notebook (this also hides deprecation notices).
warnings.filterwarnings('ignore')

# Plot styling: seaborn's dark grid theme.
sns.set_style('darkgrid')

# pandas behaviour and display options.
pd.set_option('mode.copy_on_write', True)    # enable pandas Copy-on-Write mode
pd.set_option('display.max_columns', None)   # show every column of a DataFrame
pd.set_option('display.max_colwidth', None)  # never truncate the content of a cell

In [None]:
# Confirmation banner for the configuration cell above.
print(colored('\nAll libraries configured successfully.', 'green'))

<a id="data"></a>
# <b><p style="background-image: url(https://i.postimg.cc/MpTLFhtf/wp9806504-network-desktop-wallpapers.jpg);background-size: cover;font-family:tahoma;font-size:120%;color:white;text-align:center;border-radius:15px 50px; padding:7px; border:solid 2px #09375b; box-shadow: 10px 10px 10px #042b4c">Step 2 | Data</p></b>

[🏠 Table of Contents](#content)

<a id="load_data"></a>
## <b><span style="color:navy">✈️ Step 2.1 | </span><span style="color:red">Importing Data</span></b>

In [None]:
# Load the flight-booking CSV (path is relative to the notebook's working directory)
# and preview its first five rows.
data = pd.read_csv('Clean_Dataset.csv')
data.head(n=5)

<a id="info"></a>
## <b><span style="color:navy">✈️ Step 2.2 | </span><span style="color:red">Data Informations</span></b>

In [None]:
# Column names, non-null counts, dtypes and memory usage of the raw frame.
data.info()

In [None]:
# Number of missing values in each column.
data.isnull().sum()

<div style="font-size:110%; font-weight:500; background-color:#d3def0">➡️ There are no null values.</div>

<div style="font-size:110%; font-weight:500; background-color:#d3def0">➡️ Drop the <code>Unnamed: 0</code> column.</div>

In [None]:
# Remove the 'Unnamed: 0' column by rebinding `data` to the reduced frame.
data = data.drop(columns=['Unnamed: 0'])

<div style="font-size:110%; font-weight:500; background-color:#d3def0">➡️ Rename the <code>class</code> column to <code>flight_class</code>, because <code>class</code> is a reserved keyword in Python.</div>

In [None]:
# `class` cannot be used with attribute access (data.class), so give the column a usable name.
data = data.rename(columns={'class': 'flight_class'})

<a id="features"></a>
## <b><span style="color:navy">✈️ Step 2.3 | </span><span style="color:red">Features</span></b>

<div style="border:3px solid navy; border-radius:30px; padding: 15px; background-size: cover">
<p><h2 style="color:navy; font-size:250%"><b>FEATURES</b></h2></p>


<p><h5 style="font-size:130%; font-family:Arial ">The various features of the cleaned dataset are explained below:
    
<h4><b>1) Airline: </b></h4>
<p><h5 style="font-size:110%; font-family:Arial ">The name of the airline company is stored in the airline column. It is a categorical feature having 6 different airlines.</h5></p>
<h4><b>2) Flight: </b></h4>
<p><h5 style="font-size:110%; font-family:Arial ">Flight stores information regarding the plane's flight code. It is a categorical feature.</h5></p>
<h4><b>3) Source City: </b></h4>
<p><h5 style="font-size:110%; font-family:Arial ">City from which the flight takes off. It is a categorical feature having 6 unique cities.</h5></p>
<h4><b>4) Departure Time: </b></h4>
<p><h5 style="font-size:110%; font-family:Arial ">This is a derived categorical feature created by grouping time periods into bins. It stores information about the departure time and has 6 unique time labels.</h5></p>
<h4><b>5) Stops: </b></h4>
<p><h5 style="font-size:110%; font-family:Arial ">A categorical feature with 3 distinct values that stores the number of stops between the source and destination cities.</h5></p>
<h4><b>6) Arrival Time: </b></h4>
<p><h5 style="font-size:110%; font-family:Arial ">This is a derived categorical feature created by grouping time intervals into bins. It has six distinct time labels and keeps information about the arrival time.</h5></p>
<h4><b>7) Destination City: </b></h4>
<p><h5 style="font-size:110%; font-family:Arial ">City where the flight will land. It is a categorical feature having 6 unique cities.</h5></p>
<h4><b>8) Class: </b></h4>
<p><h5 style="font-size:110%; font-family:Arial ">A categorical feature that contains information on seat class; it has two distinct values: Business and Economy.</h5></p>
<h4><b>9) Duration: </b></h4>
<p><h5 style="font-size:110%; font-family:Arial ">A continuous feature that displays the overall amount of time it takes to travel between cities in hours.</h5></p>
<h4><b>10) Days Left: </b></h4>
<p><h5 style="font-size:110%; font-family:Arial ">This is a derived feature that is calculated by subtracting the booking date from the trip date.</h5></p>
<h4><b>11) Price: </b></h4>
<p><h5 style="font-size:110%; font-family:Arial ">Target variable stores information of the ticket price.</h5></p>

<a id="visualizations"></a>
# <b><p style="background-image: url(https://i.postimg.cc/MpTLFhtf/wp9806504-network-desktop-wallpapers.jpg);background-size: cover;font-family:tahoma;font-size:120%;color:white;text-align:center;border-radius:15px 50px; padding:7px; border:solid 2px #09375b; box-shadow: 10px 10px 10px #042b4c">Step 3 | visualizations</p></b>

[🏠 Table of Contents](#content)

<a id="plots"></a>
## <b><span style="color:navy">✈️ Step 3.1 | </span><span style="color:red">Plots</span></b>

In [None]:
# List every column together with its positional index.
for position, column_name in enumerate(data.columns):
    print(f'{position} : {column_name}')

### <code>airline</code>

In [None]:
# Number of flights per airline, most frequent first.
data['airline'].value_counts()

In [None]:
# Airline share of flights (pie, top) and ticket-price spread per airline (box plot, bottom).
airline_counts = data.airline.value_counts()  # computed once; ordered most -> least frequent

fig, ax = plt.subplots(2, 1, figsize=(10, 8))
fig.suptitle('Air Line', fontsize=20, fontweight='bold')
plt.tight_layout()
# Top ax
labels = airline_counts.index.tolist()
# Pull out only the last (least frequent) slice. The offsets are sized from the data
# so the cell keeps working when the number of categories is not exactly six.
explode = [0] * len(labels)
if explode:
    explode[-1] = 0.3
ax[0].pie(airline_counts, autopct='%.f%%', labels=labels, shadow=True, pctdistance=1.15, labeldistance=0.6, explode=explode)
ax[0].legend(bbox_to_anchor=(1, 1), loc=2, borderaxespad=5)

# Bottom ax
sns.boxplot(x='airline', y='price', data=data, ax=ax[1])

plt.show()

 - The most used airline is <code>Vistara</code> (43% of all flights, 127859 flights).
 - Second place is <code>Air_India</code> (27% of all flights, 80892 flights).
 - The least used airline is <code>SpiceJet</code> (3% of all flights, 9011 flights).

### <code>flight</code>

In [None]:
# Number of distinct flight codes.
data.flight.nunique()

<div style="font-size:110%; font-weight:500; background-color:#d3def0">➡️ Because of the large number of unique values, this column cannot be plotted.</div>

<div style="font-size:110%; font-weight:500; background-color:#d3def0">➡️ The flight name acts like an ID, so it is not necessary for modeling and will be dropped from the dataframe.</div>

In [None]:
# Discard the identifier-like flight code column.
data = data.drop(columns=['flight'])

### <code>source_city</code>

In [None]:
# Number of flights per source city, most frequent first.
data['source_city'].value_counts()

In [None]:
# Source-city share of flights (pie, top) and price spread per source city (box plot, bottom).
source_counts = data.source_city.value_counts()  # computed once; ordered most -> least frequent

fig, ax = plt.subplots(2, 1, figsize=(10, 8))
fig.suptitle('source city', fontsize=20, fontweight='bold')
plt.tight_layout()
# Top ax
labels = source_counts.index.tolist()
# Offset only the last (least frequent) slice; sized from the data rather than a fixed 6-tuple.
explode = [0] * len(labels)
if explode:
    explode[-1] = 0.3
ax[0].pie(source_counts, autopct='%.f%%', labels=labels, shadow=True, pctdistance=1.2, labeldistance=0.4, explode=explode)
ax[0].legend(bbox_to_anchor=(1, 1), loc=2, borderaxespad=5)

# Bottom ax
sns.boxplot(x='source_city', y='price', data=data, ax=ax[1])
plt.show()

The number of flights is almost equal across all source cities.

### <code>departure_time</code>

In [None]:
# Number of flights per departure-time bin, most frequent first.
data['departure_time'].value_counts()

In [None]:
# Departure-time share of flights (pie, top) and price spread per departure time (box plot, bottom).
departure_counts = data.departure_time.value_counts()  # computed once; ordered most -> least frequent

fig, ax = plt.subplots(2, 1, figsize=(10, 8))
fig.suptitle('departure time', fontsize=20, fontweight='bold')
plt.tight_layout()
# Top ax
labels = departure_counts.index.tolist()
# Offset only the last (least frequent) slice; sized from the data rather than a fixed 6-tuple.
explode = [0] * len(labels)
if explode:
    explode[-1] = 0.3
ax[0].pie(departure_counts, autopct='%.f%%', labels=labels, shadow=True, pctdistance=1.2, labeldistance=0.4, explode=explode)
ax[0].legend(bbox_to_anchor=(1, 1), loc=2, borderaxespad=5)

# Bottom ax
sns.boxplot(x='departure_time', y='price', data=data, ax=ax[1])
plt.show()

Except for <code>Late_Night</code>, the frequencies of the other departure_time values are almost equal.

### <code>stops</code>

In [None]:
# Number of flights per stop category, most frequent first.
data['stops'].value_counts()

In [None]:
# Share of flights per stop category (pie, top) and price spread per stop category (box plot, bottom).
stops_counts = data.stops.value_counts()  # computed once; ordered most -> least frequent

fig, ax = plt.subplots(2, 1, figsize=(10, 10))
fig.suptitle('stops', fontsize=20, fontweight='bold')
plt.tight_layout()
# Top ax
labels = stops_counts.index.tolist()
# Offset only the last (least frequent) slice; sized from the data rather than a fixed 3-tuple.
explode = [0] * len(labels)
if explode:
    explode[-1] = 0.3
ax[0].pie(stops_counts, autopct='%.f%%', labels=labels, shadow=True, pctdistance=1.2, labeldistance=0.4, explode=explode)
ax[0].legend(bbox_to_anchor=(1, 1), loc=2, borderaxespad=5)

# Bottom ax
sns.boxplot(x='stops', y='price', data=data, ax=ax[1])
plt.show()

 - Most of the flights have 1 stop (about 84%, 250863 flights).
 - Just 4% of flights have 2 or more stops (13286 flights).
 - About 12% of flights have no stops.

### <code>arrival_time</code>

In [None]:
# Number of flights per arrival-time bin, most frequent first.
data['arrival_time'].value_counts()

In [None]:
# Arrival-time share of flights (pie, top) and price spread per arrival time (box plot, bottom).
arrival_counts = data.arrival_time.value_counts()  # computed once; ordered most -> least frequent

fig, ax = plt.subplots(2, 1, figsize=(10, 8))
fig.suptitle('arrival time', fontsize=20, fontweight='bold')
plt.tight_layout()  # same layout call as the other categorical plots in this section
# Top ax
labels = arrival_counts.index.tolist()
# Offset only the last (least frequent) slice; sized from the data rather than a fixed 6-tuple.
explode = [0] * len(labels)
if explode:
    explode[-1] = 0.3
ax[0].pie(arrival_counts, autopct='%.f%%', labels=labels, shadow=True, pctdistance=1.2, labeldistance=0.4, explode=explode)
ax[0].legend(bbox_to_anchor=(1, 1), loc=2, borderaxespad=5)

# Bottom ax
sns.boxplot(x='arrival_time', y='price', data=data, ax=ax[1])
plt.show()

### <code>destination_city</code>

In [None]:
# Number of flights per destination city, most frequent first.
data['destination_city'].value_counts()

In [None]:
# Destination-city share of flights (pie, top) and price spread per destination (box plot, bottom).
destination_counts = data.destination_city.value_counts()  # computed once; ordered most -> least frequent

fig, ax = plt.subplots(2, 1, figsize=(10, 8))
fig.suptitle('destination city', fontsize=20, fontweight='bold')
plt.tight_layout()
# Top ax
labels = destination_counts.index.tolist()
# Offset only the last (least frequent) slice; sized from the data rather than a fixed 6-tuple.
explode = [0] * len(labels)
if explode:
    explode[-1] = 0.3
ax[0].pie(destination_counts, autopct='%.f%%', labels=labels, shadow=True, pctdistance=1.2, labeldistance=0.4, explode=explode)
ax[0].legend(bbox_to_anchor=(1, 1), loc=2, borderaxespad=5)

# Bottom ax
sns.boxplot(x='destination_city', y='price', data=data, ax=ax[1])
plt.show()

### <code>flight_class</code>

In [None]:
# Number of flights per seat class, most frequent first.
data['flight_class'].value_counts()

In [None]:
# Seat-class share of flights (pie, top) and price spread per class (box plot, bottom).
class_counts = data.flight_class.value_counts()

fig, ax = plt.subplots(2, 1, figsize=(10, 8))
fig.suptitle('flight class', fontsize=20, fontweight='bold')
plt.tight_layout()
pie_ax, box_ax = ax

# Top ax: no slice is pulled out for this two-category pie.
labels = class_counts.index.tolist()
pie_ax.pie(class_counts, autopct='%.f%%', labels=labels, shadow=True, pctdistance=1.2, labeldistance=0.4)
pie_ax.legend(bbox_to_anchor=(1, 1), loc=2, borderaxespad=5)

# Bottom ax: price ticks every 10000 from 0 up to 120000.
sns.boxplot(x='flight_class', y='price', data=data, ax=box_ax)
box_ax.set_yticks(np.arange(0, 130000, 10000))
plt.show()

 - Obviously, <code>Business</code> class tickets are more expensive than <code>Economy</code> ones.
 - Most of the flights are <code>Economy</code> (about 69%).

### <code>duration</code>

In [None]:
# Number of distinct duration values.
data.duration.nunique()

In [None]:
# Price versus duration per seat class (left) and the distribution of duration (right).
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
plt.tight_layout()
left_ax, right_ax = ax

# Left ax: one line per flight class, x ticks every 5 units from 0 to 45.
sns.lineplot(data=data, x='duration', y='price', hue='flight_class', ax=left_ax)
left_ax.set_xticks(np.arange(0, 50, 5))

# Right ax
sns.boxplot(x='duration', data=data, ax=right_ax)
plt.show()

 - In <code>Business class</code>, from 1 to 5 hours, the price grows sharply.
 - In <code>Business class</code>, for durations of more than 5 hours, the price stays in a channel between 45000 and 60000.
 - In <code>Economy class</code>, the price grows roughly linearly with duration, with a sharp rise in prices at duration=47.

### <code>days_left</code>

In [None]:
# Number of distinct days_left values.
data.days_left.nunique()

In [None]:
# Price versus days_left (left) and the distribution of days_left (right).
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
plt.tight_layout()
left_ax, right_ax = ax
day_ticks = np.arange(0, 50, 5)  # x ticks every 5 days from 0 to 45 on both panels

# Left ax
sns.lineplot(data=data, x='days_left', y='price', ax=left_ax)
left_ax.set_xticks(day_ticks)

# Right ax
sns.boxplot(x='days_left', data=data, ax=right_ax)
right_ax.set_xticks(day_ticks)
plt.show()

 - The earlier you book your ticket, the less you will pay (for <code>days_left</code> values greater than 2).

### <code>price</code>

In [None]:
# Distribution of the target variable as a single wide box plot.
fig, ax = plt.subplots(figsize=(15, 3))
sns.boxplot(x='price', data=data, ax=ax)
plt.show()

<a id="prep"></a>
# <b><p style="background-image: url(https://i.postimg.cc/MpTLFhtf/wp9806504-network-desktop-wallpapers.jpg);background-size: cover;font-family:tahoma;font-size:120%;color:white;text-align:center;border-radius:15px 50px; padding:7px; border:solid 2px #09375b; box-shadow: 10px 10px 10px #042b4c">Step 4 | Preprocessing</p></b>

[🏠 Table of Contents](#content)

<a id="value"></a>
## <b><span style="color:navy">✈️ Step 4.1 | </span><span style="color:red">Value</span></b>

##  Replace string values with integer

In [None]:
# col : airline
# Integer code assigned to each airline label.
airline_codes = {
    'Vistara': 1,
    'Air_India': 2,
    'Indigo': 3,
    'GO_FIRST': 4,
    'AirAsia': 5,
    'SpiceJet': 6,
}
# The explicit cast keeps the column numeric without relying on the implicit
# object -> int downcast of `replace` (deprecated in pandas), and raises right here
# if a label is missing from the mapping instead of leaving strings in the column.
data['airline'] = data['airline'].replace(airline_codes).astype('int64')

In [None]:
# col : source_city
# Integer code assigned to each source-city label.
source_city_codes = {
    'Delhi': 1,
    'Mumbai': 2,
    'Bangalore': 3,
    'Kolkata': 4,
    'Hyderabad': 5,
    'Chennai': 6,
}
# Explicit cast: numeric dtype is guaranteed without the deprecated implicit downcast
# of `replace`, and an unmapped label fails here instead of staying in the column.
data['source_city'] = data['source_city'].replace(source_city_codes).astype('int64')

In [None]:
# col : departure_time
# Integer code assigned to each departure-time label.
departure_time_codes = {
    'Morning': 1,
    'Early_Morning': 2,
    'Evening': 3,
    'Night': 4,
    'Afternoon': 5,
    'Late_Night': 6,
}
# Explicit cast: numeric dtype is guaranteed without the deprecated implicit downcast
# of `replace`, and an unmapped label fails here instead of staying in the column.
data['departure_time'] = data['departure_time'].replace(departure_time_codes).astype('int64')

In [None]:
# col : stops
# `stops` is ordinal, so the codes follow the number of stops (0 < 1 < 2+).
# The previous coding (one=1, zero=2, two_or_more=3) put the categories out of order,
# which makes the numeric value meaningless for correlation and linear models.
stops_codes = {
    'zero': 0,
    'one': 1,
    'two_or_more': 2,
}
# Explicit cast: numeric dtype is guaranteed without the deprecated implicit downcast
# of `replace`, and an unmapped label fails here instead of staying in the column.
data['stops'] = data['stops'].replace(stops_codes).astype('int64')

In [None]:
# col : arrival_time
# Integer code assigned to each arrival-time label.
arrival_time_codes = {
    'Night': 1,
    'Evening': 2,
    'Morning': 3,
    'Afternoon': 4,
    'Early_Morning': 5,
    'Late_Night': 6,
}
# Explicit cast: numeric dtype is guaranteed without the deprecated implicit downcast
# of `replace`, and an unmapped label fails here instead of staying in the column.
data['arrival_time'] = data['arrival_time'].replace(arrival_time_codes).astype('int64')

In [None]:
# col : destination_city
# Integer code assigned to each destination-city label.
destination_city_codes = {
    'Mumbai': 1,
    'Delhi': 2,
    'Bangalore': 3,
    'Kolkata': 4,
    'Hyderabad': 5,
    'Chennai': 6,
}
# Explicit cast: numeric dtype is guaranteed without the deprecated implicit downcast
# of `replace`, and an unmapped label fails here instead of staying in the column.
data['destination_city'] = data['destination_city'].replace(destination_city_codes).astype('int64')

In [None]:
# col : flight_class
# Integer code assigned to each seat class.
flight_class_codes = {
    'Economy': 1,
    'Business': 2,
}
# Explicit cast: numeric dtype is guaranteed without the deprecated implicit downcast
# of `replace`, and an unmapped label fails here instead of staying in the column.
data['flight_class'] = data['flight_class'].replace(flight_class_codes).astype('int64')

In [None]:
# Summary statistics of the frame after the label-to-integer replacements above.
data.describe()

<a id="corr"></a>
## <b><span style="color:navy">✈️ Step 4.2 | </span><span style="color:red">Correlations</span></b>

In [None]:
# Pairwise correlations of all columns, drawn as a lower-triangle heatmap.
corr = data.corr()

# Boolean mask that hides the diagonal and everything above it. Building it from
# ones (not from the correlation values themselves) keeps a cell hidden even when
# its correlation happens to be exactly 0.
upper_triangle = np.triu(np.ones_like(corr, dtype=bool))

plt.figure(figsize=(10, 5))
sns.heatmap(corr, annot=True, fmt='.2f', linewidths=0.5, linecolor='white', mask=upper_triangle, cmap='Blues')
plt.show()

- <code>flight_class</code> is the feature most strongly correlated with <code>price</code>.

<a id="xy"></a>
## <b><span style="color:navy">✈️ Step 4.3 | </span><span style="color:red">Define X ,y</span></b>

In [None]:
# Target: the ticket price. Features: every remaining column (not yet scaled).
y = data['price']
X_temp = data.drop(columns=['price'])

<a id="norm"></a>
## <b><span style="color:navy">✈️ Step 4.4 | </span><span style="color:red">Normalization</span></b>

In [None]:
# Rescale every feature column with MinMaxScaler. Despite its name, `scaler` holds the
# transformed array returned by fit_transform, not the fitted MinMaxScaler object.
# NOTE(review): the scaler is fitted on all rows, before any train/test split — confirm
# this is acceptable for the evaluation done later in the notebook.
scaler = MinMaxScaler().fit_transform(X_temp)
# Wrap the array back into a DataFrame that carries the original feature names.
X = pd.DataFrame(scaler, columns=X_temp.columns)

In [None]:
# Per-feature summary of the scaled data, colour-graded across each row.
summary_columns = ['min', 'mean', 'std', '50%', 'max']
X.describe().T[summary_columns].style.background_gradient(axis=1)

In [None]:
# Keep an untouched copy of the scaled features: later cells add columns to `X`,
# while the tree-based models are trained on `main_X`.
main_X = X.copy()

<a id="ml"></a>
# <b><p style="background-image: url(https://i.postimg.cc/MpTLFhtf/wp9806504-network-desktop-wallpapers.jpg);background-size: cover;font-family:tahoma;font-size:120%;color:white;text-align:center;border-radius:15px 50px; padding:7px; border:solid 2px #09375b; box-shadow: 10px 10px 10px #042b4c">Step 5 | Modeling</p></b>

[🏠 Table of Contents](#content)

<a id="lr"></a>
## <b><span style="color:navy">✈️ Step 5.1 | </span><span style="color:red">LinearRegression</span></b>

In [None]:
# Try several test_size values for a plain LinearRegression and keep the one with
# the highest R2 score on its held-out split.
test_list = []      # test_size values that were tried
mse_list = []       # mean squared error per test_size
r2score_list = []   # R2 score per test_size
best_r2 = float('-inf')  # so the first evaluated split always becomes the initial best
best_mse = 0
best_test = 0

# Candidate test sizes: 0.15 ... 0.45 in steps of 0.025 (rounded to two decimals).
for tester in range(6, 19) :
    tester = round(0.025 * tester, 2)
    test_list.append(tester)
    #
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=tester, random_state=0)
    # Fit on the training split only; fitting on the full X, y would let the model
    # see the very rows it is scored on below.
    lr = LinearRegression().fit(X_train, y_train)
    y_pred_lr = lr.predict(X_test)
    r2score = metrics.r2_score(y_test, y_pred_lr)
    r2score_list.append(r2score)
    mse = metrics.mean_squared_error(y_test, y_pred_lr)
    mse_list.append(mse)
    #
    if r2score>best_r2 :
        best_r2 = r2score
        best_mse = mse
        best_test = tester
print(colored('Best test_size : {}'.format(best_test), 'blue'))
print(colored('Best R2Score : {}'.format(best_r2), 'blue'))
print(colored('Best Mean Squared Error : {}'.format(best_mse), 'blue'))

# Plot both metrics against the tried test sizes.
fig, ax = plt.subplots(1, 2, figsize=(12, 4))
ax[0].plot(test_list, r2score_list, c='blue', label='R2Score')
ax[0].set_title("R2Score")
ax[0].legend()

ax[1].plot(test_list, mse_list, c='red', label='Mean Squared Error')
ax[1].set_title("Mean Squared Error")
ax[1].legend()
plt.show()

<div style="font-size:110%; font-weight:500; background-color:#d3def0">➡️ R2Score = 89.66%</div>
<div style="font-size:110%; font-weight:500; background-color:#d3def0">➡️ Use the feature most relevant to the target, <code>flight_class</code>, to increase the R2Score.</div>

In [None]:
# Greedy feature search: add flight_class raised to the powers 2..6 one at a time and
# keep a new column only when it raises the best R2 score seen so far.
# NOTE(review): the encoding cell gives flight_class two values and the features were
# min-max scaled, so the column presumably holds only 0 and 1 and every power would
# equal the column itself — confirm this step is intended.
for power in range(2, 7):
    new_col_name = f'flight_class^{power}'
    X[new_col_name] = X.flight_class ** power

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=best_test, random_state=0)

    lr = LinearRegression().fit(X_train, y_train)
    y_pred = lr.predict(X_test)
    r2score = metrics.r2_score(y_test, y_pred)
    diff = r2score - best_r2

    if not (r2score > best_r2):
        # No gain: remove the candidate column again.
        X.drop(columns=[new_col_name], inplace=True)
        print(colored(f'{new_col_name} Not improve r2score', 'red'))
        continue

    best_r2 = r2score
    print(colored(f'{new_col_name} improve r2score by {diff}', 'green'))



In [None]:
# Greedy feature search over interaction terms: multiply flight_class with every
# column currently in X and keep a product only when it improves the best R2 score
# by at least 0.001.
columns = X.columns  # column index as it is before any product column is added
for col in columns:
    new_col_name = f'flight_class*{col}'
    X[new_col_name] = X.flight_class * X[col]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=best_test, random_state=0)

    lr = LinearRegression().fit(X_train, y_train)
    y_pred = lr.predict(X_test)
    r2score = metrics.r2_score(y_test, y_pred)
    diff = r2score - best_r2

    if not ((r2score > best_r2) and (diff >= 0.001)):
        # Gain too small (or none): remove the candidate column again.
        X.drop(columns=[new_col_name], inplace=True)
        print(colored(f'{new_col_name} Not improve r2score', 'red'))
        continue

    best_r2 = r2score
    print(colored(f'{new_col_name} improve r2score by {diff}', 'green'))



In [None]:
# Final linear-regression score: the best R2 reached by the search cells above.
lr_r2 = best_r2
print(colored('Linear Regression R2Score = {}'.format(round(lr_r2, 3)), 'green'))

<a id="tree"></a>
## <b><span style="color:navy">✈️ Step 5.2 | </span><span style="color:red">DecisionTreeRegressor</span></b>

In [None]:
# Hyper-parameter grid explored for the decision tree.
parameters = {
    'splitter' : ['best', 'random'],
    'max_features' : [None, 8, 7, 6, 5],
}

# Split with the test size selected in the linear-regression section (`best_test`).
# `tester` is only the leftover loop variable of that search (its last candidate),
# and using it would score this model on a different split than LinearRegression.
X_train, X_test, y_train, y_test = train_test_split(main_X, y, test_size=best_test, random_state=0)


# Fixed seed: the 'random' splitter and the max_features subsets are stochastic,
# so without it the selected parameters can change between runs.
tree = DecisionTreeRegressor(random_state=0)
tree_cv = GridSearchCV(estimator=tree, param_grid=parameters, cv=20).fit(X_train, y_train)

print(colored('Tuned Hyper Parameters :\n{}'.format(tree_cv.best_params_), 'blue'))

In [None]:
# GridSearchCV (refit=True by default) has already refitted the best parameter
# combination on the whole training split, so reuse that estimator instead of
# training a second, unseeded tree with the same parameters.
tree = tree_cv.best_estimator_
y_pred_tree = tree.predict(X_test)
tree_r2 = metrics.r2_score(y_test, y_pred_tree)
print(colored('DecisionTreeRegressor R2Score = {}'.format(round(tree_r2, 3)), 'green'))

<a id="rf"></a>
## <b><span style="color:navy">✈️ Step 5.3 | </span><span style="color:red">RandomForestRegressor</span></b>

In [None]:
# Split with the test size selected in the linear-regression section (`best_test`),
# not with `tester`, the leftover loop variable of that search, so that all models
# are compared on the same kind of split.
X_train, X_test, y_train, y_test = train_test_split(main_X, y, test_size=best_test, random_state=0)

# Fixed seed so the bootstrap samples and feature subsets, and therefore the score,
# are reproducible between runs.
rf = RandomForestRegressor(n_estimators=500, max_features=8, n_jobs=-1, random_state=0).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
rf_r2 = metrics.r2_score(y_test, y_pred_rf)
print(colored('RandomForestRegressor R2Score = {}'.format(round(rf_r2, 3)), 'green'))

<a id="result"></a>
## <b><span style="color:navy">✈️ Step 5.4 | </span><span style="color:red">Result</span></b>

In [None]:
# Collect the R2 score of each model and compare them in a labelled bar chart.
model_names = ['LinearRegression', 'DecisionTreeRegressor', 'RandomForestRegressor']
model_scores = [lr_r2, tree_r2, rf_r2]
result = pd.DataFrame({'Algorithms' : model_names, 'R2Scores' : model_scores})

fig, ax = plt.subplots(figsize=(8, 4))
sns.barplot(x='Algorithms', y='R2Scores', data=result, palette='Set1', ax=ax)
# Print each bar's value on top of it.
for bars in ax.containers:
    ax.bar_label(bars)
plt.show()

### <div style="font-size:110%; font-weight:500; background-color:#d3def0">➡️ <code>RandomForestRegressor</code> has the highest R2Score and is chosen as the best model.</div>

<a id="author"></a>
<div style="border:3px solid navy; border-radius:30px; padding: 15px; background-size: cover; font-size:100%; text-align:left; background-image: url(https://i.postimg.cc/sXwGWcwC/download.jpg); background-size: cover">

<h4 align="left"><span style="font-weight:700; font-size:150%"><font color=#d10202>Author:</font><font color=navy> Nima Pourmoradi</font></span></h4>
<h6 align="left"><font color=#ff6200><a href='https://github.com/NimaPourmoradi'>github: https://github.com/NimaPourmoradi</a></font></h6>
<h6 align="left"><font color=#ff6200><a href='https://www.kaggle.com/nimapourmoradi'>kaggle : https://www.kaggle.com/nimapourmoradi</a></font></h6>
<h6 align="left"><font color=#ff6200><a href='https://www.linkedin.com/in/nima-pourmoradi-081949288/'>linkedin : www.linkedin.com/in/nima-pourmoradi</a></font></h6>
<h6 align="left"><font color=#ff6200><a href='https://t.me/Nima_Pourmoradi'>Telegram : https://t.me/Nima_Pourmoradi</a></font></h6>

<img src="https://i.postimg.cc/t4b3WtCy/1000-F-291522205-Xkrm-S421-Fj-SGTMR.jpg">