In [None]:
# Library to suppress warnings or deprecation notes 
import warnings
warnings.filterwarnings('ignore')

# Libraries to help with reading and manipulating data
import numpy as np
import pandas as pd

# Libraries to help with data visualization
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Libraries to split data, impute missing values, and scale values
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Libraries to import decision tree classifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.ensemble import StackingClassifier

# Hyperparameter tuning
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
df = pd.read_csv("/content/drive/My Drive/no_outlier_df.csv")

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29813 entries, 0 to 29812
Data columns (total 24 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   area                                   29813 non-null  float64
 1   building_type                          29813 non-null  object 
 2   building_nature                        29813 non-null  object 
 3   image_url                              16299 non-null  object 
 4   num_bath_rooms                         29813 non-null  float64
 5   num_bed_rooms                          29813 non-null  float64
 6   price                                  29813 non-null  float64
 7   property_description                   16557 non-null  object 
 8   property_overview                      16321 non-null  object 
 9   property_url                           29794 non-null  object 
 10  purpose                                29813 non-null  object 
 11  ci

In [None]:
df.head(5)

Unnamed: 0,area,building_type,building_nature,image_url,num_bath_rooms,num_bed_rooms,price,property_description,property_overview,property_url,...,id,relaxation_amenity_count,security_amenity_count,maintenance_or_cleaning_amenity_count,social_amenity_count,expendable_amenity_count,service_staff_amenity_count,unclassify_amenity_count,division,zone
0,1100.0,Apartment,Residential,https://images-cdn.bproperty.com/thumbnails/80...,3.0,3.0,22000.0,1150 Square Feet Apartment For Rent In Mohamma...,The apartment of 1100 Square Feet is located ...,https://www.bproperty.com/en/property/details-...,...,bproperty-14087,0,2,2,0,5,0,4,Dhaka,Mohammadpur
1,1300.0,Apartment,Residential,https://images-cdn.bproperty.com/thumbnails/13...,3.0,3.0,40000.0,Grab This Lovely Flat For Rent In Bashundhara ...,This lovely apartment is located in a great lo...,https://www.bproperty.com/en/property/details-...,...,bproperty-14091,0,2,2,0,4,0,5,Dhaka,Bashundhara R/A
2,1500.0,Apartment,Residential,https://images-cdn.bproperty.com/thumbnails/15...,3.0,3.0,35000.0,1500 Square Feet Apartment With Necessary Resi...,"Make this beautiful 1,500 Sft apartment in Utt...",https://www.bproperty.com/en/property/details-...,...,bproperty-14092,0,1,3,2,2,0,4,Dhaka,Uttara
3,1350.0,Apartment,Residential,https://images-cdn.bproperty.com/thumbnails/15...,3.0,3.0,20000.0,Wow! This 1350 Sq Ft Apartment For Rent In Bas...,"In a very busy city like Dhaka, everyone tranc...",https://www.bproperty.com/en/property/details-...,...,bproperty-14093,0,0,0,0,4,0,3,Dhaka,Bashundhara R/A
4,1825.0,Apartment,Residential,https://images-cdn.bproperty.com/thumbnails/15...,4.0,3.0,60000.0,This 1825 Sq. Ft Apartment Will Ensure Your Go...,"In a very busy city like Dhaka, everyone tranc...",https://www.bproperty.com/en/property/details-...,...,bproperty-14094,0,2,1,0,3,0,4,Dhaka,Uttara


In [None]:
df1 = df.drop(['area','property_url', 'image_url', 'address' ,'id', 'property_description', 'property_overview'], axis = 1)

In [None]:
df1.shape

(29813, 17)

In [None]:
df1.head(5)

Unnamed: 0,building_type,building_nature,num_bath_rooms,num_bed_rooms,price,purpose,city,locality,relaxation_amenity_count,security_amenity_count,maintenance_or_cleaning_amenity_count,social_amenity_count,expendable_amenity_count,service_staff_amenity_count,unclassify_amenity_count,division,zone
0,Apartment,Residential,3.0,3.0,22000.0,Rent,Dhaka,Mohammadpur,0,2,2,0,5,0,4,Dhaka,Mohammadpur
1,Apartment,Residential,3.0,3.0,40000.0,Rent,Dhaka,Bashundhara R-A,0,2,2,0,4,0,5,Dhaka,Bashundhara R/A
2,Apartment,Residential,3.0,3.0,35000.0,Rent,Dhaka,Uttara,0,1,3,2,2,0,4,Dhaka,Uttara
3,Apartment,Residential,3.0,3.0,20000.0,Rent,Dhaka,Bashundhara R-A,0,0,0,0,4,0,3,Dhaka,Bashundhara R/A
4,Apartment,Residential,4.0,3.0,60000.0,Rent,Dhaka,Uttara,0,2,1,0,3,0,4,Dhaka,Uttara


### Data Pre-processing

In [None]:
# Calculate the percentage of missing values in each column
missing_percentage = df1.isnull().sum() / len(df1) * 100

# Create a new dataframe to store the results
missing_data = pd.DataFrame({'Variable': df1.columns, 'MissingPercentage': missing_percentage})

# Sort the dataframe by the missing percentage in descending order
missing_data.sort_values('MissingPercentage', ascending=False, inplace=True)

# Print the missing data summary
print(missing_data)


                                                                    Variable  \
zone                                                                    zone   
division                                                            division   
security_amenity_count                                security_amenity_count   
unclassify_amenity_count                            unclassify_amenity_count   
service_staff_amenity_count                      service_staff_amenity_count   
expendable_amenity_count                            expendable_amenity_count   
social_amenity_count                                    social_amenity_count   
maintenance_or_cleaning_amenity_count  maintenance_or_cleaning_amenity_count   
building_type                                                  building_type   
building_nature                                              building_nature   
locality                                                            locality   
city                                    

In [None]:
df1['purpose'].unique()

array(['Rent', 'Sale'], dtype=object)

In [None]:
df1['building_type'].unique()

array(['Apartment', 'Duplex', 'House', 'Residential Plot', 'Building',
       'Others', 'Commercial Plot', 'Shop', 'Office', 'Floor',
       'Warehouse', 'Factory'], dtype=object)

In [None]:
## feature engineering 

## ratio of bedrooms to bathrooms 

# df1['bedroom_to_bathroom_ratio'] = df1['num_bed_rooms'] / df1['num_bath_rooms']

In [None]:
## Total Amenity Count

# df1['total_amenity_count'] = df1['relaxation_amenity_count'] + df1['security_amenity_count'] + df1['maintenance_or_cleaning_amenity_count'] + df1['social_amenity_count'] + df1['expendable_amenity_count'] + df1['service_staff_amenity_count'] + df1['unclassify_amenity_count']

In [None]:
## Amenity Category Ratios

## df1['relaxation_ratio'] = df1['relaxation_amenity_count'] / df1['total_amenity_count']
## df1['security_ratio'] = df1['security_amenity_count'] / df1['total_amenity_count']
## df1['maintenance_ratio'] = df1['maintenance_or_cleaning_amenity_count'] / df1['total_amenity_count']
## df1['social_ratio'] = df1['social_amenity_count'] / df1['total_amenity_count']
## df1['expendable_ratio'] = df1['expendable_amenity_count'] / df1['total_amenity_count']
## df1['service_staff_ratio'] = df1['service_staff_amenity_count'] / df1['total_amenity_count']
## df1['unclassify_ratio'] = df1['unclassify_amenity_count'] / df1['total_amenity_count']


In [None]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29813 entries, 0 to 29812
Data columns (total 17 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   building_type                          29813 non-null  object 
 1   building_nature                        29813 non-null  object 
 2   num_bath_rooms                         29813 non-null  float64
 3   num_bed_rooms                          29813 non-null  float64
 4   price                                  29813 non-null  float64
 5   purpose                                29813 non-null  object 
 6   city                                   29813 non-null  object 
 7   locality                               29813 non-null  object 
 8   relaxation_amenity_count               29813 non-null  int64  
 9   security_amenity_count                 29813 non-null  int64  
 10  maintenance_or_cleaning_amenity_count  29813 non-null  int64  
 11  so

In [None]:
## 'total_amenity_count' , 'relaxation_amenity_count', 'security_amenity_count' ,'maintenance_or_cleaning_amenity_count', 'social_amenity_count', 'expendable_amenity_count','service_staff_amenity_count', 'unclassify_amenity_count

In [None]:
# Let's also identify the numeric and categorical columns.

numeric_cols = list(df1.select_dtypes(include=np.number).columns)
categorical_cols = list(df1.select_dtypes(include='object').columns)

print('numeric columns: ', numeric_cols)
print('categorical columns: ', categorical_cols)


numeric columns:  ['num_bath_rooms', 'num_bed_rooms', 'price', 'relaxation_amenity_count', 'security_amenity_count', 'maintenance_or_cleaning_amenity_count', 'social_amenity_count', 'expendable_amenity_count', 'service_staff_amenity_count', 'unclassify_amenity_count']
categorical columns:  ['building_type', 'building_nature', 'purpose', 'city', 'locality', 'division', 'zone']


In [None]:

#One-Hot encoding categotical variables in the data
df1 = pd.get_dummies(df1)
df1.shape

(29813, 343)

In [None]:
#################

In [None]:
x = df1.drop(['price'],axis=1)
y = df1[['price']]
x.shape,y.shape

((29813, 342), (29813, 1))

In [None]:
#the natural logarithm transformation to the target variable y using the np.log1p() function. This transformation is commonly used when dealing with skewed data or when trying to normalize the target variable.

y = np.log1p(y)


In [None]:
from sklearn import model_selection

# Rest of your code using model_selection module

# train-test_split
X_train, X_val, y_train, y_val = model_selection.train_test_split(x,y,test_size=0.20, random_state=42)

By using Standardscaler as an example, this approach scales the features to a specific range. It's important to fit the scaler on the training data and then apply the same transformation to the validation data. This ensures that both the training and validation sets are scaled consistently.

By applying the normalization or scaling after the train-test split, you ensure that the scaling is based solely on the training data and prevent any data leakage from the validation set. This helps to maintain the integrity of the evaluation process and provides a more accurate assessment of the model's performance.

In [None]:
# Check for missing values
missing_values = df1.isnull().sum()

# Print variables with missing values
print("Variables with missing values:")
print(missing_values[missing_values > 0])


Variables with missing values:
Series([], dtype: int64)


In [None]:
# Apply StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

In [None]:
from sklearn.linear_model import ElasticNet, Lasso,  BayesianRidge, LassoLarsIC
from sklearn.ensemble import RandomForestRegressor,  GradientBoostingRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.metrics import mean_squared_error
import xgboost as xgb
from sklearn.linear_model import Ridge
from yellowbrick.regressor import PredictionError, ResidualsPlot
import lightgbm as lgb


The output is regression type so accuracy cannot be used as a measure to compare different models like classification models, so all the models are compared using mean absolute error (MAE) and RMSE.
**The lower the value of mean absolute error the better the model is performing and lower values of RMSE indicate better fit.**

In [None]:
def regressionmodel(model_s,X_train, X_val, y_train, y_val):
    model_s=model_s.fit(X_train,y_train)#Fitting train data for sales
    y_pred=model_s.predict(X_val)#predicting sales with test data
    print('Model parameter used are:',model_s) #Printing the model to see which parameters are used
    #Printing mean absolute error for predicting sales
    print("MAE of sales is         :", metrics.mean_absolute_error(y_val,y_pred))
    #Printing Root mean squared error for predicting sales
    print("RMSE of sales is        :",np.sqrt(metrics.mean_squared_error(y_val,y_pred)))

### Lasso Regression

In [None]:
from sklearn import linear_model

# Rest of your code using linear_model module
model_s = linear_model.Lasso(alpha=0.1)
regressionmodel(model_s,X_train, X_val, y_train, y_val)

Model parameter used are: Lasso(alpha=0.1)
MAE of sales is         : 0.5387728586048764
RMSE of sales is        : 0.6866690622077605


## Ridge Regression

In [None]:
model_s = Ridge(alpha=1.0)
regressionmodel(model_s,X_train, X_val, y_train, y_val)

Model parameter used are: Ridge()
MAE of sales is         : 0.35159931595881416
RMSE of sales is        : 0.5104855477909721


## Light Gradient Boosting Regression

In [None]:
model_s = lgb.LGBMRegressor()
regressionmodel(model_s,X_train, X_val, y_train, y_val)

Model parameter used are: LGBMRegressor()
MAE of sales is         : 0.30481038572091806
RMSE of sales is        : 0.4447292289975793


## Random Forest Regression

In [None]:
model_s = RandomForestRegressor(n_estimators=100,max_depth=10, random_state=40)
regressionmodel(model_s,X_train, X_val, y_train, y_val)

Model parameter used are: RandomForestRegressor(max_depth=10, random_state=40)
MAE of sales is         : 0.3343495465633481
RMSE of sales is        : 0.47616687447584594


## eXtreme Gradient Boosting Regression

In [None]:
model_s = xgb.XGBRegressor()
regressionmodel(model_s,X_train, X_val, y_train, y_val)

Model parameter used are: XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=None, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=None, max_leaves=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             n_estimators=100, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=None, ...)
MAE of sales is         : 0.2991906064525452
RMSE of sales is        : 0.4400458113948689


## Decision Tree Regression

In [None]:
model_s = tree.DecisionTreeRegressor()
regressionmodel(model_s,X_train, X_val, y_train, y_val)

Model parameter used are: DecisionTreeRegressor()
MAE of sales is         : 0.29535306821591883
RMSE of sales is        : 0.463928910162184


### For better understanding and comparision of all the scores a dataframe is created

In [None]:
#Giving column Values
Regression_data = {'Regression Model':  ['Lasso','Ridge','Light Gradient Boosting','Random Forest',
                                 'eXtreme gradient boosting','Decision tree'],
        'MAE Value for Sales' :        [0.538,0.351,0.304,0.334,0.299,0.295],
        'RMSE Value for Sales':        [0.686,0.510,0.444,0.476,0.440,0.463], }
#Creating data frame with Column Names
Regression_comparision = pd.DataFrame (Regression_data, columns = ['Regression Model','MAE Value for Sales','RMSE Value for Sales'])

In [None]:
Regression_comparision #Printing dataframe

Unnamed: 0,Regression Model,MAE Value for Sales,RMSE Value for Sales
0,Lasso,0.538,0.686
1,Ridge,0.351,0.51
2,Light Gradient Boosting,0.304,0.444
3,Random Forest,0.334,0.476
4,eXtreme gradient boosting,0.299,0.44
5,Decision tree,0.295,0.463


# Best model after tuning hyperparaters

In [None]:
def regressionmodel(model_s, X_train, X_val, y_train, y_val):
    model_s.fit(X_train, y_train)
    y_pred = model_s.predict(X_val)
    print('Model parameters used:', model_s)
    print("MAE of sales is:", metrics.mean_absolute_error(y_val, y_pred))
    print("RMSE of sales is:", np.sqrt(metrics.mean_squared_error(y_val, y_pred)))

## Lasso with hyperparameter tuning

In [None]:
from sklearn.model_selection import RandomizedSearchCV
# Define the Lasso model
model_s = linear_model.Lasso()

# Define the parameter grid for hyperparameter tuning
param_grid = {'alpha': [0.1, 1.0, 10.0]}

# Create the GridSearchCV object with cross-validation
random_search = RandomizedSearchCV(estimator=model_s, param_distributions=param_grid, n_iter=10, cv=5)

# Perform hyperparameter tuning and get the best model
best_model = random_search.fit(X_train, y_train)

# Use the best model for prediction and evaluation
regressionmodel(best_model, X_train, X_val, y_train, y_val)

# Print the best hyperparameters
print("Best hyperparameters:", best_model.best_params_)

Model parameters used: RandomizedSearchCV(cv=5, estimator=Lasso(),
                   param_distributions={'alpha': [0.1, 1.0, 10.0]})
MAE of sales is: 0.5387728586048764
RMSE of sales is: 0.6866690622077605
Best hyperparameters: {'alpha': 0.1}


## Ridge with hyperparameter tuning

In [None]:
# Define the Ridge model
model_s = linear_model.Ridge()

# Define the parameter grid for hyperparameter tuning
param_grid = {'alpha': [0.1, 1.0, 10.0]}

# Create the GridSearchCV object with cross-validation
random_search = RandomizedSearchCV(estimator=model_s, param_distributions=param_grid, n_iter=10, cv=5)

# Perform hyperparameter tuning and get the best model
best_model = random_search.fit(X_train, y_train)

# Use the best model for prediction and evaluation
regressionmodel(best_model, X_train, X_val, y_train, y_val)

# Print the best hyperparameters
print("Best hyperparameters:", best_model.best_params_)

Model parameters used: RandomizedSearchCV(cv=5, estimator=Ridge(),
                   param_distributions={'alpha': [0.1, 1.0, 10.0]})
MAE of sales is: 0.35167061814977335
RMSE of sales is: 0.5104447280490049
Best hyperparameters: {'alpha': 10.0}


## Light gradiaent boosting with hyperparameters

In [None]:
# Define the LightGBM model
model_s = lgb.LGBMRegressor()

# Define the parameter grid for hyperparameter tuning
param_grid = {
    'learning_rate': [0.1, 0.01, 0.001],
    'max_depth': [3, 5, 7],
    'num_leaves': [31, 63, 127]
}

# Create the RandomizedSearchCV object with cross-validation
random_search = RandomizedSearchCV(estimator=model_s, param_distributions=param_grid, n_iter=10, cv=5)

# Perform hyperparameter tuning and get the best model
best_model = random_search.fit(X_train, y_train)

# Use the best model for prediction and evaluation
regressionmodel(best_model, X_train, X_val, y_train, y_val)

# Print the best hyperparameters
print("Best hyperparameters:", best_model.best_params_)

Model parameters used: RandomizedSearchCV(cv=5, estimator=LGBMRegressor(),
                   param_distributions={'learning_rate': [0.1, 0.01, 0.001],
                                        'max_depth': [3, 5, 7],
                                        'num_leaves': [31, 63, 127]})
MAE of sales is: 0.30651770645629733
RMSE of sales is: 0.44819760582105556
Best hyperparameters: {'num_leaves': 127, 'max_depth': 7, 'learning_rate': 0.1}


## Random forest with hyperparamters tuning

In [None]:
# Define the Random Forest Regressor model
model_s = RandomForestRegressor()

# Define the parameter grid for hyperparameter tuning
param_grid = {
    'n_estimators': [10, 20, 50],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10]
}

# Create the GridSearchCV object with cross-validation
grid_search = GridSearchCV(estimator=model_s, param_grid=param_grid, cv=5)

# Perform hyperparameter tuning and get the best model
best_model = grid_search.fit(X_train, y_train)

# Use the best model for prediction and evaluation
regressionmodel(best_model, X_train, X_val, y_train, y_val)

# Print the best hyperparameters
print("Best hyperparameters:", best_model.best_params_)

Model parameters used: GridSearchCV(cv=5, estimator=RandomForestRegressor(),
             param_grid={'max_depth': [None, 5, 10],
                         'min_samples_split': [2, 5, 10],
                         'n_estimators': [10, 20, 50]})
MAE of sales is: 0.2873727054723697
RMSE of sales is: 0.43474171815794005
Best hyperparameters: {'max_depth': None, 'min_samples_split': 10, 'n_estimators': 50}


In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Define the Random Forest Regressor model# Define the Random Forest Regressor model
model_s = RandomForestRegressor()

# Define the parameter grid for hyperparameter tuning
param_grid = {
    'n_estimators': [10, 20, 50],
    'max_depth': [None],
    'min_samples_split': [2, 5, 10]
}

# Create the RandomizedSearchCV object with cross-validation
random_search = RandomizedSearchCV(estimator=model_s, param_distributions=param_grid, cv=5, n_iter=10)

# Perform hyperparameter tuning and get the best model
best_model = random_search.fit(X_train, y_train)

# Use the best model for prediction and evaluation
regressionmodel(best_model, X_train, X_val, y_train, y_val)

# Print the best hyperparameters
print("Best hyperparameters:", best_model.best_params_)

Model parameters used: RandomizedSearchCV(cv=5, estimator=RandomForestRegressor(),
                   param_distributions={'max_depth': [None],
                                        'min_samples_split': [2, 5, 10],
                                        'n_estimators': [10, 20, 50]})
MAE of sales is: 0.28689920547184805
RMSE of sales is: 0.434223576564332
Best hyperparameters: {'n_estimators': 50, 'min_samples_split': 10, 'max_depth': None}


# eXtreme Graident Boosting with hyperparameter tuning

In [None]:
## randomized search takes three less meniutes to run the model
param_grid = {
    'learning_rate': [0.1, 0.01],
    'max_depth': [None],
    'n_estimators': [10, 20, 50],
    'subsample': [0.8, 0.9]
}

# Create the XGBoost model
model_xgb = xgb.XGBRegressor()

# Create the RandomizedSearchCV object with cross-validation
random_search = RandomizedSearchCV(estimator=model_xgb, param_distributions=param_grid, cv=5, n_iter=10)

# Perform hyperparameter tuning and get the best model
best_model = random_search.fit(X_train, y_train, early_stopping_rounds=10, eval_set=[(X_val, y_val)])

# Use the best model for prediction and evaluation
regressionmodel(best_model, X_train, X_val, y_train, y_val)

# Print the best hyperparameters
print("Best hyperparameters:", best_model.best_params_)

[0]	validation_0-rmse:11.26411
[1]	validation_0-rmse:10.14257
[2]	validation_0-rmse:9.13354
[3]	validation_0-rmse:8.22542
[4]	validation_0-rmse:7.40824
[5]	validation_0-rmse:6.67312
[6]	validation_0-rmse:6.01196
[7]	validation_0-rmse:5.41727
[8]	validation_0-rmse:4.88254
[9]	validation_0-rmse:4.40152
[10]	validation_0-rmse:3.96983
[11]	validation_0-rmse:3.58129
[12]	validation_0-rmse:3.23257
[13]	validation_0-rmse:2.91944
[14]	validation_0-rmse:2.63804
[15]	validation_0-rmse:2.38584
[16]	validation_0-rmse:2.15958
[17]	validation_0-rmse:1.95678
[18]	validation_0-rmse:1.77514
[19]	validation_0-rmse:1.61367
[0]	validation_0-rmse:11.26422
[1]	validation_0-rmse:10.14237
[2]	validation_0-rmse:9.13327
[3]	validation_0-rmse:8.22506
[4]	validation_0-rmse:7.40784
[5]	validation_0-rmse:6.67291
[6]	validation_0-rmse:6.01186
[7]	validation_0-rmse:5.41717
[8]	validation_0-rmse:4.88242
[9]	validation_0-rmse:4.40160
[10]	validation_0-rmse:3.96973
[11]	validation_0-rmse:3.58133
[12]	validation_0-rmse:3

In [None]:
#Giving column Values
Regression_data = {'Regression Model with HP':  ['Lasso','Ridge','Light Gradient Boosting','Random Forest',
                                 'eXtreme gradient boosting','Decision tree'],
        'MAE Value for Sales' :        [0.538,0.351,0.306,0.286,0.331,0.295],
        'RMSE Value for Sales':        [0.686,0.510,0.448,0.434,0.473,0.463], }
#Creating data frame with Column Names
Regression_comparision = pd.DataFrame (Regression_data, columns = ['Regression Model with HP','MAE Value for Sales','RMSE Value for Sales'])

In [None]:
Regression_comparision #Printing dataframe

Unnamed: 0,Regression Model with HP,MAE Value for Sales,RMSE Value for Sales
0,Lasso,0.538,0.686
1,Ridge,0.351,0.51
2,Light Gradient Boosting,0.304,0.444
3,Random Forest,0.334,0.476
4,eXtreme gradient boosting,0.299,0.44
5,Decision tree,0.295,0.463
