In [1]:
import warnings

import numpy as np
import pandas as pd
from scipy.stats import uniform, randint
from sklearn.base import clone
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from xgboost import XGBRegressor

from paramFunctions import *

warnings.filterwarnings("ignore")

In [2]:
# Load only the first 10,000 rows. `nrows` stops the parser early instead of
# reading the whole CSV into memory and slicing it afterwards.
df_Flight_price = pd.read_csv("../data/Flight_price.csv", nrows=10000)

# Features: everything except the target and two columns that are excluded
# from modelling ("Unnamed: 0" is presumably a saved index and 'flight' an
# identifier -- TODO confirm against the data dictionary).
# `columns=` already implies the column axis, so no `axis` argument is needed.
X_flight = df_Flight_price.drop(columns=["Unnamed: 0", 'flight', "price"])

# Target: the 'price' column.
Y_flight = df_Flight_price['price']

# 70/30 train/test split with a fixed seed so the partition is reproducible.
X_flight_train, X_flight_test, y_flight_train, y_flight_test = train_test_split(
    X_flight,
    Y_flight,
    test_size=0.3,
    random_state=42,
)

In [3]:
# --- Elementary preprocessing steps -----------------------------------------
# Numeric gaps are filled with the column median; categorical gaps get an
# explicit placeholder label so that "missing" becomes its own category.
si_num = SimpleImputer(strategy='median')
si_cat = SimpleImputer(strategy="constant", fill_value='NO INFORMATION')
scaler = MinMaxScaler()

# --- Per-dtype sub-pipelines -------------------------------------------------
# Numeric columns: impute, then rescale with MinMaxScaler.
numerical_pipeline = Pipeline(
    steps=[('imputer', si_num), ('scaler', scaler)]
)

# Categorical columns: impute, then one-hot encode. Categories unseen during
# fit are ignored at transform time rather than raising.
category_pipeline = Pipeline(
    steps=[
        ('imputer', si_cat),
        ('encoder', OneHotEncoder(handle_unknown='ignore')),
    ]
)

# --- Column routing ----------------------------------------------------------
# Columns are routed by dtype: numeric -> 'num', object -> 'cat'.
# NOTE(review): remainder='drop' discards any column whose dtype is neither
# numeric nor object, without raising.
col_transformer = ColumnTransformer(
    transformers=[
        ('num', numerical_pipeline, make_column_selector(dtype_include=np.number)),
        ('cat', category_pipeline, make_column_selector(dtype_include='object')),
    ],
    remainder='drop',
    n_jobs=-1,
)

In [4]:
# One end-to-end pipeline (preprocessing + estimator) per model family.
#
# Each pipeline receives its own clone of `col_transformer`. Pipeline.fit fits
# its steps in place, so sharing a single transformer instance would let the
# most recently fitted pipeline overwrite the preprocessing state used by the
# others.
#
# The tree-based models are seeded with the same value as the train/test split
# so that repeated runs give the same fits. ElasticNet is left at its defaults.
RF_regressor = Pipeline([
    ('preprocessor', clone(col_transformer)),
    ('model', RandomForestRegressor(random_state=42)),
])
ElasticNet_regressor = Pipeline([
    ('preprocessor', clone(col_transformer)),
    ('model', ElasticNet()),
])
XGB_regressor = Pipeline([
    ('preprocessor', clone(col_transformer)),
    ('model', XGBRegressor(random_state=42)),
])

In [5]:
# Search spaces for the three pipelines. Keys use the `model__<param>` form so
# they address the 'model' step of each Pipeline.

# Random forest: number of trees and the two minimum-sample constraints.
# scipy's randint excludes the upper bound.
param_distribution_rf = {
    'model__n_estimators': randint(1, 500),
    'model__min_samples_split': randint(2, 10),
    'model__min_samples_leaf': randint(2, 10),
}

# Elastic net: uniform(0, 1) samples from the interval [0, 1].
param_distributions_enet = {
    'model__alpha': uniform(0, 1),
    'model__l1_ratio': uniform(0, 1),
}

# XGBoost: min_child_weight is log-uniform on [2**0, 2**7], materialised as
# 100 candidate values. The draw is vectorised and seeded so the candidate
# list is identical on every run; an unseeded draw would change it each time
# the kernel restarts.
param_distributions_xgb = {
    'model__min_child_weight': list(
        2 ** uniform.rvs(0, 7, size=100, random_state=42)
    ),
    'model__max_depth': randint(1, 15),
    'model__colsample_bytree': uniform(0, 1),
}

In [6]:
# Random-forest search. `get_param_score_list` is not defined in this notebook
# (presumably it comes from the `paramFunctions` star-import); it returns a
# pair that is unpacked into a parameter list and a score list.
# NOTE(review): the trailing 160 is presumably the search budget -- confirm.
param_list_rf, score_list_rf = get_param_score_list(
    X_flight_train,
    y_flight_train,
    X_flight_test,
    y_flight_test,
    RF_regressor,
    param_distribution_rf,
    160,
)

In [7]:
# Elastic-net search. `get_param_score_list` is not defined in this notebook
# (presumably it comes from the `paramFunctions` star-import); it returns a
# pair that is unpacked into a parameter list and a score list.
# NOTE(review): the trailing 160 is presumably the search budget -- confirm.
param_list_enet, score_list_enet = get_param_score_list(
    X_flight_train,
    y_flight_train,
    X_flight_test,
    y_flight_test,
    ElasticNet_regressor,
    param_distributions_enet,
    160,
)

In [8]:
# XGBoost search. `get_param_score_list` is not defined in this notebook
# (presumably it comes from the `paramFunctions` star-import); it returns a
# pair that is unpacked into a parameter list and a score list.
# NOTE(review): the trailing 160 is presumably the search budget -- confirm.
param_list_xgb, score_list_xgb = get_param_score_list(
    X_flight_train,
    y_flight_train,
    X_flight_test,
    y_flight_test,
    XGB_regressor,
    param_distributions_xgb,
    160,
)

In [None]:
# Hand each model's search output to `save_best_results` under the 'flight'
# dataset label, in the order ElasticNet, XGB, RandomForest.
# NOTE(review): this cell carries no execution count in the exported notebook,
# so it may not have been run -- confirm the results were actually saved.
for _model_label, _param_list, _score_list in (
    ('ElasticNet', param_list_enet, score_list_enet),
    ('XGB', param_list_xgb, score_list_xgb),
    ('RandomForest', param_list_rf, score_list_rf),
):
    save_best_results('flight', _model_label, _param_list, _score_list)

In [9]:
# Values passed as the first argument of the R^2 plotting helpers.
# Fine-grained steps first: 2, 4, 6, 8, 10.
list_1 = [2 * step for step in range(1, 6)]
# Then coarse steps of ten: 20, 30, ..., 150.
list_2 = [10 * step for step in range(2, 16)]

# NOTE(review): the largest value here is 150, while the search cells pass
# 160 to get_param_score_list -- confirm the two are meant to line up.
iter_number = [*list_1, *list_2]

In [10]:
# Plot the random-forest scores against `iter_number`; 'flight' and 'rf' are
# the dataset and model labels passed to the helper.
r2_iteration_plot(
    iter_number,
    score_list_rf,
    'flight',
    'rf',
)

In [11]:
# Plot the elastic-net scores against `iter_number`; 'flight' and 'enet' are
# the dataset and model labels passed to the helper.
r2_iteration_plot(
    iter_number,
    score_list_enet,
    'flight',
    'enet',
)

In [12]:
# Plot the XGBoost scores against `iter_number`; 'flight' and 'xgb' are the
# dataset and model labels passed to the helper.
r2_iteration_plot(
    iter_number,
    score_list_xgb,
    'flight',
    'xgb',
)

In [13]:
# Combined view of all three models. The score lists are passed in the order
# random forest, XGBoost, elastic net.
r2_all_models(
    iter_number,
    score_list_rf,
    score_list_xgb,
    score_list_enet,
    'flight',
)