In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Activation, Dropout,BatchNormalization,Input
from tensorflow.keras.optimizers import Adam ,RMSprop
from tensorflow.keras.models import Model
from tensorflow.keras import  backend as K
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping

from sklearn.model_selection import train_test_split, ParameterGrid, ParameterSampler
from random import random,randrange
import timeit

from FCMnR import FCMnR_model

In [20]:
# Thanks to https://www.kaggle.com/code/julienjta/flight-price-prediction-98-47-r2-score
def preprocessing(df):
    #Encode the ordinal variables "stops" and "class".
    df["stops"] = df["stops"].replace({'zero':0,'one':1,'two_or_more':2}).astype(int)
    df["class"] = df["class"].replace({'Economy':0,'Business':1}).astype(int)
    
    #Create the dummy variables for the cities, the times and the airlines.
    dummies_variables = ["airline","source_city","destination_city","departure_time","arrival_time"]
    dummies = pd.get_dummies(df[dummies_variables], drop_first= True)
    df = pd.concat([df,dummies],axis=1)
    
    #Create the dummy variables for the cities, the times and the airlines.
    df = df.drop(["flight","airline","source_city","destination_city","departure_time","arrival_time"],axis=1)
    
    return df

In [25]:
def load_data():
    df = pd.read_csv("Clean_Dataset.csv",index_col=0)

    df = preprocessing(df)
    print("There are {} observations for {} predictors.".format(df.shape[0],df.shape[1]))
    df.head()    
    X = df.copy()
    y = X.pop("price")
    xtrain,xtest,ytrain,ytest = train_test_split(X,y,random_state = 1,test_size=0.2, shuffle=True)
    xtrain,xvalid,ytrain,yvalid = train_test_split(xtrain,ytrain,random_state = 1,test_size=0.2, shuffle=True)
    return xtrain,xtest,xvalid,yvalid,ytrain,ytest


In [30]:
xtrain,xtest,xvalid,yvalid,ytrain,ytest=load_data()
xtrain.head()

There are 300153 observations for 30 predictors.


Unnamed: 0,stops,class,duration,days_left,airline_Air_India,airline_GO_FIRST,airline_Indigo,airline_SpiceJet,airline_Vistara,source_city_Chennai,...,departure_time_Early_Morning,departure_time_Evening,departure_time_Late_Night,departure_time_Morning,departure_time_Night,arrival_time_Early_Morning,arrival_time_Evening,arrival_time_Late_Night,arrival_time_Morning,arrival_time_Night
47870,1,0,10.17,26,0,0,0,0,1,0,...,0,0,0,1,0,0,1,0,0,0
82547,1,0,4.92,39,0,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0
112828,1,0,12.92,28,1,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
125170,0,0,2.42,36,0,0,0,0,1,0,...,1,0,0,0,0,0,0,0,1,0
100057,1,0,4.42,35,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,1


In [32]:

model=FCMnR_model(3, (29))

callback_checkpoint = ModelCheckpoint(
  "chkpt-FCMnR.h5",
  verbose=1,
  monitor='val_loss',
  save_best_only=True
)
model.compile(
  optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
  loss=tf.keras.losses.mean_absolute_error,
  metrics=[tf.keras.metrics.MeanAbsoluteError()])

# plot_model(model)
start_time = timeit.default_timer()
history = model.fit(
    xtrain,
    ytrain,
  batch_size=64,
  epochs=10000,
  callbacks=[callback_checkpoint,EarlyStopping(patience=5)],
  validation_data=(xvalid,yvalid),
)

end_time = timeit.default_timer()

print("Total Time:",end_time-start_time)

Train on 192097 samples, validate on 48025 samples
Epoch 1/10000
Epoch 00001: val_loss improved from inf to 17684.32575, saving model to chkpt-FCMnR.h5
Epoch 2/10000
Epoch 00002: val_loss improved from 17684.32575 to 9318.58155, saving model to chkpt-FCMnR.h5
Epoch 3/10000
Epoch 00003: val_loss improved from 9318.58155 to 3998.36478, saving model to chkpt-FCMnR.h5
Epoch 4/10000

KeyboardInterrupt: 