In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [184]:
import pandas as pd
import numpy as np
import keras_tuner
from config import Config
from model import build_model
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

In [134]:
data = pd.read_csv('/content/new_data.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119390 entries, 0 to 119389
Data columns (total 25 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   hotel                           119390 non-null  object 
 1   is_canceled                     119390 non-null  int64  
 2   lead_time                       119390 non-null  int64  
 3   adults                          119390 non-null  int64  
 4   children                        119386 non-null  float64
 5   babies                          119390 non-null  int64  
 6   meal                            119390 non-null  object 
 7   country                         118902 non-null  object 
 8   market_segment                  119390 non-null  object 
 9   distribution_channel            119390 non-null  object 
 10  is_repeated_guest               119390 non-null  int64  
 11  previous_cancellations          119390 non-null  int64  
 12  previous_booking

In [135]:
y = data['is_canceled'].values.reshape(-1, 1)

In [136]:
data = data.drop(columns = ['is_canceled','adults','children','babies','meal','days_in_waiting_list'])

In [137]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119390 entries, 0 to 119389
Data columns (total 19 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   hotel                           119390 non-null  object 
 1   lead_time                       119390 non-null  int64  
 2   country                         118902 non-null  object 
 3   market_segment                  119390 non-null  object 
 4   distribution_channel            119390 non-null  object 
 5   is_repeated_guest               119390 non-null  int64  
 6   previous_cancellations          119390 non-null  int64  
 7   previous_bookings_not_canceled  119390 non-null  int64  
 8   reserved_room_type              119390 non-null  object 
 9   assigned_room_type              119390 non-null  object 
 10  booking_changes                 119390 non-null  int64  
 11  deposit_type                    119390 non-null  object 
 12  customer_type   

In [138]:
#ordinal features: reservation_status, customer_type,deposit_type,assigned_room_type , reserved_room_type
#nominal: season , hotel,country, market_segment,distribution_channel,is_repeated_guest
x_numeric = data[['stays_total','total_of_special_requests',
                  'required_car_parking_spaces','booking_changes','previous_bookings_not_canceled',
                  'previous_cancellations','lead_time','adr']].values


In [139]:
encoder = OneHotEncoder()
one_hot_encoded = encoder.fit_transform(
    data[['season' , 'hotel','country', 'market_segment','distribution_channel','is_repeated_guest']])



In [140]:
X_norminal = one_hot_encoded.toarray()


In [141]:
encoder = OrdinalEncoder()

ordinal_encoded = encoder.fit_transform(data[['customer_type', 'assigned_room_type','deposit_type']])

In [142]:
X_ord = ordinal_encoded


In [143]:
X = np.hstack((x_numeric ,X_ord ,X_norminal))
X.shape

(119390, 210)

In [144]:
scalar= MinMaxScaler()
X= scalar.fit_transform(X)
y=scalar.fit_transform(y)

In [146]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=2024)

In [147]:
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [148]:

X_train, X_val, y_train, y_val = train_test_split(X_train_resampled,y_train_resampled, test_size=0.2, random_state=42)

In [180]:
config = Config(X_train.shape[1],2,16,4,16,step=2)

In [150]:
y_train = y_train.astype(int)
y_val = y_val.astype(int)
y_test = y_test.astype(int)

In [178]:

y_train.reshape(-1,1)

array([[1],
       [0],
       [0],
       ...,
       [1],
       [0],
       [0]])

In [181]:
tuner = keras_tuner.RandomSearch(
    lambda hp: build_model(hp,config),
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=3,
    directory='my_dir',
    project_name='classification'
)

Reloading Tuner from my_dir/classification/tuner0.json


In [182]:
tuner.search(X_train, y_train.reshape(-1,1), epochs=50, validation_data=(X_val, y_val.reshape(-1,1)))

best_model = tuner.get_best_models(num_models=1)[0]

best_model.fit(X_train, y_train, epochs=50, validation_data=(X_val, y_val))

test_loss, test_accuracy = best_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy}")


Search: Running Trial #3

Value             |Best Value So Far |Hyperparameter
4                 |4                 |num_layers
4                 |4                 |units_0
relu              |relu              |activation_0
4                 |4                 |units_1
tanh              |tanh              |activation_1
0.0001            |0.0001            |learning_rate
4                 |4                 |units_2
relu              |relu              |activation_2
4                 |4                 |units_3
relu              |relu              |activation_3
4                 |4                 |units_4
tanh              |tanh              |activation_4
4                 |4                 |units_5
relu              |relu              |activation_5
4                 |4                 |units_6
tanh              |tanh              |activation_6
4                 |4                 |units_7
tanh              |tanh              |activation_7
4                 |4                 |units

KeyboardInterrupt: 

In [183]:
config = Config(X_train.shape[1],2,26,4,20,step=4)
tuner = keras_tuner.RandomSearch(
    lambda hp: build_model(hp,config),
    objective='val_accuracy',
    max_trials=4,
    executions_per_trial=3,
    directory='my_dir',
    project_name='classification'
)
tuner.search(X_train, y_train.reshape(-1,1), epochs=80, validation_data=(X_val, y_val.reshape(-1,1)))

best_model = tuner.get_best_models(num_models=1)[0]

best_model.fit(X_train, y_train, epochs=80, validation_data=(X_val, y_val))

test_loss, test_accuracy = best_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy}")

Reloading Tuner from my_dir/classification/tuner0.json

Search: Running Trial #3

Value             |Best Value So Far |Hyperparameter
4                 |12                |num_layers
4                 |4                 |units_0
relu              |tanh              |activation_0
4                 |4                 |units_1
tanh              |tanh              |activation_1
0.0001            |0.0001            |learning_rate
4                 |4                 |units_2
relu              |relu              |activation_2
4                 |4                 |units_3
relu              |relu              |activation_3
4                 |4                 |units_4
tanh              |relu              |activation_4
4                 |4                 |units_5
relu              |relu              |activation_5
4                 |4                 |units_6
tanh              |relu              |activation_6
4                 |4                 |units_7
tanh              |relu              |a

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/80
[1m2638/2638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - accuracy: 0.6610 - loss: 0.6624 - val_accuracy: 0.7607 - val_loss: 0.5522
Epoch 2/80
[1m2638/2638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - accuracy: 0.7673 - loss: 0.5359 - val_accuracy: 0.7843 - val_loss: 0.4949
Epoch 3/80
[1m2638/2638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 4ms/step - accuracy: 0.7881 - loss: 0.4875 - val_accuracy: 0.7890 - val_loss: 0.4665
Epoch 4/80
[1m2638/2638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 3ms/step - accuracy: 0.7907 - loss: 0.4623 - val_accuracy: 0.7940 - val_loss: 0.4477
Epoch 5/80
[1m2638/2638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.7947 - loss: 0.4420 - val_accuracy: 0.7983 - val_loss: 0.4282
Epoch 6/80
[1m2638/2638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - accuracy: 0.7985 - loss: 0.4262 - val_accuracy: 0.7992 - val_loss: 0.4137
Epoch 7/80


KeyboardInterrupt: 