In [61]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import keras_tuner
import keras

In [62]:
csv_file_path = './airbnb_dataset/milestone3.csv'
df = pd.read_csv(csv_file_path)
pd.set_option('display.max_columns', None)
df.head()

Unnamed: 0,log_price,property_type,accommodates,bathrooms,host_response_rate,bedrooms,beds,room_type_Private room,room_type_Shared room,bed_type_Couch,bed_type_Futon,bed_type_Pull-out Sofa,bed_type_Real Bed,cancellation_policy_moderate,cancellation_policy_strict,cancellation_policy_super_strict_30,cancellation_policy_super_strict_60,city_Chicago,city_DC,city_LA,city_NYC,city_SF,cleaning_fee_True,host_has_profile_pic_t,property_type_encoded,des_sentiment_analysis,name_sentiment_analysis
0,5.010635,Apartment,-0.072621,-0.404046,0.296013,-0.312048,-0.566461,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,-0.245434,0.216242,-0.435248
1,5.129899,Apartment,1.783653,-0.404046,0.296013,2.034955,1.027816,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,-0.245434,0.837002,-1.788095
2,4.976734,Apartment,0.855516,-0.404046,0.296013,-0.312048,1.027816,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,1,-0.245434,0.808508,-0.133695
3,6.620073,House,0.391448,-0.404046,0.296013,0.861454,0.230678,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0.158745,0.213627,-0.926212
4,4.744932,Apartment,-0.536689,-0.404046,0.296013,-1.485549,-0.566461,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,-0.245434,-0.101872,-0.095776


In [63]:
features = [col for col in df.columns if col != 'log_price' and df[col].dtype in ['int64', 'float64']]
X = df[features]
y = df['log_price']

In [64]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Train with NN

In [65]:
# Uncomment below if tensorflow is not installed already
#%pip install tensorflow 
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [66]:
# Build Neural Network
nn_model = Sequential([
    Dense(32, activation='relu', input_shape=(X_train.shape[1])),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1)
])

nn_model.compile(optimizer='adam', loss='mean_squared_error')

TypeError: 'int' object is not iterable

In [None]:
# Train the model
history = nn_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Evaluate the model
mse = nn_model.evaluate(X_test, y_test)
print(f"Mean Squared Error on Test Set: {mse}")

Mean Squared Error on Test Set: 0.19590051472187042


### Hyper Tuning

In [None]:
def build_hp_model(hp):
    model = Sequential()
    for i in range(hp.Int('num_layers', 1, 5)):
        model.add(Dense(units=hp.Int('units_' + str(i), min_value=32, max_value=128, step=32),
                        activation='relu'))
    model.add(Dense(1))
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='mean_squared_error',
                  metrics=['mean_squared_error'])
    return model

In [67]:
early_stopping = keras.callbacks.EarlyStopping(
    monitor='loss',
    min_delta=0.001,
    patience=5,
    mode='auto',
    baseline=None,
    restore_best_weights=False,
    start_from_epoch=0
)

model_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath='checkpoints',
    monitor='loss',
    save_best_only=True,
    save_weights_only= False,
    mode='auto'
)

TypeError: EarlyStopping.__init__() got an unexpected keyword argument 'start_from_epoch'

In [68]:
tuner = keras_tuner.RandomSearch(
    hypermodel=build_hp_model,
    objective='val_mean_squared_error',
    max_trials=3,
    executions_per_trial=2,
    overwrite=True,
    tune_new_entries=True,
    allow_new_entries=True,
)

In [55]:
# Start searching
tuner_search = tuner.search(X_train, y_train, epochs=1, callbacks=[early_stopping, model_checkpoint], validation_split=0.2)

Trial 5 Complete [00h 00m 08s]
val_mean_squared_error: 0.22082024812698364

Best val_mean_squared_error So Far: 0.21091396609942117
Total elapsed time: 00h 00m 35s


In [56]:
# View summary of searching
tuner.results_summary()

Results summary
Results in ./untitled_project
Showing 10 best trials
Objective(name="val_mean_squared_error", direction="min")

Trial 1 summary
Hyperparameters:
num_layers: 1
units_0: 128
learning_rate: 0.002007431323449956
units_1: 96
units_2: 64
units_3: 96
Score: 0.21091396609942117

Trial 2 summary
Hyperparameters:
num_layers: 4
units_0: 96
learning_rate: 0.0008637364629284172
units_1: 32
units_2: 64
units_3: 64
Score: 0.21613024175167084

Trial 3 summary
Hyperparameters:
num_layers: 2
units_0: 64
learning_rate: 0.0008167350817418104
units_1: 64
units_2: 64
units_3: 128
Score: 0.21701422333717346

Trial 0 summary
Hyperparameters:
num_layers: 4
units_0: 64
learning_rate: 0.0005049588900001827
units_1: 32
units_2: 32
units_3: 32
Score: 0.2182495097319285

Trial 4 summary
Hyperparameters:
num_layers: 5
units_0: 32
learning_rate: 0.0009337117342558133
units_1: 32
units_2: 32
units_3: 64
units_4: 32
Score: 0.22082024812698364


In [57]:
# View best hyperparameters
best_hp = tuner.get_best_hyperparameters()[0]
best_hp.values

{'num_layers': 1,
 'units_0': 128,
 'learning_rate': 0.002007431323449956,
 'units_1': 96,
 'units_2': 64,
 'units_3': 96}

In [59]:
# View best model
best_model = tuner.get_best_models()[0]
best_model.summary()




ValueError: This model has not yet been built. Build the model first by calling `build()` or by calling the model on a batch of data.

In [None]:
# Evaluate the best model
loss, acc = best_model.evaluate(X_test, y_test)
print('ACC: ', acc)
print('LOSS: ', loss)

### Train With XGB

In [9]:
#Uncomment if first time run
#%pip install xgboost
import xgboost as xgb
from sklearn.metrics import mean_squared_error

dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

#hyperparam may need here
param = {
    'max_depth': 5,  
    'eta': 0.3,  
    'objective': 'reg:squarederror',  
    'eval_metric': 'rmse'  
}


In [10]:
num_round = 100 
model = xgb.train(param, dtrain, num_round, evals=[(dtrain, 'train')], early_stopping_rounds=10)
y_pred = model.predict(dtrain)

mse = mean_squared_error(y_train, y_pred)
rmse = np.sqrt(mse)
print("Train RMSE: %f" % (rmse))
print("Train MSE: %f" % (mse))

[0]	train-rmse:0.59766
[1]	train-rmse:0.52831
[2]	train-rmse:0.48884
[3]	train-rmse:0.46572
[4]	train-rmse:0.45221
[5]	train-rmse:0.44386
[6]	train-rmse:0.43829
[7]	train-rmse:0.43369
[8]	train-rmse:0.43119
[9]	train-rmse:0.42911
[10]	train-rmse:0.42708
[11]	train-rmse:0.42557
[12]	train-rmse:0.42471
[13]	train-rmse:0.42342
[14]	train-rmse:0.42230
[15]	train-rmse:0.42161
[16]	train-rmse:0.42120
[17]	train-rmse:0.42024
[18]	train-rmse:0.41965
[19]	train-rmse:0.41919
[20]	train-rmse:0.41876
[21]	train-rmse:0.41845
[22]	train-rmse:0.41778
[23]	train-rmse:0.41731
[24]	train-rmse:0.41707
[25]	train-rmse:0.41654
[26]	train-rmse:0.41623
[27]	train-rmse:0.41574
[28]	train-rmse:0.41528
[29]	train-rmse:0.41510
[30]	train-rmse:0.41469
[31]	train-rmse:0.41432
[32]	train-rmse:0.41410
[33]	train-rmse:0.41392
[34]	train-rmse:0.41368
[35]	train-rmse:0.41351
[36]	train-rmse:0.41289
[37]	train-rmse:0.41241
[38]	train-rmse:0.41201
[39]	train-rmse:0.41173
[40]	train-rmse:0.41140
[41]	train-rmse:0.41119
[4

In [11]:
model = xgb.train(param, dtrain, num_round, evals=[(dtest, 'test')], early_stopping_rounds=10)

y_pred = model.predict(dtest)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print("Test RMSE: %f" % (rmse))
print("Test MSE: %f" % (mse))

[0]	test-rmse:0.60615
[1]	test-rmse:0.53566
[2]	test-rmse:0.49533
[3]	test-rmse:0.47205
[4]	test-rmse:0.45807
[5]	test-rmse:0.45032
[6]	test-rmse:0.44487
[7]	test-rmse:0.44027
[8]	test-rmse:0.43802
[9]	test-rmse:0.43639
[10]	test-rmse:0.43442
[11]	test-rmse:0.43337
[12]	test-rmse:0.43279
[13]	test-rmse:0.43185
[14]	test-rmse:0.43104
[15]	test-rmse:0.43076
[16]	test-rmse:0.43062
[17]	test-rmse:0.42990
[18]	test-rmse:0.42979
[19]	test-rmse:0.42947
[20]	test-rmse:0.42913
[21]	test-rmse:0.42921
[22]	test-rmse:0.42874
[23]	test-rmse:0.42862
[24]	test-rmse:0.42860
[25]	test-rmse:0.42839
[26]	test-rmse:0.42833
[27]	test-rmse:0.42789
[28]	test-rmse:0.42787
[29]	test-rmse:0.42785
[30]	test-rmse:0.42771
[31]	test-rmse:0.42763
[32]	test-rmse:0.42750
[33]	test-rmse:0.42741
[34]	test-rmse:0.42736
[35]	test-rmse:0.42732
[36]	test-rmse:0.42700
[37]	test-rmse:0.42673
[38]	test-rmse:0.42650
[39]	test-rmse:0.42651
[40]	test-rmse:0.42636
[41]	test-rmse:0.42631
[42]	test-rmse:0.42618
[43]	test-rmse:0.4259