# Deep Learning

In [1]:
import numpy as np
import tensorflow as tf
print("NumPy Version:", np.__version__)
print("TensorFlow Version:", tf.__version__)

NumPy Version: 1.24.3
TensorFlow Version: 2.12.1


In [2]:
import tensorflow as tf
print("Built with CUDA:", tf.test.is_built_with_cuda())
print("Physical GPUs:", tf.config.list_physical_devices('GPU'))
print("Build info:", tf.sysconfig.get_build_info())

Built with CUDA: False
Physical GPUs: []
Build info: OrderedDict([('is_cuda_build', False), ('is_rocm_build', False), ('is_tensorrt_build', False), ('msvcp_dll_names', 'msvcp140.dll,msvcp140_1.dll')])


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
import numpy as np
import pandas as pd

import sys
sys.path.append('..')
from utils import get_train_data, custom_preprocessor, submit_test, train_test_split_temporal


In [4]:
X, y = get_train_data() # X comes out prepared with added columns

# Train-test split
X_train, X_valid, y_train, y_valid = train_test_split_temporal(X, y)

X_train = X_train.drop(columns=["date"])
X_valid = X_valid.drop(columns=["date"])


Loading school holidays from cache...


In [5]:
# Fit the preprocessor on the training data
custom_preprocessor.fit(X_train)

# Transform the data
X_train_processed = custom_preprocessor.transform(X_train)
X_valid_processed = custom_preprocessor.transform(X_valid)


In [9]:
import numpy as np
from keras.callbacks import Callback
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.regularizers import l2
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Assuming X_train, y_train, X_val, y_val are already prepared

# Define a simple model
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],), kernel_regularizer=l2(0.01)),
    Dropout(0.3),
    Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.3),
    Dense(1, activation='linear')  # Output layer for regression
])

model.compile(optimizer='adam', loss='mse', metrics=['mae'])  # Loss adjusted for regression

# Custom callback to save training and validation scores
class TrainingHistory(Callback):
    def on_train_begin(self, logs=None):
        self.train_scores = []
        self.val_scores = []

    def on_epoch_end(self, epoch, logs=None):
        self.train_scores.append(logs.get('loss'))
        self.val_scores.append(logs.get('val_loss'))

history_callback = TrainingHistory()

# Early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10)

# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_valid, y_valid),
    epochs=400,
    batch_size=32,
    callbacks=[history_callback,  ModelCheckpoint("DL_128_64.h5", monitor="val_loss", save_best_only=True)],
    verbose=1
)

Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
Epoch 77/400
Epoch 78

In [None]:
# Save the scores for later plotting
np.save("train_scores.npy", np.array(history_callback.train_scores))
np.save("val_scores.npy", np.array(history_callback.val_scores))

In [None]:
model.save("DL_128_64.h5")

In [10]:
y_pred = model.predict(X_valid)

score = np.sqrt(mean_squared_error(y_pred, y_valid))



In [11]:
np.round(score, 2)

1.49

In [None]:
from utils import preparation
file_name = "../submissions/" + "deep_learning_initial" + "_submission.csv"

X_test = pd.read_parquet("../data/final_test.parquet")
X_test = preparation(X_test)
X_test = X_test.drop(columns=["date"])
X_test_processed = custom_preprocessor.transform(X_test)

y_predict = model.predict(X_test_processed)

results = pd.DataFrame(
dict(
    Id=np.arange(y_predict.shape[0]),
    log_bike_count=y_predict.flatten(),
    )
)
results.to_csv(file_name, index=False)

Loading school holidays from cache...


In [None]:
#submit_test(pipeline, "Ridge")