In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from google.colab import files  # for downloading file in Colab

# Load the December bike dataset (downloaded from GitHub on every run)
data_url = 'https://raw.githubusercontent.com/byui-cse/cse450-course/master/data/bikes_december.csv'
bikes_dec = pd.read_csv(data_url)

# Display dataset columns for verification
print("Columns in the December dataset:")
print(bikes_dec.columns)

# Use 'workingday' as the target variable (0 = non-working day, 1 = working day)
target = 'workingday'
print("Using target:", target)

# Separate features and labels
features = bikes_dec.drop(columns=[target])
labels = bikes_dec[target]

# Convert categorical features to dummy variables.
# dtype='float32' is requested explicitly: since pandas 2.0 the default dummy
# dtype is bool, and a frame mixing bool and numeric columns converts to an
# object-dtype array, which not every TensorFlow/Keras version can turn into a
# tensor.
# NOTE(review): 'dteday' looks like a per-day date string; if so, this creates
# one indicator column per calendar day, which would let the model look the
# target up by date instead of learning anything general. Confirm this is
# intended before trusting the reported accuracy.
features = pd.get_dummies(features, dtype='float32')

# Split the data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Create a normalization layer and adapt it to the training data only, so the
# per-feature mean/variance are not influenced by the held-out test rows.
normalizer = layers.Normalization(axis=-1)
# Cast to one numeric dtype so adapt() always sees a plain float matrix.
normalizer.adapt(X_train.to_numpy(dtype='float32'))

# Build the neural network model for binary classification
def build_model():
    """Assemble and compile the binary classifier.

    The network stacks, in order: the module-level ``normalizer`` layer, two
    64-unit ReLU dense layers, and a single sigmoid unit whose output is the
    predicted probability of the positive class.

    Returns:
        A ``keras.Sequential`` model compiled with binary cross-entropy loss,
        the Adam optimizer (learning rate 0.001) and the accuracy metric.
    """
    net = keras.Sequential()
    stack = (
        normalizer,                             # standardize the raw inputs
        layers.Dense(64, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(1, activation='sigmoid'),  # probability for working day
    )
    for layer in stack:
        net.add(layer)

    net.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return net

# Seed the Python, NumPy and TensorFlow RNGs so that weight initialization and
# batch shuffling (and therefore the exported predictions) repeat across runs.
keras.utils.set_random_seed(42)

model = build_model()
model.summary()

# Optional callback to reduce learning rate if validation loss plateaus
lr_callback = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10)

# Feed Keras plain float32 arrays rather than DataFrames: the normalizer was
# adapted on an ndarray, and an explicit cast avoids object-dtype conversion
# problems when the frame mixes bool and numeric columns.
X_train_arr = X_train.to_numpy(dtype='float32')
y_train_arr = y_train.to_numpy(dtype='float32')
X_test_arr = X_test.to_numpy(dtype='float32')
y_test_arr = y_test.to_numpy(dtype='float32')

# Train the model (the last 20% of the training rows are used for validation)
history = model.fit(
    X_train_arr,
    y_train_arr,
    epochs=50,
    validation_split=0.2,
    batch_size=32,
    callbacks=[lr_callback],
    verbose=1
)

# Score the model on the held-out test split, which was not used for fitting
# or for validation-driven learning-rate changes.
# evaluate() returns [loss, accuracy] because the model is compiled with
# metrics=['accuracy'].
test_loss, test_accuracy = model.evaluate(X_test_arr, y_test_arr, verbose=0)
print(f"Held-out test loss: {test_loss:.4f} - accuracy: {test_accuracy:.4f}")

# ------------------ Generate Predictions for the Entire December Dataset ------------------ #

# Generate predictions on all rows of the dataset (features).
# This includes the rows the model was trained on.
y_pred_probs = model.predict(features.to_numpy(dtype='float32')).ravel()

# Convert probabilities to binary predictions (0 or 1) using threshold 0.5
predictions = (y_pred_probs >= 0.5).astype(int)

# Create a DataFrame with a single column "predictions"
predictions_df = pd.DataFrame({'predictions': predictions})
assert len(predictions_df) == len(features), "expected one prediction per input row"

# Export the predictions to a CSV file
csv_filename = "predictions.csv"
predictions_df.to_csv(csv_filename, index=False)
print(f"Predictions exported to {csv_filename}")

# Download the file using Colab's files module
files.download(csv_filename)


Columns in the December dataset:
Index(['dteday', 'hr', 'temp_c', 'feels_like_c', 'hum', 'windspeed',
       'weathersit', 'season', 'holiday', 'workingday'],
      dtype='object')
Using target: workingday


Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.6022 - loss: 0.6327 - val_accuracy: 0.9064 - val_loss: 0.3523 - learning_rate: 0.0010
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9463 - loss: 0.2920 - val_accuracy: 0.9957 - val_loss: 0.1386 - learning_rate: 0.0010
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9937 - loss: 0.1076 - val_accuracy: 1.0000 - val_loss: 0.0420 - learning_rate: 0.0010
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 1.0000 - loss: 0.0303 - val_accuracy: 1.0000 - val_loss: 0.0167 - learning_rate: 0.0010
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 1.0000 - loss: 0.0124 - val_accuracy: 1.0000 - val_loss: 0.0089 - learning_rate: 0.0010
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>