In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Load the bikes dataset
bikes = pd.read_csv('https://raw.githubusercontent.com/byui-cse/cse450-course/master/data/bikes.csv')

# Display columns
print("Columns in the dataset:")
print(bikes.columns)

# Use 'workingday' as the target variable
target = 'workingday'
print("Using target:", target)

# Separate features and labels
features = bikes.drop(columns=[target])
labels = bikes[target]

# Convert categorical features to dummy variables.
# The dummies are requested as float32 and the whole frame is cast to float32
# because recent pandas versions emit boolean dummy columns; a frame mixing
# bool and numeric columns yields an object-dtype `.values` array, which the
# Normalization layer (and model.fit) cannot convert to a tensor.
# NOTE(review): 'dteday' looks like a date column -- if it is read as strings,
# get_dummies creates one column per distinct date, which is very wide and
# effectively lets the model memorise the target per date. Consider replacing
# it with derived calendar features or dropping it -- confirm intent.
features = pd.get_dummies(features, dtype='float32').astype('float32')
print("Convert categorical features to dummy variables:")

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Create a normalization layer for the features.
# The layer statistics are fitted on the training split only, so the test
# split does not influence the normalisation.
normalizer = layers.Normalization(axis=-1)
normalizer.adapt(X_train.values)
print("Create a normalization layer for the features")

# Build the neural network model for binary classification
def build_model():
    """Assemble and compile the binary classifier.

    The network is a Sequential stack made of the module-level
    ``normalizer`` layer, two 64-unit ReLU dense layers and a single
    sigmoid output unit. It is compiled with binary cross-entropy loss,
    an Adam optimizer (learning rate 0.001) and accuracy as the metric.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    network = keras.Sequential()

    # Input features are normalized first, using the already-adapted layer.
    network.add(normalizer)

    # Two identical hidden layers.
    for _ in range(2):
        network.add(layers.Dense(64, activation='relu'))

    # One sigmoid unit for the binary output.
    network.add(layers.Dense(1, activation='sigmoid'))

    network.compile(
        loss='binary_crossentropy',
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        metrics=['accuracy'],
    )
    return network

# Instantiate the classifier and show its layer summary
model = build_model()
model.summary()

# Optional: callback that multiplies the learning rate by 0.1 when the
# validation loss has not improved for 10 epochs
lr_callback = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=10,
)
print("callback to reduce learning rate if validation loss plateaus")

# Train the model; 20% of the training data is held out for validation
history = model.fit(X_train, y_train, epochs=100, validation_split=0.2,
                    callbacks=[lr_callback], verbose=1)
print("Train the model")

# Plot training and validation loss over epochs
fig, ax = plt.subplots(figsize=(8, 5))
for series_key, series_label in (('loss', 'Training Loss'),
                                 ('val_loss', 'Validation Loss')):
    ax.plot(history.history[series_key], label=series_label)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.set_title('Training and Validation Loss')
plt.show()

print(" Plot training and validation loss over epochs")


# Evaluate the model on the held-out test set and report accuracy in percent
loss, accuracy = model.evaluate(X_test, y_test, verbose=2)
accuracy_pct = accuracy * 100
print("Test set Accuracy: {:5.2f}%".format(accuracy_pct))


Columns in the dataset:
Index(['dteday', 'hr', 'casual', 'registered', 'temp_c', 'feels_like_c', 'hum',
       'windspeed', 'weathersit', 'season', 'holiday', 'workingday'],
      dtype='object')
Using target: workingday
Convert categorical features to dummy variables:
