In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

In [3]:
# Load the dataset
print("Loading dataset...")
df = pd.read_csv("S:/Course work/3rd year/Artificial Intelligence & Machine Learning/Workshop5/CarSharing.csv")
print(f"Dataset shape: {df.shape}")
print("\nDataset columns:", df.columns.tolist())
print("\nFirst few rows:")
print(df.head())

Loading dataset...
Dataset shape: (8708, 11)

Dataset columns: ['id', 'timestamp', 'season', 'holiday', 'workingday', 'weather', 'temp', 'temp_feel', 'humidity', 'windspeed', 'demand']

First few rows:
   id            timestamp  season holiday workingday                 weather  \
0   1  2017-01-01 00:00:00  spring      No         No  Clear or partly cloudy   
1   2  2017-01-01 01:00:00  spring      No         No  Clear or partly cloudy   
2   3  2017-01-01 02:00:00  spring      No         No  Clear or partly cloudy   
3   4  2017-01-01 03:00:00  spring      No         No  Clear or partly cloudy   
4   5  2017-01-01 04:00:00  spring      No         No  Clear or partly cloudy   

   temp  temp_feel  humidity  windspeed    demand  
0  9.84     14.395      81.0        0.0  2.772589  
1  9.02     13.635      80.0        0.0  3.688879  
2  9.02     13.635      80.0        0.0  3.465736  
3  9.84     14.395      75.0        0.0  2.564949  
4  9.84     14.395      75.0        0.0  0.000000  

In [4]:
# Check for missing values
print("\nMissing values:")
print(df.isnull().sum())


Missing values:
id               0
timestamp        0
season           0
holiday          0
workingday       0
weather          0
temp          1202
temp_feel      102
humidity        39
windspeed      200
demand           0
dtype: int64


In [5]:
# Data preprocessing
def preprocess_data(df):
    """Preprocess the data for neural network training"""
    df_processed = df.copy()
    
    # Handle datetime if timestamp column exists
    if 'timestamp' in df_processed.columns:
        try:
            df_processed['timestamp'] = pd.to_datetime(df_processed['timestamp'])
            df_processed['hour'] = df_processed['timestamp'].dt.hour
            df_processed['day_of_week'] = df_processed['timestamp'].dt.dayofweek
            df_processed['month'] = df_processed['timestamp'].dt.month
            # Drop the original timestamp column
            df_processed = df_processed.drop(['timestamp'], axis=1)
            print("Extracted time features: hour, day_of_week, month")
        except Exception as e:
            print(f"Error processing timestamp: {e}")
    
    # Drop id column if it exists
    if 'id' in df_processed.columns:
        df_processed = df_processed.drop(['id'], axis=1)
        print("Dropped 'id' column")
    
    # Handle categorical variables
    categorical_columns = []
    for col in df_processed.columns:
        if df_processed[col].dtype == 'object' and col != 'demand':
            categorical_columns.append(col)
    
    print(f"Categorical columns found: {categorical_columns}")
    
    # Encode categorical variables
    label_encoders = {}
    for column in categorical_columns:
        try:
            le = LabelEncoder()
            df_processed[column] = le.fit_transform(df_processed[column].astype(str))
            label_encoders[column] = le
            print(f"Encoded {column}: {list(le.classes_)}")
        except Exception as e:
            print(f"Error encoding {column}: {e}")
    
    # Convert all columns to numeric (except target)
    for col in df_processed.columns:
        if col != 'demand':
            df_processed[col] = pd.to_numeric(df_processed[col], errors='coerce')
    
    # Handle missing values
    print("Handling missing values...")
    numeric_columns = df_processed.select_dtypes(include=[np.number]).columns
    df_processed[numeric_columns] = df_processed[numeric_columns].fillna(df_processed[numeric_columns].median())
    
    print(f"Final processed shape: {df_processed.shape}")
    return df_processed, label_encoders

In [6]:
# Preprocess the data
print("\nPreprocessing data...")
df_processed, label_encoders = preprocess_data(df)


Preprocessing data...
Extracted time features: hour, day_of_week, month
Dropped 'id' column
Categorical columns found: ['season', 'holiday', 'workingday', 'weather']
Encoded season: ['fall', 'spring', 'summer', 'winter']
Encoded holiday: ['No', 'Yes']
Encoded workingday: ['No', 'Yes']
Encoded weather: ['Clear or partly cloudy', 'Light snow or rain', 'Mist', 'heavy rain/ice pellets/snow + fog']
Handling missing values...
Final processed shape: (8708, 12)


In [7]:
# Check if demand column exists
if 'demand' not in df_processed.columns:
    print("Error: 'demand' column not found in dataset!")
    print("Available columns:", df_processed.columns.tolist())
    exit()

In [8]:
# Separate features and target
X = df_processed.drop('demand', axis=1)
y = df_processed['demand']

print(f"\nFeatures shape: {X.shape}")
print(f"Target shape: {y.shape}")
print(f"Feature columns: {list(X.columns)}")

# Check for any remaining issues
print(f"\nTarget statistics:")
print(f"Min: {y.min():.4f}, Max: {y.max():.4f}, Mean: {y.mean():.4f}, Std: {y.std():.4f}")


Features shape: (8708, 11)
Target shape: (8708,)
Feature columns: ['season', 'holiday', 'workingday', 'weather', 'temp', 'temp_feel', 'humidity', 'windspeed', 'hour', 'day_of_week', 'month']

Target statistics:
Min: 0.0000, Max: 6.7923, Mean: 4.4527, Std: 1.4940


In [9]:
# Remove any rows with infinite or NaN values
print("Cleaning data...")
mask = np.isfinite(X).all(axis=1) & np.isfinite(y)
X = X[mask]
y = y[mask]
print(f"After cleaning - Features shape: {X.shape}, Target shape: {y.shape}")

Cleaning data...
After cleaning - Features shape: (8708, 11), Target shape: (8708,)


In [10]:
# Feature scaling
print("Scaling features...")
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Check for NaN in scaled features
if np.isnan(X_scaled).any():
    print("Warning: NaN found in scaled features. Replacing with zeros.")
    X_scaled = np.nan_to_num(X_scaled, nan=0.0)

print(f"Scaled features shape: {X_scaled.shape}")

Scaling features...
Scaled features shape: (8708, 11)


In [11]:
# Define the neural network architecture
def create_model(input_dim, hidden_layers=3, neurons_per_layer=64, dropout_rate=0.2, learning_rate=0.001):
    """Create a deep neural network model"""
    # Clear any existing models
    keras.backend.clear_session()
    
    model = keras.Sequential()
    
    # Input layer with regularization
    model.add(layers.Dense(neurons_per_layer, 
                          activation='relu', 
                          input_shape=(input_dim,),
                          kernel_regularizer=keras.regularizers.l2(0.001)))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(dropout_rate))
    
    # Hidden layers
    for i in range(hidden_layers - 1):
        model.add(layers.Dense(neurons_per_layer, 
                              activation='relu',
                              kernel_regularizer=keras.regularizers.l2(0.001)))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(dropout_rate))
    
    # Output layer
    model.add(layers.Dense(1, activation='linear'))
    
    # Compile the model
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate, clipnorm=1.0)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
    
    return model

In [12]:
# Function to calculate all regression evaluation metrics
def calculate_metrics(y_true, y_pred):
    """Calculate comprehensive regression metrics with NaN handling"""
    # Convert to numpy arrays and ensure they're 1D
    y_true = np.array(y_true).flatten()
    y_pred = np.array(y_pred).flatten()
    
    # Remove NaN and infinite values
    mask = np.isfinite(y_true) & np.isfinite(y_pred)
    if np.sum(mask) == 0:
        return {
            'MSE': np.inf,
            'RMSE': np.inf,
            'MAE': np.inf,
            'R2': -np.inf,
            'MAPE': np.inf,
            'MBE': np.inf
        }
    
    y_true_clean = y_true[mask]
    y_pred_clean = y_pred[mask]
    
    try:
        mse = mean_squared_error(y_true_clean, y_pred_clean)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_true_clean, y_pred_clean)
        r2 = r2_score(y_true_clean, y_pred_clean)
        
        # Mean Absolute Percentage Error (MAPE) - handle division by zero
        mask_nonzero = y_true_clean != 0
        if np.sum(mask_nonzero) > 0:
            mape = np.mean(np.abs((y_true_clean[mask_nonzero] - y_pred_clean[mask_nonzero]) / y_true_clean[mask_nonzero])) * 100
        else:
            mape = 0
        
        # Mean Bias Error (MBE)
        mbe = np.mean(y_pred_clean - y_true_clean)
        
        return {
            'MSE': mse,
            'RMSE': rmse,
            'MAE': mae,
            'R2': r2,
            'MAPE': mape,
            'MBE': mbe
        }
    except Exception as e:
        print(f"Error calculating metrics: {e}")
        return {
            'MSE': np.inf,
            'RMSE': np.inf,
            'MAE': np.inf,
            'R2': -np.inf,
            'MAPE': np.inf,
            'MBE': np.inf
        }

In [13]:
# Define hyperparameter combinations to test
param_combinations = [
    {'hidden_layers': 2, 'neurons_per_layer': 32, 'dropout_rate': 0.1, 'learning_rate': 0.001, 'batch_size': 32, 'epochs': 50},
    {'hidden_layers': 2, 'neurons_per_layer': 64, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 50},
    {'hidden_layers': 3, 'neurons_per_layer': 32, 'dropout_rate': 0.1, 'learning_rate': 0.001, 'batch_size': 32, 'epochs': 50},
    {'hidden_layers': 3, 'neurons_per_layer': 64, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 50},
    {'hidden_layers': 3, 'neurons_per_layer': 64, 'dropout_rate': 0.3, 'learning_rate': 0.0001, 'batch_size': 64, 'epochs': 75},
    {'hidden_layers': 4, 'neurons_per_layer': 32, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 50},
    {'hidden_layers': 2, 'neurons_per_layer': 128, 'dropout_rate': 0.2, 'learning_rate': 0.0001, 'batch_size': 32, 'epochs': 75},
    {'hidden_layers': 3, 'neurons_per_layer': 128, 'dropout_rate': 0.1, 'learning_rate': 0.001, 'batch_size': 128, 'epochs': 50}
]

In [14]:
# Hyperparameter tuning with 5-fold cross-validation
def hyperparameter_tuning(X, y, param_combinations):
    """Perform hyperparameter tuning using cross-validation"""
    
    best_score = float('inf')
    best_params = None
    best_metrics = None
    
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    
    print(f"Testing {len(param_combinations)} parameter combinations with 5-fold CV...")
    
    for i, params in enumerate(param_combinations):
        print(f"\nTrial {i+1}/{len(param_combinations)}: {params}")
        
        fold_scores = []
        fold_metrics = {'MSE': [], 'RMSE': [], 'MAE': [], 'R2': [], 'MAPE': [], 'MBE': []}
        
        for fold_num, (train_idx, val_idx) in enumerate(kf.split(X)):
            try:
                X_train_fold, X_val_fold = X[train_idx], X[val_idx]
                y_train_fold, y_val_fold = y.iloc[train_idx], y.iloc[val_idx]
                
                # Create and train model
                model = create_model(
                    input_dim=X.shape[1],
                    hidden_layers=params['hidden_layers'],
                    neurons_per_layer=params['neurons_per_layer'],
                    dropout_rate=params['dropout_rate'],
                    learning_rate=params['learning_rate']
                )
                
                # Callbacks
                early_stopping = keras.callbacks.EarlyStopping(
                    monitor='val_loss', 
                    patience=10, 
                    restore_best_weights=True, 
                    verbose=0,
                    min_delta=0.001
                )
                
                reduce_lr = keras.callbacks.ReduceLROnPlateau(
                    monitor='val_loss', 
                    factor=0.5, 
                    patience=5, 
                    min_lr=1e-7,
                    verbose=0
                )
                
                # Train the model
                history = model.fit(
                    X_train_fold, y_train_fold,
                    validation_data=(X_val_fold, y_val_fold),
                    epochs=params['epochs'],
                    batch_size=params['batch_size'],
                    verbose=0,
                    callbacks=[early_stopping, reduce_lr]
                )
                
                # Predict and calculate metrics
                y_pred = model.predict(X_val_fold, verbose=0).flatten()
                
                # Check for NaN in predictions
                if np.isnan(y_pred).any():
                    print(f"Warning: NaN predictions found in fold {fold_num}")
                    y_pred = np.nan_to_num(y_pred, nan=np.median(y_train_fold))
                
                metrics = calculate_metrics(y_val_fold, y_pred)
                
                if np.isfinite(metrics['MSE']):
                    fold_scores.append(metrics['MSE'])
                    for metric_name, value in metrics.items():
                        fold_metrics[metric_name].append(value)
                else:
                    print(f"Invalid metrics in fold {fold_num}")
                    
            except Exception as e:
                print(f"Error in fold {fold_num}: {e}")
                continue
        
        if len(fold_scores) >= 3:  # Need at least 3 valid folds
            # Calculate average metrics across folds
            avg_score = np.mean(fold_scores)
            avg_metrics = {metric: np.mean(values) for metric, values in fold_metrics.items() if len(values) > 0}
            
            print(f"Valid folds: {len(fold_scores)}/5")
            print(f"Average MSE: {avg_score:.4f}")
            if 'R2' in avg_metrics:
                print(f"Average R2: {avg_metrics['R2']:.4f}")
            
            # Update best parameters if this is the best score so far
            if avg_score < best_score:
                best_score = avg_score
                best_params = params
                best_metrics = avg_metrics
                print("*** New best parameters found! ***")
        else:
            print(f"Not enough valid folds ({len(fold_scores)}/5) for this parameter combination.")
    
    return best_params, best_score, best_metrics

In [15]:
# Perform hyperparameter tuning
print("Starting hyperparameter tuning...")
best_params, best_score, best_metrics = hyperparameter_tuning(X_scaled, y, param_combinations)

if best_params is None:
    print("Error: No valid parameter combination found!")
    # Use default parameters
    best_params = {'hidden_layers': 3, 'neurons_per_layer': 64, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 50}
    print(f"Using default parameters: {best_params}")

print("\n" + "="*50)
print("HYPERPARAMETER TUNING RESULTS")
print("="*50)
print(f"Best parameters: {best_params}")
if best_metrics:
    print(f"Best MSE score: {best_score:.4f}")
    print("\nBest model performance metrics:")
    for metric_name, value in best_metrics.items():
        print(f"{metric_name}: {value:.4f}")

Starting hyperparameter tuning...
Testing 8 parameter combinations with 5-fold CV...

Trial 1/8: {'hidden_layers': 2, 'neurons_per_layer': 32, 'dropout_rate': 0.1, 'learning_rate': 0.001, 'batch_size': 32, 'epochs': 50}

Valid folds: 5/5
Average MSE: 0.2463
Average R2: 0.8897
*** New best parameters found! ***

Trial 2/8: {'hidden_layers': 2, 'neurons_per_layer': 64, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 50}
Valid folds: 5/5
Average MSE: 0.2332
Average R2: 0.8955
*** New best parameters found! ***

Trial 3/8: {'hidden_layers': 3, 'neurons_per_layer': 32, 'dropout_rate': 0.1, 'learning_rate': 0.001, 'batch_size': 32, 'epochs': 50}
Valid folds: 5/5
Average MSE: 0.2302
Average R2: 0.8969
*** New best parameters found! ***

Trial 4/8: {'hidden_layers': 3, 'neurons_per_layer': 64, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 50}
Valid folds: 5/5
Average MSE: 0.2266
Average R2: 0.8983
*** New best parameters found! ***

Trial 5/8

In [16]:
# Train the final model with best parameters using 5-fold cross-validation
print("\n" + "="*50)
print("FINAL MODEL EVALUATION WITH 5-FOLD CROSS-VALIDATION")
print("="*50)

kf = KFold(n_splits=5, shuffle=True, random_state=42)
fold_results = []

for fold, (train_idx, val_idx) in enumerate(kf.split(X_scaled)):
    print(f"\nFold {fold + 1}/5:")
    
    try:
        X_train_fold, X_val_fold = X_scaled[train_idx], X_scaled[val_idx]
        y_train_fold, y_val_fold = y.iloc[train_idx], y.iloc[val_idx]
        
        # Create and train the best model
        model = create_model(
            input_dim=X_scaled.shape[1],
            hidden_layers=best_params['hidden_layers'],
            neurons_per_layer=best_params['neurons_per_layer'],
            dropout_rate=best_params['dropout_rate'],
            learning_rate=best_params['learning_rate']
        )
        
        # Callbacks
        early_stopping = keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=15, restore_best_weights=True, verbose=0
        )
        
        reduce_lr = keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss', factor=0.5, patience=7, min_lr=1e-7, verbose=0
        )
        
        # Train model
        history = model.fit(
            X_train_fold, y_train_fold,
            validation_data=(X_val_fold, y_val_fold),
            epochs=best_params['epochs'],
            batch_size=best_params['batch_size'],
            verbose=0,
            callbacks=[early_stopping, reduce_lr]
        )
        
        # Make predictions
        y_pred = model.predict(X_val_fold, verbose=0).flatten()
        
        # Handle NaN predictions
        if np.isnan(y_pred).any():
            print("Warning: NaN predictions found, replacing with median")
            y_pred = np.nan_to_num(y_pred, nan=np.median(y_train_fold))
        
        # Calculate metrics
        metrics = calculate_metrics(y_val_fold, y_pred)
        
        if np.isfinite(metrics['MSE']):
            fold_results.append(metrics)
            
            print(f"MSE: {metrics['MSE']:.4f}")
            print(f"RMSE: {metrics['RMSE']:.4f}")
            print(f"MAE: {metrics['MAE']:.4f}")
            print(f"R2: {metrics['R2']:.4f}")
            print(f"MAPE: {metrics['MAPE']:.4f}%")
            print(f"MBE: {metrics['MBE']:.4f}")
        else:
            print("Invalid metrics for this fold")
            
    except Exception as e:
        print(f"Error in fold {fold + 1}: {e}")
        continue
        


FINAL MODEL EVALUATION WITH 5-FOLD CROSS-VALIDATION

Fold 1/5:
MSE: 0.2098
RMSE: 0.4581
MAE: 0.3414
R2: 0.9044
MAPE: 10.4595%
MBE: 0.0983

Fold 2/5:
MSE: 0.2064
RMSE: 0.4543
MAE: 0.3436
R2: 0.9103
MAPE: 10.4257%
MBE: -0.0103

Fold 3/5:
MSE: 0.1865
RMSE: 0.4319
MAE: 0.3200
R2: 0.9172
MAPE: 10.1050%
MBE: 0.0023

Fold 4/5:
MSE: 0.1848
RMSE: 0.4298
MAE: 0.3261
R2: 0.9141
MAPE: 9.4727%
MBE: 0.0188

Fold 5/5:
MSE: 0.2007
RMSE: 0.4480
MAE: 0.3278
R2: 0.9108
MAPE: 10.7693%
MBE: 0.0490


In [17]:
# Calculate final statistics across all folds
if len(fold_results) > 0:
    print("\n" + "="*50)
    print("FINAL RESULTS - 5-FOLD CROSS-VALIDATION SUMMARY")
    print("="*50)

    final_metrics = {}
    for metric_name in fold_results[0].keys():
        values = [fold[metric_name] for fold in fold_results]
        final_metrics[metric_name] = {
            'mean': np.mean(values),
            'std': np.std(values),
            'min': np.min(values),
            'max': np.max(values)
        }

    for metric_name, stats in final_metrics.items():
        print(f"\n{metric_name}:")
        print(f"  Mean: {stats['mean']:.4f} ± {stats['std']:.4f}")
        print(f"  Range: [{stats['min']:.4f}, {stats['max']:.4f}]")

    # Train final model on full dataset
    print(f"\n{'='*50}")
    print("TRAINING FINAL MODEL ON FULL DATASET")
    print("="*50)

    # Split data for final training (80-20 split)
    X_train_final, X_test_final, y_train_final, y_test_final = train_test_split(
        X_scaled, y, test_size=0.2, random_state=42
    )

    final_model = create_model(
        input_dim=X_scaled.shape[1],
        hidden_layers=best_params['hidden_layers'],
        neurons_per_layer=best_params['neurons_per_layer'],
        dropout_rate=best_params['dropout_rate'],
        learning_rate=best_params['learning_rate']
    )

    early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=15, restore_best_weights=True
    )

    reduce_lr = keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.5, patience=7, min_lr=1e-7
    )

    final_history = final_model.fit(
        X_train_final, y_train_final,
        validation_data=(X_test_final, y_test_final),
        epochs=best_params['epochs'],
        batch_size=best_params['batch_size'],
        callbacks=[early_stopping, reduce_lr],
        verbose=1
    )

    # Final predictions and evaluation
    y_pred_final = final_model.predict(X_test_final).flatten()
    
    # Handle NaN predictions
    if np.isnan(y_pred_final).any():
        print("Warning: NaN predictions in final model, replacing with median")
        y_pred_final = np.nan_to_num(y_pred_final, nan=np.median(y_train_final))
    
    final_test_metrics = calculate_metrics(y_test_final, y_pred_final)

    print(f"\nFinal Model Test Set Performance:")
    for metric_name, value in final_test_metrics.items():
        print(f"{metric_name}: {value:.4f}")

    print(f"\n{'='*50}")
    print("MODEL SUMMARY")
    print("="*50)
    final_model.summary()

    print(f"\nFeature names used in the model:")
    for i, feature in enumerate(X.columns):
        print(f"{i}: {feature}")
        
    print(f"\nDataset size: {X_scaled.shape[0]} samples")
    print(f"Number of features: {X_scaled.shape[1]}")
    print(f"Target range: [{y.min():.4f}, {y.max():.4f}]")
else:
    print("No valid fold results obtained. Please check your data and parameters.")
    


FINAL RESULTS - 5-FOLD CROSS-VALIDATION SUMMARY

MSE:
  Mean: 0.1976 ± 0.0102
  Range: [0.1848, 0.2098]

RMSE:
  Mean: 0.4444 ± 0.0115
  Range: [0.4298, 0.4581]

MAE:
  Mean: 0.3318 ± 0.0091
  Range: [0.3200, 0.3436]

R2:
  Mean: 0.9114 ± 0.0043
  Range: [0.9044, 0.9172]

MAPE:
  Mean: 10.2464 ± 0.4404
  Range: [9.4727, 10.7693]

MBE:
  Mean: 0.0316 ± 0.0388
  Range: [-0.0103, 0.0983]

TRAINING FINAL MODEL ON FULL DATASET
Epoch 1/50
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 28ms/step - loss: 20.5178 - mae: 4.2333 - val_loss: 14.3952 - val_mae: 3.5341 - learning_rate: 0.0010
Epoch 2/50
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 12.0912 - mae: 3.2087 - val_loss: 5.5204 - val_mae: 2.0834 - learning_rate: 0.0010
Epoch 3/50
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 4.3195 - mae: 1.6961 - val_loss: 1.3478 - val_mae: 0.7943 - learning_rate: 0.0010
Epoch 4/50
[1m55/55[0m [32m━━━━━━━━━━━━━━


Feature names used in the model:
0: season
1: holiday
2: workingday
3: weather
4: temp
5: temp_feel
6: humidity
7: windspeed
8: hour
9: day_of_week
10: month

Dataset size: 8708 samples
Number of features: 11
Target range: [0.0000, 6.7923]


In [None]:
# Key Features:

# Data Preprocessing:
# - Extracts time features from timestamp (hour, day_of_week, month)
# - Encodes categorical variables (season, holiday, workingday, weather)
# - Standardizes features for neural network training


# Neural Network Architecture:
# - Configurable deep network with multiple hidden layers
# - Dropout layers for regularization
# - Adam optimizer with configurable learning rate


# - Hyperparameter Tuning:
# - Grid search over multiple hyperparameters
# - 5-fold cross-validation for each parameter combination
# - Early stopping to prevent overfitting

# Comprehensive Evaluation Metrics:
# - MSE (Mean Squared Error)
# - RMSE (Root Mean Squared Error)
# - MAE (Mean Absolute Error)
# - R² (R-squared)
# - MAPE (Mean Absolute Percentage Error)
# - MBE (Mean Bias Error)

# 5-Fold Cross-Validation:
# - Evaluates model performance across different data splits
# - Provides statistical summary (mean, std, min, max) for all metrics

# Final Model Training:
# - Trains the best model on the full dataset
# - Provides final test set evaluation
