# 🚴 Bike Rental Demand Prediction - Data Exploration & Preprocessing

## Step 1: Data Loading

In [None]:

import pandas as pd

# Read hour.csv and parse the `dteday` column into datetime64 in one chain,
# so later cells can use the `.dt` accessor on it.
df = pd.read_csv("hour.csv").assign(
    dteday=lambda frame: pd.to_datetime(frame['dteday'])
)
df.head()
    

## Step 1: Data Visualization

In [None]:

import seaborn as sns
import matplotlib.pyplot as plt

# Average hourly demand.
# pointplot aggregates `cnt` within each `hr` category (mean is seaborn's
# default estimator), so every point is the average count for that hour.
# `ci=None` turns off the error bars; seaborn >= 0.12 deprecates it in favour
# of the equivalent `errorbar=None`, but `ci` is kept here so the cell also
# runs on older seaborn installs.
plt.figure(figsize=(12, 5))
sns.pointplot(data=df, x='hr', y='cnt', ci=None)
plt.title("Average Hourly Bike Demand")
plt.xlabel("Hour of Day")
plt.ylabel("Average Count")
plt.grid(True)
plt.show()
    

In [None]:

# Correlation heatmap.
# Only numeric columns are correlated: `dteday` was converted to datetime64
# when the data was loaded, and pandas >= 2.0 no longer drops non-numeric
# columns silently inside DataFrame.corr(), so calling it on the full frame
# raises there. select_dtypes works the same way on older pandas versions.
corr_matrix = df.select_dtypes(include='number').corr()

plt.figure(figsize=(12, 8))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap='coolwarm', square=True)
plt.title("Feature Correlation Matrix")
plt.show()
    

In [None]:

# Outlier detection: draw one boxplot per column in `numerical_cols`,
# laid out on a 2x2 grid of axes.
numerical_cols = ['cnt', 'temp', 'hum', 'windspeed']
fig, axes = plt.subplots(2, 2, figsize=(14, 8))
for ax, col in zip(axes.flat, numerical_cols):
    sns.boxplot(data=df, y=col, ax=ax)
    ax.set_title(f'Boxplot of {col}')
fig.tight_layout()
plt.show()
    

## Step 2: Feature Engineering

In [None]:

# Derive calendar fields from `dteday`: each new column takes the name of the
# `.dt` accessor attribute it is read from.
for part in ('day', 'month', 'year', 'dayofweek'):
    df[part] = getattr(df['dteday'].dt, part)
df.head()
    

## Step 2: Encode Categorical Variables

In [None]:

# One-hot encode the listed categorical columns into a new frame.
# drop_first=True omits the first level of each column, so k categories
# become k-1 indicator columns.
categorical_features = [
    'season',
    'weathersit',
    'mnth',
    'hr',
    'weekday',
    'holiday',
    'workingday',
]
df_encoded = pd.get_dummies(
    data=df,
    columns=categorical_features,
    drop_first=True,
)
df_encoded.head()
    

## Step 2: Normalize Numerical Features

In [None]:

from sklearn.preprocessing import StandardScaler

# Standardise the listed columns of `df_encoded` (StandardScaler removes the
# mean and scales to unit variance), writing the result back in place.
# NOTE(review): the scaler is fitted on every row; if a train/test split
# happens later, confirm whether it should be fitted on training rows only.
scaled_features = ['temp', 'atemp', 'hum', 'windspeed']
scaler = StandardScaler()
standardized_values = scaler.fit_transform(df_encoded[scaled_features])
df_encoded[scaled_features] = standardized_values
df_encoded.head()
    