# 🚲 Divvy Bike-Share 2024 Analysis
This notebook combines, cleans, and prepares the 2024 Divvy bike-sharing data for analysis.

In [1]:
import pandas as pd
import glob
import os
from datetime import datetime

Current working directory:
/Users/kimberlymunoz/Documents


## 📥 Load and Combine Data

In [None]:
# Folder containing all 12 monthly CSVs; adjust to where your files are stored.
path = "./divvy_data_2024"

# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the combined row order reproducible between runs and machines.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))
if not all_files:
    # Without this guard pd.concat fails with an opaque
    # "No objects to concatenate" error.
    raise FileNotFoundError(
        "No CSV files found in '{}'".format(os.path.abspath(path))
    )

df_list = []
for csv_file in all_files:
    monthly_df = pd.read_csv(csv_file)
    # Normalise headers per file, before concatenating, so that columns whose
    # names differ only by case or surrounding whitespace line up instead of
    # being split into separate, half-empty columns.
    monthly_df.columns = monthly_df.columns.str.lower().str.strip()
    # Record which file each row came from.
    monthly_df["source_file"] = os.path.basename(csv_file)
    df_list.append(monthly_df)

# Stack every monthly frame into one frame with a fresh 0..n-1 index.
df = pd.concat(df_list, ignore_index=True)
df.head()

## 🧹 Clean the Data

In [None]:
# Normalise column labels: lower-case them, then trim surrounding whitespace
# (guards against headers that vary slightly between months).
df.columns = df.columns.str.lower().str.strip()

# Parse both trip timestamps into datetime values.
# NOTE(review): if the monthly files mix timestamp layouts (e.g. with and
# without fractional seconds), newer pandas releases may need an explicit
# `format=` argument here — confirm against the installed version.
for time_col in ('started_at', 'ended_at'):
    df[time_col] = pd.to_datetime(df[time_col])

# Keep only rows that have both timestamps...
has_both_times = df[['started_at', 'ended_at']].notna().all(axis=1)
df = df[has_both_times]
# ...and whose end is strictly later than their start.
ends_after_start = df['ended_at'] > df['started_at']
df = df[ends_after_start]

# Trip duration in minutes, followed by a final guard that drops any
# zero or negative durations.
elapsed = df['ended_at'] - df['started_at']
df['ride_length'] = elapsed.dt.total_seconds() / 60
df = df[df['ride_length'] > 0]

## 🧠 Add Date & Time Features

In [None]:
# Derive calendar features from each trip's start timestamp:
# weekday name, month name, and hour of day.
start_times = df['started_at']
df['day_of_week'] = start_times.dt.day_name()
df['month'] = start_times.dt.month_name()
df['hour'] = start_times.dt.hour

def get_season(month):
    """Return the season name for a month number.

    Months 12, 1, 2 map to 'Winter'; 3-5 to 'Spring'; 6-8 to 'Summer'.
    Any other value (including 9, 10, 11) yields 'Fall'.
    """
    season_months = (
        ('Winter', (12, 1, 2)),
        ('Spring', (3, 4, 5)),
        ('Summer', (6, 7, 8)),
    )
    for season, months in season_months:
        if month in months:
            return season
    # Everything that is not winter, spring or summer falls through to here.
    return 'Fall'

# Label every trip with the season of its start month, then preview the frame.
start_month = df['started_at'].dt.month
df['season'] = start_month.apply(get_season)
df.head()

## 💾 Export Cleaned Data

In [None]:
# Save the cleaned frame to CSV; index=False leaves the row index out of the file.
# The filename is defined once so the file written and the message cannot drift apart.
output_file = "cleaned_divvy_2024.csv"
df.to_csv(output_file, index=False)
# The leading check mark was previously a mis-encoded byte sequence.
print("✅ Cleaned dataset saved as {}".format(output_file))