In [40]:
import os
import requests
import zipfile
import pandas as pd
from io import BytesIO
from datetime import datetime

# Monthly trip archives are requested as <base_url>YYYYMM-divvy-tripdata.zip.
# Months whose archive cannot be fetched or parsed are reported and skipped.
base_url = "https://divvy-tripdata.s3.amazonaws.com/"
years = range(2020, 2026)  # 2020 to 2025 inclusive

# Seconds requests may wait on the server (connect / between bytes) before
# giving up; without a timeout a stalled connection blocks the cell forever.
REQUEST_TIMEOUT = 60

# Create a directory to store the downloaded files
# NOTE(review): nothing in this cell writes into it; archives are only held
# in memory. Kept so that any later cell relying on the directory still works.
os.makedirs("divvy_data", exist_ok=True)


def _find_csv_member(archive):
    """Return the name of the first real CSV member of an open ZipFile.

    Members under ``__MACOSX/`` and hidden files (basename starting with
    ``.``) are ignored so that archive metadata is never mistaken for the
    trip data.

    Raises:
        ValueError: if the archive contains no CSV member.
    """
    csv_members = [
        name for name in archive.namelist()
        if name.lower().endswith(".csv")
        and not name.startswith("__MACOSX/")
        and not name.rsplit("/", 1)[-1].startswith(".")
    ]
    if not csv_members:
        raise ValueError("no CSV file found in archive")
    return csv_members[0]


def _load_month(file_url):
    """Download one ZIP archive and return its CSV as a DataFrame.

    A ``date`` column holding the calendar date of ``started_at`` is added.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        zipfile.BadZipFile: if the response body is not a ZIP archive.
    """
    response = requests.get(file_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()  # Check if the request was successful

    with zipfile.ZipFile(BytesIO(response.content)) as z:
        with z.open(_find_csv_member(z)) as csvfile:
            df = pd.read_csv(csvfile)

    # Extract date from the 'started_at' column
    df['date'] = pd.to_datetime(df['started_at']).dt.date
    return df


# Collect the monthly frames and concatenate once at the end; concatenating
# inside the loop would re-copy everything loaded so far on every iteration.
monthly_frames = []

for year in years:
    for month in range(1, 13):
        # Construct the filename and URL
        filename = f"{year}{month:02d}-divvy-tripdata.zip"
        file_url = base_url + filename

        try:
            monthly_frames.append(_load_month(file_url))
            print(f"Processed {filename}")

        except requests.HTTPError as e:
            print(f"Failed to download {filename}: {e}")
        except zipfile.BadZipFile:
            print(f"Failed to extract {filename}: Bad ZIP file")
        except Exception as e:
            # Best effort: one bad month (timeout, missing column, ...) must
            # not abort the whole load.
            print(f"An error occurred with {filename}: {e}")

# All successfully loaded months in one frame; an empty frame if none loaded.
if monthly_frames:
    all_data = pd.concat(monthly_frames, ignore_index=True)
else:
    all_data = pd.DataFrame()

# Count the distinct ride IDs for every combination of bike type, calendar
# date and start station, and expose that count in a column named 'rides'.
# NOTE(review): groupby's default dropna=True leaves out rows whose key
# columns are NaN -- confirm that trips without a start_station_name are
# meant to be excluded from the counts.
grouped_df = (
    all_data
    .groupby(['rideable_type', 'date', 'start_station_name'])['ride_id']
    .nunique()
    .reset_index(name='rides')
)

# Report how many aggregated rows were produced
print("Number of rows:", len(grouped_df))

Failed to download 202001-divvy-tripdata.zip: 404 Client Error: Not Found for url: https://divvy-tripdata.s3.amazonaws.com/202001-divvy-tripdata.zip
Failed to download 202002-divvy-tripdata.zip: 404 Client Error: Not Found for url: https://divvy-tripdata.s3.amazonaws.com/202002-divvy-tripdata.zip
Error processing 202502-divvy-tripdata.zip: 404 Client Error: Not Found for url: https://divvy-tripdata.s3.amazonaws.com/202502-divvy-tripdata.zip
Failed to download 202003-divvy-tripdata.zip: 404 Client Error: Not Found for url: https://divvy-tripdata.s3.amazonaws.com/202003-divvy-tripdata.zip
Error processing 202503-divvy-tripdata.zip: 404 Client Error: Not Found for url: https://divvy-tripdata.s3.amazonaws.com/202503-divvy-tripdata.zip
Error processing 202504-divvy-tripdata.zip: 404 Client Error: Not Found for url: https://divvy-tripdata.s3.amazonaws.com/202504-divvy-tripdata.zip
Error processing 202505-divvy-tripdata.zip: 404 Client Error: Not Found for url: https://divvy-tripdata.s3.amazo

In [52]:
# Export the aggregated ride counts. index=False keeps the frame's index out
# of the file; with the default it is written as an extra unnamed first column.
# NOTE(review): assumes grouped_df still carries the positional index produced
# by reset_index() -- confirm no intermediate cell set a meaningful index.
grouped_df.to_csv('2020_2025_Divvy.csv', index=False)