# Amsterdam Bike Usage Prediction - Data Collection

This notebook collects bike usage data and weather data.

In [1]:
import sys
sys.path.append('..')

from src.data.collect_bike_data import BikeDataCollector
from src.data.collect_weather_data import WeatherDataCollector
from datetime import datetime, timedelta

## 1. Collect Bike Data

In [2]:
# Initialize bike data collector
bike_collector = BikeDataCollector()

# Set date range
end_date = datetime.now()
start_date = end_date - timedelta(days=30)

# Get bike data
bike_data = bike_collector.get_bike_counts(start_date, end_date)

# Display sample
print('The number of records is : ', len(bike_data))
print("Sample of bike data:")

display(bike_data.head())

# Save data
bike_collector.save_data(bike_data, '../data/raw/bike_counts.csv')

The number of records is :  2232
Sample of bike data:


Unnamed: 0,timestamp,hour,day_of_week,month,count,location_id
0,2024-12-31 04:56:59.990346,4,1,12,45,1
1,2024-12-31 05:56:59.990346,5,1,12,55,1
2,2024-12-31 06:56:59.990346,6,1,12,273,1
3,2024-12-31 07:56:59.990346,7,1,12,200,1
4,2024-12-31 08:56:59.990346,8,1,12,670,1


Data saved to ../data/raw/bike_counts.csv


## 2. Collect Weather Data

In [4]:
# Initialize weather data collector
weather_collector = WeatherDataCollector()

# Define exact date range to match bike data
start_date = datetime(2024, 12, 31, 3, 4, 4)
end_date = datetime(2025, 1, 31, 2, 4, 4)

print("=== Input Validation ===")
print(f"Start date: {start_date}")
print(f"End date: {end_date}")
print(f"Total days: {(end_date - start_date).days + 1}")
print(f"Expected hours: {((end_date - start_date).total_seconds() / 3600):.2f}")

# Get daily weather data first
print("\n=== Collecting Daily Weather Data ===")
daily_data = weather_collector._get_daily_data(start_date, end_date)

if daily_data is not None:
    print(f"\nGot {len(daily_data)} days of data")
    print("Daily data date range:")
    print(f"Start: {daily_data['timestamp'].min()}")
    print(f"End: {daily_data['timestamp'].max()}")
    print("\nDaily data sample:")
    display(daily_data.head())

    # Convert to hourly
    print("\n=== Converting to Hourly Data ===")
    weather_data = weather_collector._convert_to_hourly(daily_data, start_date, end_date)

    # Display validation
    if weather_data is not None:
        print("\n=== Final Data Validation ===")
        print(f"Total records: {len(weather_data)}")
        print("\nTimestamp Analysis:")
        print(f"First timestamp: {weather_data['timestamp'].min()}")
        print(f"Last timestamp: {weather_data['timestamp'].max()}")
        print(f"Number of unique days: {weather_data['timestamp'].dt.date.nunique()}")
        print(f"Hours per day: {len(weather_data) / weather_data['timestamp'].dt.date.nunique():.2f}")
        
        print("\nHourly data sample:")
        display(weather_data.head())
        
        print("\nValue Ranges:")
        for col in ['temperature', 'rain', 'wind_speed', 'cloud_cover']:
            print(f"{col}: {weather_data[col].min():.2f} to {weather_data[col].max():.2f}")
        
        # Save data
        weather_collector.save_data(weather_data, '../data/raw/weather_data.csv')
        # Get locations
        locations = weather_collector.get_locations()

        # Save locations data
        weather_collector.save_locations(locations, 'data/raw/locations.csv')

=== Input Validation ===
Start date: 2024-12-31 03:04:04
End date: 2025-01-31 02:04:04
Total days: 31
Expected hours: 743.00

=== Collecting Daily Weather Data ===
Fetching daily weather data...

Got 29 days of data
Daily data date range:
Start: 2024-12-31 00:00:00
End: 2025-01-28 00:00:00

Daily data sample:


Unnamed: 0,timestamp,temperature,rain,wind_speed,cloud_cover
0,2024-12-31,5.5,0.0,8.9,8
1,2025-01-01,7.8,20.9,11.6,8
2,2025-01-02,3.2,2.8,3.0,4
3,2025-01-03,2.7,2.1,4.9,7
4,2025-01-04,2.3,1.4,3.7,8



=== Converting to Hourly Data ===

Converting 29 days to hourly data...
Created hourly range from 2024-12-31 03:04:04 to 2025-01-31 02:04:04
Total hours: 744
Created 744 hourly records

=== Final Data Validation ===
Total records: 744

Timestamp Analysis:
First timestamp: 2024-12-31 03:04:04
Last timestamp: 2025-01-31 02:04:04
Number of unique days: 32
Hours per day: 23.25

Hourly data sample:


Unnamed: 0,timestamp,temperature,rain,wind_speed,cloud_cover
0,2024-12-31 03:04:04,7.431852,0.0,10.086634,8.0
1,2024-12-31 04:04:04,7.232051,0.0,8.300623,8.0
2,2024-12-31 05:04:04,6.914214,0.0,8.95293,8.0
3,2024-12-31 06:04:04,6.5,0.0,9.223544,8.0
4,2024-12-31 07:04:04,6.017638,0.0,8.096308,8.0



Value Ranges:
temperature: -3.00 to 10.60
rain: 0.00 to 2.75
wind_speed: 0.63 to 12.85
cloud_cover: 3.00 to 8.00

Data saved to ../data/raw/weather_data.csv
Records: 744
Date range: 2024-12-31 03:04:04 to 2025-01-31 02:04:04


AttributeError: 'WeatherDataCollector' object has no attribute 'get_locations'