# Data Collection from Visual Crossing Weather API

This notebook collects daily and hourly weather data for Hanoi from the Visual Crossing Timeline API and saves it as CSV files in `dataset/raw`.

In [1]:
import requests
import pandas as pd
import os
from datetime import datetime, timedelta
from dotenv import load_dotenv
from pathlib import Path

# Locate the project-root .env file. When `__file__` is defined the path is
# resolved two levels above this file; otherwise (e.g. in a notebook kernel)
# it falls back to the parent of the current working directory.
if '__file__' in globals():
    env_path = Path(__file__).parent.parent / '.env'
else:
    env_path = Path('../.env')

# Kept as the final expression so the cell echoes load_dotenv's return value.
load_dotenv(dotenv_path=env_path)

True

In [2]:
# Visual Crossing API configuration
BASE_URL = "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline"
API_KEY = os.environ.get("VISUAL_CROSSING_API_KEY")

# Fail fast: the key is read from the environment and is needed by the request.
if not API_KEY:
    raise ValueError("API key not found! Please set VISUAL_CROSSING_API_KEY in .env file")

# Location
LOCATION = "Hanoi,Vietnam"

# Date range: today plus the 15 days before it. Both endpoints are passed to
# the API, so the request covers 16 calendar days.
end_date = datetime.now()
start_date = end_date - timedelta(days=15)

# Format both endpoints as YYYY-MM-DD strings
start_date_str, end_date_str = (
    moment.strftime("%Y-%m-%d") for moment in (start_date, end_date)
)

In [3]:
def fetch_weather_data(location, start_date, end_date, api_key, timeout=30):
    """
    Fetch weather data from the Visual Crossing Timeline API.

    Parameters:
    - location: Location name (e.g., "Hanoi,Vietnam")
    - start_date: Start date (YYYY-MM-DD)
    - end_date: End date (YYYY-MM-DD)
    - api_key: Your Visual Crossing API key
    - timeout: Seconds to wait for the server before giving up (default 30)

    Returns:
    - dict parsed from the JSON response, or None if the request failed
      (the error is printed, not raised)
    """
    import re  # local import: only needed to redact the key from error text

    # Build API URL
    url = f"{BASE_URL}/{location}/{start_date}/{end_date}"

    # API parameters
    params = {
        'unitGroup': 'metric',  # Use metric units (Celsius, km/h, etc.)
        'key': api_key,
        'include': 'days,hours',  # Include daily and hourly data
        'contentType': 'json'
    }

    try:
        print(f"Fetching weather data for {location} from {start_date} to {end_date}...")
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()  # Raise an error for bad status codes

        data = response.json()
        print("Data fetched successfully!")
        return data

    # ValueError is included because older requests releases raise a plain
    # ValueError (not a RequestException) when the body is not valid JSON.
    except (requests.exceptions.RequestException, ValueError) as e:
        # The exception text can embed the full request URL, query string
        # included. Mask the key so it never lands in saved notebook output.
        safe_message = re.sub(r"(key=)[^&\s]+", r"\1***", str(e))
        print(f"Error fetching data: {safe_message}")
        return None

In [4]:
# Request the configured location and date range
weather_data = fetch_weather_data(LOCATION, start_date_str, end_date_str, API_KEY)

# A falsy result means the fetch failed (or returned nothing); skip the summary then.
if weather_data:
    print(
        f"\nLocation: {weather_data.get('resolvedAddress', 'N/A')}",
        f"Timezone: {weather_data.get('timezone', 'N/A')}",
        f"Number of days: {len(weather_data.get('days', []))}",
        sep="\n",
    )

Fetching weather data for Hanoi,Vietnam from 2025-11-01 to 2025-11-16...
Data fetched successfully!

Location: Hà Nội, Việt Nam
Timezone: Asia/Bangkok
Number of days: 16


In [5]:
def process_daily_data(weather_data):
    """
    Build a DataFrame with one row per entry of weather_data['days'].

    Returns None when weather_data is empty/None or has no 'days' key.
    Prints the resulting shape and column names as a side effect.
    """
    has_days = bool(weather_data) and 'days' in weather_data
    if not has_days:
        return None

    # Each day record becomes one row; keys become columns.
    day_records = list(weather_data['days'])
    df_daily = pd.DataFrame(day_records)

    print(f"Daily data shape: {df_daily.shape}")
    print(f"Columns: {df_daily.columns.tolist()}")

    return df_daily

def process_hourly_data(weather_data):
    """
    Flatten the per-day 'hours' lists into a single hourly DataFrame.

    Every hourly record is copied and tagged with a 'date' key holding the
    parent day's 'datetime' value. Days without an 'hours' key are skipped.
    Returns None when weather_data is empty/None or has no 'days' key.
    Prints the resulting shape and column names as a side effect.
    """
    if not weather_data or 'days' not in weather_data:
        return None

    hour_records = []
    for day in weather_data['days']:
        if 'hours' not in day:
            continue
        day_label = day['datetime']
        # Build new dicts so the source payload is left unmodified.
        hour_records.extend({**hour, 'date': day_label} for hour in day['hours'])

    df_hourly = pd.DataFrame(hour_records)
    print(f"Hourly data shape: {df_hourly.shape}")
    print(f"Columns: {df_hourly.columns.tolist()}")

    return df_hourly

In [6]:
# Process the data
# Bind both frames up front: if the fetch failed, the branch below is skipped
# and later `df_daily is not None` / `df_hourly is not None` checks would
# otherwise raise NameError on a fresh kernel.
df_daily = None
df_hourly = None

if weather_data:
    print("\n=== Processing Daily Data ===")
    df_daily = process_daily_data(weather_data)

    print("\n=== Processing Hourly Data ===")
    df_hourly = process_hourly_data(weather_data)

    # Display first few rows
    if df_daily is not None:
        print("\nDaily data preview:")
        display(df_daily.head())

    if df_hourly is not None:
        print("\nHourly data preview:")
        display(df_hourly.head())


=== Processing Daily Data ===
Daily data shape: (16, 37)
Columns: ['datetime', 'datetimeEpoch', 'tempmax', 'tempmin', 'temp', 'feelslikemax', 'feelslikemin', 'feelslike', 'dew', 'humidity', 'precip', 'precipprob', 'precipcover', 'preciptype', 'snow', 'snowdepth', 'windgust', 'windspeed', 'winddir', 'pressure', 'cloudcover', 'visibility', 'solarradiation', 'solarenergy', 'uvindex', 'severerisk', 'sunrise', 'sunriseEpoch', 'sunset', 'sunsetEpoch', 'moonphase', 'conditions', 'description', 'icon', 'stations', 'source', 'hours']

=== Processing Hourly Data ===
Hourly data shape: (384, 26)
Columns: ['datetime', 'datetimeEpoch', 'temp', 'feelslike', 'humidity', 'dew', 'precip', 'precipprob', 'snow', 'snowdepth', 'preciptype', 'windgust', 'windspeed', 'winddir', 'pressure', 'visibility', 'cloudcover', 'solarradiation', 'solarenergy', 'uvindex', 'severerisk', 'conditions', 'icon', 'stations', 'source', 'date']

Daily data preview:


Unnamed: 0,datetime,datetimeEpoch,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,...,sunriseEpoch,sunset,sunsetEpoch,moonphase,conditions,description,icon,stations,source,hours
0,2025-11-01,1761930000,21.0,20.0,20.5,21.0,20.0,20.5,19.2,92.2,...,1761951551,17:20:57,1761992457,0.36,"Rain, Overcast",Cloudy skies throughout the day with a chance ...,rain,[VVNB],obs,"[{'datetime': '00:00:00', 'datetimeEpoch': 176..."
1,2025-11-02,1762016400,20.0,19.0,19.5,20.0,19.0,19.5,18.2,92.6,...,1762037981,17:20:25,1762078825,0.39,"Rain, Overcast",Cloudy skies throughout the day with rain.,rain,[VVNB],obs,"[{'datetime': '00:00:00', 'datetimeEpoch': 176..."
2,2025-11-03,1762102800,19.0,18.0,18.2,19.0,18.0,18.2,17.0,92.7,...,1762124411,17:19:54,1762165194,0.42,"Rain, Overcast",Cloudy skies throughout the day with a chance ...,rain,[VVNB],obs,"[{'datetime': '00:00:00', 'datetimeEpoch': 176..."
3,2025-11-04,1762189200,23.0,18.0,19.9,23.0,18.0,19.9,17.0,83.7,...,1762210842,17:19:24,1762251564,0.46,"Rain, Overcast",Cloudy skies throughout the day with a chance ...,rain,[VVNB],obs,"[{'datetime': '00:00:00', 'datetimeEpoch': 176..."
4,2025-11-05,1762275600,24.0,19.0,21.4,24.0,19.0,21.4,17.9,80.9,...,1762297274,17:18:56,1762337936,0.5,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,[VVNB],obs,"[{'datetime': '00:00:00', 'datetimeEpoch': 176..."



Hourly data preview:


Unnamed: 0,datetime,datetimeEpoch,temp,feelslike,humidity,dew,precip,precipprob,snow,snowdepth,...,cloudcover,solarradiation,solarenergy,uvindex,severerisk,conditions,icon,stations,source,date
0,00:00:00,1761930000,21.0,21.0,94.02,20.0,1.1,100.0,0.0,0.0,...,100.0,0.0,0.0,0.0,10.0,"Rain, Overcast",rain,[VVNB],obs,2025-11-01
1,01:00:00,1761933600,21.0,21.0,94.02,20.0,1.0,100.0,0.0,0.0,...,100.0,0.0,0.0,0.0,10.0,"Rain, Overcast",rain,[VVNB],obs,2025-11-01
2,02:00:00,1761937200,21.0,21.0,94.02,20.0,0.8,100.0,0.0,0.0,...,100.0,0.0,0.0,0.0,10.0,"Rain, Overcast",rain,[VVNB],obs,2025-11-01
3,03:00:00,1761940800,21.0,21.0,94.02,20.0,0.7,100.0,0.0,0.0,...,100.0,0.0,0.0,0.0,10.0,"Rain, Overcast",rain,[VVNB],obs,2025-11-01
4,04:00:00,1761944400,21.0,21.0,94.02,20.0,0.8,100.0,0.0,0.0,...,100.0,0.0,0.0,0.0,10.0,"Rain, Overcast",rain,[VVNB],obs,2025-11-01


In [7]:
# Destination for the raw CSV exports (relative to the working directory)
output_dir = "../dataset/raw"

# Make sure the destination exists; a no-op when it is already there
os.makedirs(output_dir, exist_ok=True)

# Timestamp of this run. NOTE: it is not part of the file names below, which
# are fixed, so every run writes to the same two CSV paths.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Daily export (skipped when no daily frame was produced)
if df_daily is not None:
    daily_filename = "Hanoi_Daily.csv"
    daily_filepath = os.path.join(output_dir, daily_filename)
    df_daily.to_csv(daily_filepath, index=False, encoding='utf-8')
    print(f"\n✓ Daily data saved to: {daily_filepath}")
    print(f"  Shape: {df_daily.shape}")

# Hourly export (skipped when no hourly frame was produced)
if df_hourly is not None:
    hourly_filename = "Hanoi_Hourly.csv"
    hourly_filepath = os.path.join(output_dir, hourly_filename)
    df_hourly.to_csv(hourly_filepath, index=False, encoding='utf-8')
    print(f"\n✓ Hourly data saved to: {hourly_filepath}")
    print(f"  Shape: {df_hourly.shape}")


✓ Daily data saved to: ../dataset/raw\Hanoi_Daily.csv
  Shape: (16, 37)

✓ Hourly data saved to: ../dataset/raw\Hanoi_Hourly.csv
  Shape: (384, 26)
