In [None]:
# !pip install countryinfo geopy pandas requests

In [None]:
import time
from countryinfo import CountryInfo
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
from tqdm import tqdm 

import requests
import json
import csv
from datetime import datetime

import glob

In [None]:
# geolocator = Nominatim(user_agent="vietnam_locations_app")

# def get_lat_lon(place):
#     """Fetch latitude and longitude for a given place."""
#     try:
#         location = geolocator.geocode(place, timeout=10)
#         if location:
#             return location.latitude, location.longitude
#         else:
#             return None, None
#     except Exception as e:
#         print(f"Error geocoding {place}: {e}")
#         return None, None

# def gather_vietnam_data():
#     """Collect latitude and longitude for all provinces in Vietnam."""
#     data = []
    
#     try:
#         vietnam = CountryInfo("Vietnam")
#         provinces = vietnam.provinces() if hasattr(vietnam, "provinces") else ["Vietnam"]

#         for province in tqdm(provinces, desc="Processing provinces"):
#             province_query = f"{province}, Vietnam"
#             prov_lat, prov_lon = get_lat_lon(province_query)

#             data.append({
#                 "Country": "Vietnam",
#                 "State/Province": province,
#                 "Latitude": prov_lat,
#                 "Longitude": prov_lon
#             })

#             time.sleep(1)  # Prevents rate limiting

#     except Exception as e:
#         print(f"Error processing Vietnam: {e}")

#     return data


# print("Gathering Vietnam location data...")
# vietnam_data = gather_vietnam_data()

# df = pd.DataFrame(vietnam_data)

# df = df.dropna(subset=['Latitude', 'Longitude'], how='all')

# num_parts = 5
# split_dfs = np.array_split(df, num_parts)

# for i, part_df in enumerate(split_dfs, start=1):
#     output_file = f"vietnam_locations_part_{i}.csv"
#     part_df.to_csv(output_file, index=False)
#     print(f"Saved {len(part_df)} entries to {output_file}")

# print(f"Total entries: {len(df)} (split into {num_parts} parts)")


In [None]:
# Open-Meteo endpoint that serves archived (historical) weather data.
BASE_URL = "https://archive-api.open-meteo.com/v1/archive"

# Inclusive range of calendar years to download (1 Jan of the first year
# through 31 Dec of the last).
start_year, end_year = 2019, 2019

# Location CSVs to process, one per part (parts 1 through 5).
location_files = [
    f"vietnam_locations_part_{part}.csv" for part in range(1, 6)
]

def get_historical_weather(province, lat, lon):
    """Fetch daily historical weather for one location, retrying on transient failures.

    The request goes to the module-level ``BASE_URL`` and covers 1 January of
    ``start_year`` through 31 December of ``end_year`` (both module-level).

    HTTP 429 responses and transport errors (timeouts, connection failures)
    are retried with exponential back-off: 10 s, 20 s, 40 s, ... between
    attempts, up to 5 attempts in total. Any other non-200 status is treated
    as permanent and is not retried.

    Args:
        province: Name of the location; used only in log messages.
        lat: Latitude sent to the API.
        lon: Longitude sent to the API.

    Returns:
        The decoded JSON body on HTTP 200, or ``None`` if the request failed
        permanently, the body was not valid JSON, or all attempts were used up.
    """
    start_date = f"{start_year}-01-01"
    end_date = f"{end_year}-12-31"

    params = {
        "latitude": lat,
        "longitude": lon,
        "start_date": start_date,
        "end_date": end_date,
        "daily": [
            "weathercode", "temperature_2m_max", "temperature_2m_min",
            "apparent_temperature_max", "apparent_temperature_min",
            "sunrise", "sunset", "daylight_duration", "sunshine_duration",
            "uv_index_max", "uv_index_clear_sky_max", "precipitation_sum",
            "rain_sum", "showers_sum", "snowfall_sum", "precipitation_hours",
            "precipitation_probability_max", "windspeed_10m_max",
            "windgusts_10m_max", "winddirection_10m_dominant",
            "shortwave_radiation_sum", "et0_fao_evapotranspiration"
        ],
        "timezone": "auto"
    }

    retries = 5
    wait_time = 10        # seconds before the first retry; doubled after each one
    request_timeout = 30  # seconds; without it a stalled connection blocks forever

    for attempt in range(1, retries + 1):
        try:
            response = requests.get(BASE_URL, params=params, timeout=request_timeout)
        except requests.RequestException as exc:
            # Timeouts / connection errors are transient: retry like a 429
            # instead of letting the exception abort the whole download loop.
            reason = f"Request failed for {province} ({exc})"
        else:
            if response.status_code == 200:
                try:
                    return response.json()
                except ValueError as exc:
                    print(f"Invalid JSON received for {province}: {exc}")
                    return None
            if response.status_code != 429:
                print(f"Error fetching data for {province}: {response.status_code}")
                return None
            reason = f"Rate limit reached for {province}"

        if attempt == retries:
            # No further attempt will follow, so sleeping here would only
            # delay the next province.
            break

        print(f"{reason}. Retrying in {wait_time} seconds...")
        time.sleep(wait_time)
        wait_time *= 2

    print(f"Skipping {province} after {retries} failed attempts.")
    return None

# Daily variables copied from each API response into the CSV, in column order.
# Must stay aligned with csv_header below (which also has Province and Date).
daily_fields = [
    "weathercode", "temperature_2m_max", "temperature_2m_min",
    "apparent_temperature_max", "apparent_temperature_min",
    "sunrise", "sunset", "daylight_duration", "sunshine_duration",
    "uv_index_max", "uv_index_clear_sky_max", "precipitation_sum",
    "rain_sum", "showers_sum", "snowfall_sum", "precipitation_hours",
    "precipitation_probability_max", "windspeed_10m_max",
    "windgusts_10m_max", "winddirection_10m_dominant",
    "shortwave_radiation_sum", "et0_fao_evapotranspiration"
]

# Wind columns are labelled km/h: the request does not set a wind speed unit,
# and Open-Meteo's default unit for wind speed is km/h, not m/s.
csv_header = [
    "Province", "Date", "Weather Code", "Max Temp (°C)", "Min Temp (°C)",
    "Max Apparent Temp (°C)", "Min Apparent Temp (°C)", "Sunrise", "Sunset",
    "Daylight Duration (s)", "Sunshine Duration (s)", "UV Index Max",
    "UV Index Clear Sky Max", "Precipitation (mm)", "Rain (mm)",
    "Showers (mm)", "Snowfall (mm)", "Precipitation Hours",
    "Precipitation Probability Max (%)", "Max Wind Speed (km/h)",
    "Max Wind Gusts (km/h)", "Dominant Wind Direction (°)",
    "Shortwave Radiation Sum (MJ/m²)", "Reference Evapotranspiration (mm)"
]
assert len(csv_header) == len(daily_fields) + 2, "header and field list are out of sync"

# For each location file: download the daily history of every listed province
# and write all rows to one CSV per input file.
for file_num, location_file in enumerate(location_files, start=1):
    print(f"\nProcessing file: {location_file}")
    province_data = pd.read_csv(location_file)

    weather_history = []

    for _, row in tqdm(province_data.iterrows(), total=len(province_data), desc=f"Fetching Weather Data (Part {file_num})"):
        province = row["State/Province"]
        lat, lon = row["Latitude"], row["Longitude"]

        # A row without usable coordinates cannot be queried; skip it rather
        # than spend a (rate-limited) request on it.
        if pd.isna(lat) or pd.isna(lon):
            print(f"Skipping {province}: missing coordinates.")
            continue

        print(f"Fetching data for {province}...")
        data = get_historical_weather(province, lat, lon)

        if data and "daily" in data:
            daily_data = data["daily"]
            dates = daily_data["time"]

            # A variable absent from the response becomes a column of None
            # instead of raising KeyError and discarding the whole part.
            columns = [
                daily_data.get(field) or [None] * len(dates)
                for field in daily_fields
            ]
            for date, *values in zip(dates, *columns):
                weather_history.append([province, date, *values])

        time.sleep(1)  # pause between provinces to stay under the API rate limit

    csv_filename = f"vietnam_historical_weather_part_{file_num}.csv"
    # Explicit UTF-8: the header contains "°" and "²" and province names may be
    # non-ASCII, so the platform default encoding must not be relied upon.
    with open(csv_filename, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(csv_header)
        writer.writerows(weather_history)

    print(f"Historical weather data saved to {csv_filename}")


Processing file: vietnam_locations_part_1.csv


Fetching Weather Data (Part 1):   0%|          | 0/13 [00:00<?, ?it/s]

Fetching data for An Giang...
Rate limit reached for An Giang. Retrying in 10 seconds...
Rate limit reached for An Giang. Retrying in 20 seconds...
Rate limit reached for An Giang. Retrying in 40 seconds...
Rate limit reached for An Giang. Retrying in 80 seconds...
Rate limit reached for An Giang. Retrying in 160 seconds...
Skipping An Giang after 5 failed attempts.


Fetching Weather Data (Part 1):   8%|▊         | 1/13 [05:17<1:03:26, 317.21s/it]

Fetching data for Ba Ria-Vung Tau...
Rate limit reached for Ba Ria-Vung Tau. Retrying in 10 seconds...
Rate limit reached for Ba Ria-Vung Tau. Retrying in 20 seconds...
Rate limit reached for Ba Ria-Vung Tau. Retrying in 40 seconds...
Rate limit reached for Ba Ria-Vung Tau. Retrying in 80 seconds...
Rate limit reached for Ba Ria-Vung Tau. Retrying in 160 seconds...


In [None]:
# csv_files = glob.glob("vietnam_historical_weather_part_*.csv")

# merged_df = pd.concat([pd.read_csv(file) for file in csv_files], ignore_index=True)

# merged_filename = "vietnam_historical_weather_total.csv"
# merged_df.to_csv(merged_filename, index=False)