In [None]:
# !pip install countryinfo geopy pandas requests

In [2]:
import time
from countryinfo import CountryInfo
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
from tqdm import tqdm 

import requests
import json
import csv
from datetime import datetime

import glob

In [None]:
# geolocator = Nominatim(user_agent="vietnam_locations_app")

# def get_lat_lon(place):
#     """Fetch latitude and longitude for a given place."""
#     try:
#         location = geolocator.geocode(place, timeout=10)
#         if location:
#             return location.latitude, location.longitude
#         else:
#             return None, None
#     except Exception as e:
#         print(f"Error geocoding {place}: {e}")
#         return None, None

# def gather_vietnam_data():
#     """Collect latitude and longitude for all provinces in Vietnam."""
#     data = []
    
#     try:
#         vietnam = CountryInfo("Vietnam")
#         provinces = vietnam.provinces() if hasattr(vietnam, "provinces") else ["Vietnam"]

#         for province in tqdm(provinces, desc="Processing provinces"):
#             province_query = f"{province}, Vietnam"
#             prov_lat, prov_lon = get_lat_lon(province_query)

#             data.append({
#                 "Country": "Vietnam",
#                 "State/Province": province,
#                 "Latitude": prov_lat,
#                 "Longitude": prov_lon
#             })

#             time.sleep(1)  # Prevents rate limiting

#     except Exception as e:
#         print(f"Error processing Vietnam: {e}")

#     return data


# print("Gathering Vietnam location data...")
# vietnam_data = gather_vietnam_data()

# df = pd.DataFrame(vietnam_data)

# df = df.dropna(subset=['Latitude', 'Longitude'], how='all')

# num_parts = 5
# split_dfs = np.array_split(df, num_parts)

# for i, part_df in enumerate(split_dfs, start=1):
#     output_file = f"vietnam_locations_part_{i}.csv"
#     part_df.to_csv(output_file, index=False)
#     print(f"Saved {len(part_df)} entries to {output_file}")

# print(f"Total entries: {len(df)} (split into {num_parts} parts)")


In [3]:
# Endpoint of the Open-Meteo archive API queried by get_historical_weather.
BASE_URL = "https://archive-api.open-meteo.com/v1/archive"

# First and last calendar year to download (both inclusive: the request runs
# from 1 January of start_year to 31 December of end_year).
start_year, end_year = 2019, 2019

# Province coordinate files to process, one per part (parts 1 through 5).
location_files = [f"vietnam_locations_part_{part}.csv" for part in range(1, 6)]

def get_historical_weather(province, lat, lon, retries=5, wait_time=10, timeout=30):
    """Fetch daily historical weather for one location, retrying on transient failures.

    The request covers 1 January of ``start_year`` through 31 December of
    ``end_year`` (module-level settings) and is sent to ``BASE_URL``.

    Args:
        province: Location name; used only in the progress/error messages.
        lat: Latitude sent to the API.
        lon: Longitude sent to the API.
        retries: Maximum number of attempts before giving up.
        wait_time: Seconds to wait before the first retry; doubled after
            every retry (exponential backoff).
        timeout: Seconds after which a single HTTP request is abandoned.

    Returns:
        The decoded JSON response (a dict) on HTTP 200, or ``None`` when the
        API answers with a non-retryable status, the body is not valid JSON,
        or every attempt was rate limited / failed at the network level.
    """
    start_date = f"{start_year}-01-01"
    end_date = f"{end_year}-12-31"

    params = {
        "latitude": lat,
        "longitude": lon,
        "start_date": start_date,
        "end_date": end_date,
        "daily": [
            "weathercode", "temperature_2m_max", "temperature_2m_min",
            "apparent_temperature_max", "apparent_temperature_min",
            "sunrise", "sunset", "daylight_duration", "sunshine_duration",
            "uv_index_max", "uv_index_clear_sky_max", "precipitation_sum",
            "rain_sum", "showers_sum", "snowfall_sum", "precipitation_hours",
            "precipitation_probability_max", "windspeed_10m_max",
            "windgusts_10m_max", "winddirection_10m_dominant",
            "shortwave_radiation_sum", "et0_fao_evapotranspiration"
        ],
        # The CSV written by this notebook labels the wind columns "(m/s)",
        # but the API reports km/h unless told otherwise.
        "wind_speed_unit": "ms",
        "timezone": "auto"
    }

    for attempt in range(1, retries + 1):
        try:
            # Without a timeout a stalled connection would block the run forever.
            response = requests.get(BASE_URL, params=params, timeout=timeout)
        except requests.RequestException as exc:
            # Connection errors and timeouts are treated as retryable.
            problem = f"Request failed for {province} ({exc})."
        else:
            if response.status_code == 200:
                try:
                    return response.json()
                except ValueError:
                    print(f"Error fetching data for {province}: response was not valid JSON")
                    return None
            if response.status_code != 429:
                print(f"Error fetching data for {province}: {response.status_code}")
                return None
            problem = f"Rate limit reached for {province}."

        # Only wait when another attempt will actually follow.
        if attempt < retries:
            print(f"{problem} Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
            wait_time *= 2

    print(f"Skipping {province} after {retries} failed attempts.")
    return None

# (API daily field, CSV column header) pairs. Keeping both in one table keeps
# the row layout and the header in sync; the order here is the column order
# of the output files (after "Province" and "Date").
daily_columns = [
    ("weathercode", "Weather Code"),
    ("temperature_2m_max", "Max Temp (°C)"),
    ("temperature_2m_min", "Min Temp (°C)"),
    ("apparent_temperature_max", "Max Apparent Temp (°C)"),
    ("apparent_temperature_min", "Min Apparent Temp (°C)"),
    ("sunrise", "Sunrise"),
    ("sunset", "Sunset"),
    ("daylight_duration", "Daylight Duration (s)"),
    ("sunshine_duration", "Sunshine Duration (s)"),
    ("uv_index_max", "UV Index Max"),
    ("uv_index_clear_sky_max", "UV Index Clear Sky Max"),
    ("precipitation_sum", "Precipitation (mm)"),
    ("rain_sum", "Rain (mm)"),
    ("showers_sum", "Showers (mm)"),
    ("snowfall_sum", "Snowfall (mm)"),
    ("precipitation_hours", "Precipitation Hours"),
    ("precipitation_probability_max", "Precipitation Probability Max (%)"),
    ("windspeed_10m_max", "Max Wind Speed (m/s)"),
    ("windgusts_10m_max", "Max Wind Gusts (m/s)"),
    ("winddirection_10m_dominant", "Dominant Wind Direction (°)"),
    ("shortwave_radiation_sum", "Shortwave Radiation Sum (MJ/m²)"),
    ("et0_fao_evapotranspiration", "Reference Evapotranspiration (mm)"),
]

# Download the weather history for every province listed in each location
# file and write one CSV per location file.
for file_num, location_file in enumerate(location_files, start=1):
    print(f"\nProcessing file: {location_file}")
    province_data = pd.read_csv(location_file)

    weather_history = []

    for _, row in tqdm(province_data.iterrows(), total=len(province_data), desc=f"Fetching Weather Data (Part {file_num})"):
        province = row["State/Province"]
        lat, lon = row["Latitude"], row["Longitude"]

        print(f"Fetching data for {province}...")
        data = get_historical_weather(province, lat, lon)

        # A province whose download failed (None) contributes no rows.
        if data and "daily" in data:
            daily_data = data["daily"]
            dates = daily_data["time"]

            # A series missing from the response becomes an all-empty column
            # instead of aborting the whole run with a KeyError.
            series = [
                daily_data.get(field) or [None] * len(dates)
                for field, _ in daily_columns
            ]
            weather_history.extend(
                [province, *day_values] for day_values in zip(dates, *series)
            )

        time.sleep(1)  # pause between provinces to stay under the API rate limit

    csv_filename = f"vietnam_historical_weather_part_{file_num}.csv"
    # The header contains non-ASCII characters (°, ²). Write UTF-8 explicitly
    # so the file can be read back by pd.read_csv (UTF-8 by default) on any
    # platform, regardless of the locale's default encoding.
    with open(csv_filename, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Province", "Date", *(header for _, header in daily_columns)])
        writer.writerows(weather_history)

    print(f"Historical weather data saved to {csv_filename}")


Processing file: vietnam_locations_part_1.csv


Fetching Weather Data (Part 1):   0%|          | 0/13 [00:00<?, ?it/s]

Fetching data for An Giang...


Fetching Weather Data (Part 1):   8%|▊         | 1/13 [00:03<00:40,  3.37s/it]

Fetching data for Ba Ria-Vung Tau...


Fetching Weather Data (Part 1):  15%|█▌        | 2/13 [00:06<00:32,  2.97s/it]

Fetching data for Bac Giang...


Fetching Weather Data (Part 1):  23%|██▎       | 3/13 [00:08<00:29,  2.93s/it]

Fetching data for Bac Kan...


Fetching Weather Data (Part 1):  31%|███       | 4/13 [00:11<00:25,  2.88s/it]

Fetching data for Bac Lieu...


Fetching Weather Data (Part 1):  38%|███▊      | 5/13 [00:14<00:23,  2.91s/it]

Fetching data for Bac Ninh...


Fetching Weather Data (Part 1):  46%|████▌     | 6/13 [00:17<00:20,  2.91s/it]

Fetching data for Ben Tre...


Fetching Weather Data (Part 1):  54%|█████▍    | 7/13 [00:20<00:16,  2.83s/it]

Fetching data for Binh Dinh...


Fetching Weather Data (Part 1):  62%|██████▏   | 8/13 [00:23<00:14,  2.97s/it]

Fetching data for Binh Duong...


Fetching Weather Data (Part 1):  69%|██████▉   | 9/13 [00:26<00:12,  3.01s/it]

Fetching data for Binh Phuoc...


Fetching Weather Data (Part 1):  77%|███████▋  | 10/13 [00:29<00:09,  3.05s/it]

Fetching data for Binh Thuan...


Fetching Weather Data (Part 1):  85%|████████▍ | 11/13 [00:33<00:06,  3.19s/it]

Fetching data for Ca Mau...


Fetching Weather Data (Part 1):  92%|█████████▏| 12/13 [00:36<00:03,  3.15s/it]

Fetching data for Can Tho...


Fetching Weather Data (Part 1): 100%|██████████| 13/13 [00:39<00:00,  3.01s/it]


Historical weather data saved to vietnam_historical_weather_part_1.csv

Processing file: vietnam_locations_part_2.csv


Fetching Weather Data (Part 2):   0%|          | 0/12 [00:00<?, ?it/s]

Fetching data for Cao Bang...


Fetching Weather Data (Part 2):   8%|▊         | 1/12 [00:03<00:35,  3.24s/it]

Fetching data for Da Nang...


Fetching Weather Data (Part 2):  17%|█▋        | 2/12 [00:07<00:36,  3.62s/it]

Fetching data for Dac Lak...


Fetching Weather Data (Part 2):  25%|██▌       | 3/12 [00:11<00:34,  3.83s/it]

Fetching data for Dong Nai...


Fetching Weather Data (Part 2):  33%|███▎      | 4/12 [00:13<00:27,  3.41s/it]

Fetching data for Dong Thap...
Rate limit reached for Dong Thap. Retrying in 10 seconds...
Rate limit reached for Dong Thap. Retrying in 20 seconds...


Fetching Weather Data (Part 2):  42%|████▏     | 5/12 [00:48<01:43, 14.78s/it]

Fetching data for Gia Lai...


Fetching Weather Data (Part 2):  50%|█████     | 6/12 [00:51<01:03, 10.65s/it]

Fetching data for Ha Giang...


Fetching Weather Data (Part 2):  58%|█████▊    | 7/12 [00:54<00:39,  7.99s/it]

Fetching data for Ha Nam...


Fetching Weather Data (Part 2):  67%|██████▋   | 8/12 [00:56<00:24,  6.24s/it]

Fetching data for Ha Noi...


Fetching Weather Data (Part 2):  75%|███████▌  | 9/12 [00:59<00:15,  5.07s/it]

Fetching data for Ha Tay...


Fetching Weather Data (Part 2):  83%|████████▎ | 10/12 [01:01<00:08,  4.33s/it]

Fetching data for Ha Tinh...


Fetching Weather Data (Part 2):  92%|█████████▏| 11/12 [01:04<00:03,  3.88s/it]

Fetching data for Hai Duong...


Fetching Weather Data (Part 2): 100%|██████████| 12/12 [01:07<00:00,  5.59s/it]


Historical weather data saved to vietnam_historical_weather_part_2.csv

Processing file: vietnam_locations_part_3.csv


Fetching Weather Data (Part 3):   0%|          | 0/12 [00:00<?, ?it/s]

Fetching data for Hai Phong...


Fetching Weather Data (Part 3):   8%|▊         | 1/12 [00:02<00:27,  2.48s/it]

Fetching data for Ho Chi Minh...


Fetching Weather Data (Part 3):  17%|█▋        | 2/12 [00:05<00:25,  2.50s/it]

Fetching data for Hoa Binh...


Fetching Weather Data (Part 3):  25%|██▌       | 3/12 [00:07<00:23,  2.62s/it]

Fetching data for Hung Yen...
Rate limit reached for Hung Yen. Retrying in 10 seconds...
Rate limit reached for Hung Yen. Retrying in 20 seconds...


Fetching Weather Data (Part 3):  33%|███▎      | 4/12 [00:42<02:02, 15.27s/it]

Fetching data for Khanh Hoa...


Fetching Weather Data (Part 3):  42%|████▏     | 5/12 [00:44<01:13, 10.53s/it]

Fetching data for Kien Giang...


Fetching Weather Data (Part 3):  50%|█████     | 6/12 [00:46<00:46,  7.71s/it]

Fetching data for Kon Tum...


Fetching Weather Data (Part 3):  58%|█████▊    | 7/12 [00:48<00:29,  5.91s/it]

Fetching data for Lai Chau...


Fetching Weather Data (Part 3):  67%|██████▋   | 8/12 [00:52<00:19,  4.99s/it]

Fetching data for Lam Dong...


Fetching Weather Data (Part 3):  75%|███████▌  | 9/12 [00:54<00:12,  4.07s/it]

Fetching data for Lang Son...


Fetching Weather Data (Part 3):  83%|████████▎ | 10/12 [00:56<00:07,  3.54s/it]

Fetching data for Lao Cai...


Fetching Weather Data (Part 3):  92%|█████████▏| 11/12 [00:58<00:03,  3.08s/it]

Fetching data for Long An...


Fetching Weather Data (Part 3): 100%|██████████| 12/12 [01:00<00:00,  5.04s/it]


Historical weather data saved to vietnam_historical_weather_part_3.csv

Processing file: vietnam_locations_part_4.csv


Fetching Weather Data (Part 4):   0%|          | 0/12 [00:00<?, ?it/s]

Fetching data for Nam Dinh...


Fetching Weather Data (Part 4):   8%|▊         | 1/12 [00:02<00:24,  2.20s/it]

Fetching data for Nghe An...


Fetching Weather Data (Part 4):  17%|█▋        | 2/12 [00:04<00:22,  2.23s/it]

Fetching data for Ninh Binh...
Rate limit reached for Ninh Binh. Retrying in 10 seconds...
Rate limit reached for Ninh Binh. Retrying in 20 seconds...


Fetching Weather Data (Part 4):  25%|██▌       | 3/12 [00:38<02:30, 16.70s/it]

Fetching data for Ninh Thuan...


Fetching Weather Data (Part 4):  33%|███▎      | 4/12 [00:40<01:27, 10.95s/it]

Fetching data for Phu Tho...


Fetching Weather Data (Part 4):  42%|████▏     | 5/12 [00:42<00:54,  7.82s/it]

Fetching data for Phu Yen...


Fetching Weather Data (Part 4):  50%|█████     | 6/12 [00:44<00:35,  5.86s/it]

Fetching data for Quang Binh...


Fetching Weather Data (Part 4):  58%|█████▊    | 7/12 [00:47<00:23,  4.70s/it]

Fetching data for Quang Nam...


Fetching Weather Data (Part 4):  67%|██████▋   | 8/12 [00:49<00:15,  3.93s/it]

Fetching data for Quang Ngai...


Fetching Weather Data (Part 4):  75%|███████▌  | 9/12 [00:51<00:10,  3.43s/it]

Fetching data for Quang Ninh...


Fetching Weather Data (Part 4):  83%|████████▎ | 10/12 [00:53<00:06,  3.00s/it]

Fetching data for Quang Tri...


Fetching Weather Data (Part 4):  92%|█████████▏| 11/12 [00:56<00:02,  2.76s/it]

Fetching data for Soc Trang...


Fetching Weather Data (Part 4): 100%|██████████| 12/12 [00:58<00:00,  4.86s/it]


Historical weather data saved to vietnam_historical_weather_part_4.csv

Processing file: vietnam_locations_part_5.csv


Fetching Weather Data (Part 5):   0%|          | 0/12 [00:00<?, ?it/s]

Fetching data for Son La...


Fetching Weather Data (Part 5):   8%|▊         | 1/12 [00:02<00:22,  2.02s/it]

Fetching data for Tay Ninh...
Rate limit reached for Tay Ninh. Retrying in 10 seconds...
Rate limit reached for Tay Ninh. Retrying in 20 seconds...


Fetching Weather Data (Part 5):  17%|█▋        | 2/12 [00:37<03:34, 21.45s/it]

Fetching data for Thai Binh...


Fetching Weather Data (Part 5):  25%|██▌       | 3/12 [00:41<02:01, 13.52s/it]

Fetching data for Thai Nguyen...


Fetching Weather Data (Part 5):  33%|███▎      | 4/12 [00:45<01:19,  9.89s/it]

Fetching data for Thanh Hoa...


Fetching Weather Data (Part 5):  42%|████▏     | 5/12 [00:49<00:53,  7.62s/it]

Fetching data for Thua Thien-Hue...


Fetching Weather Data (Part 5):  50%|█████     | 6/12 [00:53<00:39,  6.53s/it]

Fetching data for Tien Giang...


Fetching Weather Data (Part 5):  58%|█████▊    | 7/12 [00:57<00:29,  5.86s/it]

Fetching data for Tra Vinh...


Fetching Weather Data (Part 5):  67%|██████▋   | 8/12 [01:01<00:20,  5.20s/it]

Fetching data for Tuyen Quang...


Fetching Weather Data (Part 5):  75%|███████▌  | 9/12 [01:05<00:14,  4.84s/it]

Fetching data for Vinh Long...


Fetching Weather Data (Part 5):  83%|████████▎ | 10/12 [01:11<00:10,  5.26s/it]

Fetching data for Vinh Phuc...


Fetching Weather Data (Part 5):  92%|█████████▏| 11/12 [01:17<00:05,  5.19s/it]

Fetching data for Yen Bai...


Fetching Weather Data (Part 5): 100%|██████████| 12/12 [01:20<00:00,  6.70s/it]

Historical weather data saved to vietnam_historical_weather_part_5.csv





In [4]:
# Merge the per-part weather CSVs into a single file.
# glob returns matches in arbitrary order, so sort for a reproducible row order.
csv_files = sorted(glob.glob("vietnam_historical_weather_part_*.csv"))

# Fail with an actionable message instead of pandas' "No objects to concatenate".
if not csv_files:
    raise FileNotFoundError(
        "No vietnam_historical_weather_part_*.csv files found; "
        "run the download cell first."
    )

merged_df = pd.concat([pd.read_csv(file) for file in csv_files], ignore_index=True)

merged_filename = "vn_weather_2019.csv"
merged_df.to_csv(merged_filename, index=False)