<a href="https://colab.research.google.com/github/Rohit-hooda/DS5110-Final-Project/blob/main/Data_Extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install openmeteo-requests

In [None]:
pip install requests-cache retry-requests numpy pandas

In [6]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry

# Build the Open-Meteo API client on top of a cached, retrying HTTP session:
#   1. cache_session stores responses in a local cache named '.cache' and
#      expires entries after 3600 (seconds, per requests-cache), so re-running
#      the cell within that window does not re-download the same data.
#   2. retry_session wraps the cached session so failed requests are retried
#      up to 5 times with a backoff factor of 0.2 between attempts.
#   3. openmeteo is the client used by fetch_weather_data() below.
cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# One reference point per county, expanded into the dict shape
# ({"latitude", "longitude", "county"}) that the fetch loop reads.
# Source rows are written as (county, latitude, longitude).
coordinates_list = [
    {"latitude": lat, "longitude": lon, "county": county}
    for county, lat, lon in (
        ("Barnstable", 41.7003, -70.3002),
        ("Berkshire", 42.3118, -73.1822),
        ("Bristol", 41.7938, -71.1350),
        ("Dukes", 41.4033, -70.6693),
        ("Essex", 42.6334, -70.7829),
        ("Franklin", 42.5795, -72.6151),
        ("Hampden", 42.1175, -72.6009),
        ("Hampshire", 42.3389, -72.6417),
        ("Middlesex", 42.4672, -71.2874),
        ("Nantucket", 41.2835, -70.0995),
        ("Norfolk", 42.1621, -71.1912),
        ("Plymouth", 41.9880, -70.7528),
        ("Suffolk", 42.3601, -71.0589),
        ("Worcester", 42.4002, -71.9065),
    )
]

# Accumulator for the per-county results: fetch_weather_data() appends one
# DataFrame per call, and the frames are concatenated into final_dataframe
# once every county has been fetched. Re-created empty each time this cell runs.
all_dataframes = []

def fetch_weather_data(latitude, longitude, county_name,
                       start_date="2024-01-01", end_date="2024-10-30"):
    """Fetch daily weather for one location and collect it as a DataFrame.

    Queries the Open-Meteo historical-forecast endpoint through the
    module-level ``openmeteo`` client, builds one row per day, appends the
    resulting DataFrame to the module-level ``all_dataframes`` list and also
    returns it.

    Args:
        latitude: Latitude of the location, in decimal degrees.
        longitude: Longitude of the location, in decimal degrees.
        county_name: Label stored in the ``county`` column of every row.
        start_date: First day to request, as ``YYYY-MM-DD``.
        end_date: Last day to request, as ``YYYY-MM-DD``.

    Returns:
        The DataFrame for this location. Columns are ``date``, ``county``,
        ``latitude``, ``longitude`` followed by one column per requested
        daily variable, in request order.
    """
    # Single source of truth for the requested variables. The API returns
    # variables in the order they were requested, so the same list drives
    # both the request and the Variables(i) lookup -- they cannot drift apart.
    daily_variables = [
        "weather_code", "temperature_2m_max", "temperature_2m_min", "sunrise", "sunset",
        "daylight_duration", "sunshine_duration", "uv_index_max", "uv_index_clear_sky_max",
        "precipitation_sum", "rain_sum", "showers_sum", "snowfall_sum",
        "precipitation_hours", "precipitation_probability_max", "wind_speed_10m_max",
        "wind_gusts_10m_max",
    ]
    # Open-Meteo delivers these as 64-bit integer Unix timestamps (seconds).
    # The float accessor ValuesAsNumpy() yields no data for them (the column
    # ends up as 0), so they must be read with ValuesInt64AsNumpy().
    int64_variables = {"sunrise", "sunset"}

    url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "daily": daily_variables,
        "timezone": "America/New_York"
    }
    responses = openmeteo.weather_api(url, params=params)

    # One location was requested, so only the first response is relevant
    response = responses[0]

    # Process daily data
    daily = response.Daily()
    daily_data = {
        # One timestamp per day, from Time() up to (not including) TimeEnd().
        # NOTE(review): these are UTC instants; with a non-UTC request
        # timezone they presumably mark local midnight and so show a
        # non-zero UTC hour -- confirm before using `date` as a calendar day.
        "date": pd.date_range(
            start=pd.to_datetime(daily.Time(), unit="s", utc=True),
            end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=daily.Interval()),
            inclusive="left"
        ),
        # Scalars are broadcast by pandas to every row of the frame
        "county": county_name,
        "latitude": latitude,
        "longitude": longitude,
    }
    for index, name in enumerate(daily_variables):
        variable = daily.Variables(index)
        if name in int64_variables:
            daily_data[name] = variable.ValuesInt64AsNumpy()
        else:
            daily_data[name] = variable.ValuesAsNumpy()

    # Convert to DataFrame, record it in the shared accumulator and return it
    daily_dataframe = pd.DataFrame(data=daily_data)
    all_dataframes.append(daily_dataframe)
    return daily_dataframe

# Fetch every county in turn; each call adds that county's DataFrame
# to all_dataframes
for coords in coordinates_list:
    fetch_weather_data(
        latitude=coords["latitude"],
        longitude=coords["longitude"],
        county_name=coords["county"],
    )

# Stack the per-county frames into one table with a fresh running index
final_dataframe = pd.concat(all_dataframes, ignore_index=True)

# Write the combined table to disk without the index column, then confirm
final_dataframe.to_csv("massachusetts_counties_weather_data.csv", index=False)
print("Data has been saved to massachusetts_counties_weather_data.csv")


Data has been saved to massachusetts_counties_weather_data.csv
