In [62]:
import requests
import pandas as pd

# API endpoint
url = "http://api.worldweatheronline.com/premium/v1/past-weather.ashx"

# load API key from text file
with open('api_key_weather.txt', 'r') as file:
    api_key = file.read().replace('\n', '')
    
# Parameters
params = {
    "key": str(api_key),
    "q": "Dublin",  # Query location
    "format": "json",
    "date": "2021-05-01",  # Start date for historical data
    "enddate": "2022-08-31",  # End date for historical data
    "includelocation": "yes", 
    "tp": "1"  # Time period: 1 hour
}  

# Making the GET request
response = requests.get(url, params=params)

# Check if the request was successful
if response.status_code == 200:
    # Parse the JSON response
    data = response.json()
    
    # Extracting location data
    location_data = data.get('data', {}).get('nearest_area', [{}])[0]
    latitude = location_data.get('latitude', 'Unknown')
    longitude = location_data.get('longitude', 'Unknown')
    
    # Extracting the weather data
    weather_data = data.get('data', {}).get('weather', [])
    
    # Creating a list to store each day's weather information
    weather_list = []
    for day in weather_data:
        hourly_list = day.get('hourly', [])
        for hourly_data in hourly_list:
            hour_value = hourly_data.get('time', 'Unknown')  # This should extract the time for each hour
            if hour_value != 'Unknown':
                hour_formatted = f"{int(hour_value)//100:02d}:00" if hour_value != '0' else "00:00"
            else:
                hour_formatted = hour_value
            day_data = {
                'date': day['date'],
                'hour': hour_formatted,
                'avgtempC': day['avgtempC'], 
                'maxtempC': day['maxtempC'],
                'mintempC': day['mintempC'],
                'sunHour': day['sunHour'],
                'uvIndex': day['uvIndex'],
                'humidity': hourly_data['humidity'],
                'winddirDegree': hourly_data['winddirDegree'],
                'windspeedKmph': hourly_data['windspeedKmph'],
                'cloudcover': hourly_data['cloudcover'],
                'precipMM': hourly_data['precipMM'],
                'pressure': hourly_data['pressure'],
                'latitude': latitude,
                'longitude': longitude,
            }
            weather_list.append(day_data)

    
    # Convert the list of dictionaries to a pandas DataFrame
    weather_df = pd.DataFrame(weather_list)
    
    # print(weather_df.head())  # Print the first few rows to verify
    
    # Optionally, save the DataFrame to a CSV file
    # weather_df.to_csv('dublin_weather_may2021_aug2022_extended.csv', index=False)
else:
    print("Failed to fetch data. Status Code:", response.status_code)

In [65]:
#weather_df.to_csv('dublin_weather_may2021_aug2022_extended.csv', index=False)
weather_df


Unnamed: 0,date,hour,avgtempC,maxtempC,mintempC,sunHour,uvIndex,humidity,winddirDegree,windspeedKmph,cloudcover,precipMM,pressure,latitude,longitude
0,2021-05-01,00:00,6,8,3,13.0,3,80,305,9,5,0.0,1017,53.333,-6.249
1,2021-05-01,01:00,6,8,3,13.0,3,82,306,10,15,0.0,1017,53.333,-6.249
2,2021-05-01,02:00,6,8,3,13.0,3,83,306,11,25,0.0,1017,53.333,-6.249
3,2021-05-01,03:00,6,8,3,13.0,3,85,307,12,35,0.0,1017,53.333,-6.249
4,2021-05-01,04:00,6,8,3,13.0,3,84,308,12,41,0.0,1017,53.333,-6.249
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
835,2021-06-04,19:00,13,16,9,12.0,4,78,174,19,57,0.0,1023,53.333,-6.249
836,2021-06-04,20:00,13,16,9,12.0,4,83,176,18,39,0.0,1024,53.333,-6.249
837,2021-06-04,21:00,13,16,9,12.0,4,88,178,16,20,0.0,1024,53.333,-6.249
838,2021-06-04,22:00,13,16,9,12.0,4,88,178,16,43,0.0,1024,53.333,-6.249
