In [14]:
import requests
import pandas as pd

# API endpoint. HTTPS is used so the API key in the query string is not sent in clear text.
url = "https://api.worldweatheronline.com/premium/v1/past-weather.ashx"

# Load the API key from a text file. strip() removes the trailing newline as well as
# any '\r' or surrounding spaces that would otherwise end up inside the key.
with open('api_key_weather.txt', 'r') as file:
    api_key = file.read().strip()

# Query parameters sent with the request (these are what change the final URL).
params = {
    "key": str(api_key),
    "q": "Dublin",  # Query location
    "format": "json",
    "date": "2021-08-05",  # Start date for historical data
    "enddate": "2021-09-04",  # End date for historical data
    "includelocation": "yes",
    "tp": "1"  # Time period: 1 hour
}

# TODO: loop over the full time range, one request per month, using a helper that
# produces the monthly (date, enddate) pairs for a given overall range.


def _format_hour(raw_time):
    """Turn an hourly 'time' value such as '0', '100' or '2300' into 'HH:00'.

    The placeholder 'Unknown' (used below when an hourly entry has no 'time'
    field) is returned unchanged.
    """
    if raw_time == 'Unknown':
        return raw_time
    # int('0') // 100 is already 0, so midnight needs no special case.
    return f"{int(raw_time) // 100:02d}:00"


# Start from an empty list on every run so later cells never see a missing name
# or stale records from a previous execution when the request fails.
weather_list = []

# Make the GET request; the timeout stops the cell from hanging forever on a stalled connection.
response = requests.get(url, params=params, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Parse the JSON response
    data = response.json()
    payload = data.get('data', {})

    # NOTE(review): presumably the API reports problems (bad key, bad dates) inside
    # the body as data.error while still answering 200 — confirm against the API docs.
    api_errors = payload.get('error')
    if api_errors:
        print("API returned an error:", api_errors)

    # Location data; `or [{}]` also covers an empty 'nearest_area' list, which would
    # otherwise raise IndexError on [0].
    location_data = (payload.get('nearest_area') or [{}])[0]
    latitude = location_data.get('latitude', 'Unknown')
    longitude = location_data.get('longitude', 'Unknown')

    # Daily weather entries, each carrying its own list of hourly entries
    weather_data = payload.get('weather', [])

    # One record per (day, hour): daily aggregates are repeated on every hourly row.
    for day in weather_data:
        for hourly_data in day.get('hourly', []):
            day_data = {
                'date': day['date'],
                'hour': _format_hour(hourly_data.get('time', 'Unknown')),
                'avgtempC': day['avgtempC'],
                'maxtempC': day['maxtempC'],
                'mintempC': day['mintempC'],
                'sunHour': day['sunHour'],
                'uvIndex': day['uvIndex'],
                'humidity': hourly_data['humidity'],
                'winddirDegree': hourly_data['winddirDegree'],
                'windspeedKmph': hourly_data['windspeedKmph'],
                'cloudcover': hourly_data['cloudcover'],
                'precipMM': hourly_data['precipMM'],
                'pressure': hourly_data['pressure'],
                'latitude': latitude,
                'longitude': longitude,
            }
            weather_list.append(day_data)

    # Convert the list of dictionaries to a pandas DataFrame
    weather_df = pd.DataFrame(weather_list)

    if weather_list:
        # Save under a name built from the requested date range.
        # TODO: write each month into a "Months" folder and merge everything from there.
        weather_df.to_csv(f'dublin_weather_{params["date"]}_{params["enddate"]}.csv', index=False)
    else:
        # Avoid leaving an empty CSV on disk that looks like a successful download.
        print("No weather records returned; CSV not written.")
else:
    print("Failed to fetch data. Status Code:", response.status_code)

In [15]:
from datetime import datetime

# Attach a 'datetime' value to every record by parsing its date and hour together.
# This assumes each 'date' is 'YYYY-MM-DD' and each 'hour' is 'HH:00'; strptime
# raises ValueError for anything else (for example an 'Unknown' hour).
for item in weather_list:
    date_str, hour_str = item['date'], item['hour']
    datetime_str = "{} {}".format(date_str, hour_str)
    item['datetime'] = datetime.strptime(datetime_str, "%Y-%m-%d %H:%M")

# Rebuild the DataFrame from the enriched records and use 'datetime' as its index.
weather_df = pd.DataFrame(weather_list).set_index('datetime')


In [16]:
# Save the hourly data under a name built from the requested date range, so the file
# name always matches what was fetched instead of relying on a hand-typed range.
# NOTE(review): index=False leaves the 'datetime' index out of the CSV; rows in the
# file are identified only by the 'date' and 'hour' columns — confirm this is intended.
weather_df.to_csv(
    f'hourly_dublin_weather_{params["date"]}-{params["enddate"]}_extended.csv',
    index=False,
)
weather_df



Unnamed: 0_level_0,date,hour,avgtempC,maxtempC,mintempC,sunHour,uvIndex,humidity,winddirDegree,windspeedKmph,cloudcover,precipMM,pressure,latitude,longitude
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2021-08-05 00:00:00,2021-08-05,00:00,15,17,13,7.0,3,91,187,15,80,0.0,1007,53.333,-6.249
2021-08-05 01:00:00,2021-08-05,01:00,15,17,13,7.0,3,90,183,15,78,0.0,1006,53.333,-6.249
2021-08-05 02:00:00,2021-08-05,02:00,15,17,13,7.0,3,89,179,12,76,0.0,1005,53.333,-6.249
2021-08-05 03:00:00,2021-08-05,03:00,15,17,13,7.0,3,88,175,13,74,0.0,1004,53.333,-6.249
2021-08-05 04:00:00,2021-08-05,04:00,15,17,13,7.0,3,88,169,16,69,0.0,1003,53.333,-6.249
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-09-04 19:00:00,2021-09-04,19:00,14,17,12,10.0,4,80,115,8,17,0.0,1017,53.333,-6.249
2021-09-04 20:00:00,2021-09-04,20:00,14,17,12,10.0,4,82,118,7,27,0.0,1017,53.333,-6.249
2021-09-04 21:00:00,2021-09-04,21:00,14,17,12,10.0,4,84,120,6,37,0.0,1017,53.333,-6.249
2021-09-04 22:00:00,2021-09-04,22:00,14,17,12,10.0,4,85,132,6,39,0.0,1017,53.333,-6.249


In [20]:
# Merge preparation: load the hourly export for 2021-07-05 to 2021-08-04 so it can be
# combined with hourly_dublin_weather_2021-08-05-2021-09-04_extended.csv.
# NOTE(review): the file name below contains a space after "2021-07-05-", unlike the
# name used for the later range — confirm it matches the file on disk.

# Load the CSV file into a DataFrame and display it
df1 = pd.read_csv('hourly_dublin_weather_2021-07-05- 2021-08-04_extended.csv')
df1

Unnamed: 0,date,hour,avgtempC,maxtempC,mintempC,sunHour,uvIndex,humidity,winddirDegree,windspeedKmph,cloudcover,precipMM,pressure,latitude,longitude
0,2021-07-05,00:00,14,16,11,10.0,3,92,259,11,63,0.5,1004,53.333,-6.249
1,2021-07-05,01:00,14,16,11,10.0,3,91,253,12,87,0.2,1004,53.333,-6.249
2,2021-07-05,02:00,14,16,11,10.0,3,90,248,12,89,0.1,1004,53.333,-6.249
3,2021-07-05,03:00,14,16,11,10.0,3,88,243,13,8,0.0,1004,53.333,-6.249
4,2021-07-05,04:00,14,16,11,10.0,3,88,237,11,10,0.0,1004,53.333,-6.249
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
739,2021-08-04,19:00,15,19,13,8.0,5,83,175,18,84,0.0,1007,53.333,-6.249
740,2021-08-04,20:00,15,19,13,8.0,5,85,173,18,85,0.0,1007,53.333,-6.249
741,2021-08-04,21:00,15,19,13,8.0,5,87,170,18,86,0.0,1007,53.333,-6.249
742,2021-08-04,22:00,15,19,13,8.0,5,88,175,15,84,0.0,1007,53.333,-6.249


In [None]:
# Optional: drop the 'date' and 'hour' columns now that 'datetime' is available
#weather_df.drop(['date', 'hour'], axis=1, inplace=True)

In [4]:
# Investigate why there are 840 rows rather than 816 (34 days * 24 hours) by looking for repeated (date, hour) pairs
from collections import Counter

# Tally how often each (date, hour) pair appears in weather_list
# (each record is expected to be a dict with 'date' and 'hour' keys).
hours_counter = Counter()
for record in weather_list:
    hours_counter[(record['date'], record['hour'])] += 1

# Keep only the pairs seen more than once
duplicates = {}
for slot, occurrences in hours_counter.items():
    if occurrences > 1:
        duplicates[slot] = occurrences

print("Duplicates or extra hours:", duplicates)


Duplicates or extra hours: {}


In [5]:
from collections import Counter

# Count the number of records per date (each record is expected to have a 'date' key)
date_counts = Counter()
date_counts.update(record['date'] for record in weather_list)

# Report the per-date totals, one line per date
for date, count in date_counts.items():
    print(f"Date: {date} has {count} observations")


Date: 2021-05-01 has 24 observations
Date: 2021-05-02 has 24 observations
Date: 2021-05-03 has 24 observations
Date: 2021-05-04 has 24 observations
Date: 2021-05-05 has 24 observations
Date: 2021-05-06 has 24 observations
Date: 2021-05-07 has 24 observations
Date: 2021-05-08 has 24 observations
Date: 2021-05-09 has 24 observations
Date: 2021-05-10 has 24 observations
Date: 2021-05-11 has 24 observations
Date: 2021-05-12 has 24 observations
Date: 2021-05-13 has 24 observations
Date: 2021-05-14 has 24 observations
Date: 2021-05-15 has 24 observations
Date: 2021-05-16 has 24 observations
Date: 2021-05-17 has 24 observations
Date: 2021-05-18 has 24 observations
Date: 2021-05-19 has 24 observations
Date: 2021-05-20 has 24 observations
Date: 2021-05-21 has 24 observations
Date: 2021-05-22 has 24 observations
Date: 2021-05-23 has 24 observations
Date: 2021-05-24 has 24 observations
Date: 2021-05-25 has 24 observations
Date: 2021-05-26 has 24 observations
Date: 2021-05-27 has 24 observations
D