In [26]:
import requests
import xmltodict
import pandas as pd

# Information on Met Éireann API at:
# https://data.gov.ie/dataset/met-eireann-weather-forecast-api/resource/027da6d5-d819-48d1-9b16-331dba169bd1

# URL for Met Éireann 3 day forecast
weather = "https://www.met.ie/Open_Data/xml/web-3Dayforecast.xml"

# Fetch the XML feed. The timeout stops the cell from hanging indefinitely on a
# stalled connection, and raise_for_status() surfaces an HTTP error here rather
# than as a confusing XML parse failure on the next line.
forecast_response = requests.get(weather, timeout=30)
forecast_response.raise_for_status()

# Parse the XML into nested dicts, then keep only the per-station entries.
# Each stage gets its own name so re-reading the cell shows what every value is.
forecast_document = xmltodict.parse(forecast_response.text)
three_day_forecast = forecast_document['forecast']['station']


# For one station, build one row per forecast day and return the rows as a dataframe
def get_data_by_day(city):
    """Return the daily forecast of a single station as a DataFrame.

    Parameters
    ----------
    city : dict
        One station entry from the parsed feed. It must contain the keys
        "location" and "day"; every day entry must contain "date",
        "min_temp" and "max_temp".

    Returns
    -------
    pandas.DataFrame
        One row per day entry with the columns location, day, minimum and
        maximum. Values are passed through unchanged; no type conversion
        happens here.

    Raises
    ------
    KeyError
        If one of the expected keys is missing.
    """
    days = city['day']
    # xmltodict yields a bare dict (not a one-element list) when an element
    # occurs only once; iterating that dict would walk its keys instead of
    # the day entries, so normalise it to a list first.
    if isinstance(days, dict):
        days = [days]

    rows = [
        {
            "location": city["location"],
            "day": entry['date'],
            "minimum": entry['min_temp'],
            "maximum": entry['max_temp'],
        }
        for entry in days
    ]
    # Naming the columns keeps the schema stable even if there are no rows.
    return pd.DataFrame(rows, columns=["location", "day", "minimum", "maximum"])

# Break the parsed feed down by station and return a list of dataframes, one per station
def get_cities(data):
    """Build one DataFrame per station entry.

    Parameters
    ----------
    data : list of dict, or dict
        The station entries from the parsed feed. A single dict is accepted
        and treated as a one-station list.

    Returns
    -------
    list of pandas.DataFrame
        The result of get_data_by_day for each station, in input order.
    """
    # xmltodict yields a bare dict when only one station element is present;
    # iterating it would walk its keys, so wrap it in a list first.
    if isinstance(data, dict):
        data = [data]
    return [get_data_by_day(station) for station in data]

In [27]:
# Build the list of per-station dataframes from the parsed forecast
all_data = get_cities(three_day_forecast)

In [28]:
# Concatenate the per-station dataframes into one table.
# The list is rebuilt from three_day_forecast rather than read back from
# all_data, so this cell can be re-run safely: passing the already-combined
# DataFrame to pd.concat would raise a TypeError.
all_data = pd.concat(get_cities(three_day_forecast), ignore_index=True, axis=0)

In [29]:
# Check the column dtypes and non-null counts before any type conversion
all_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   location  27 non-null     object
 1   day       27 non-null     object
 2   minimum   27 non-null     object
 3   maximum   27 non-null     object
dtypes: object(4)
memory usage: 992.0+ bytes


In [32]:
# Convert the minimum column to a numeric dtype.
# Bracket indexing is used for the assignment: attribute-style assignment only
# updates a column when that column already exists.
all_data["minimum"] = pd.to_numeric(all_data["minimum"])


In [33]:
# Convert the maximum column to a numeric dtype.
# Bracket indexing is used for the assignment: attribute-style assignment only
# updates a column when that column already exists.
all_data["maximum"] = pd.to_numeric(all_data["maximum"])

In [40]:
# Convert the day column to datetime.
# Bracket indexing is used for the assignment: attribute-style assignment only
# updates a column when that column already exists.
all_data["day"] = pd.to_datetime(all_data["day"])

In [41]:
# Re-check the column dtypes now that the conversion cells have run
all_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   location  27 non-null     object        
 1   day       27 non-null     datetime64[ns]
 2   minimum   27 non-null     int64         
 3   maximum   27 non-null     int64         
dtypes: datetime64[ns](1), int64(2), object(1)
memory usage: 992.0+ bytes


In [42]:
# Summary statistics for the forecast table
all_data.describe()

Unnamed: 0,minimum,maximum
count,27.0,27.0
mean,7.148148,13.37037
std,1.747607,1.471476
min,5.0,11.0
25%,5.0,12.0
50%,7.0,13.0
75%,9.0,14.5
max,10.0,16.0


In [38]:
# Display the combined forecast table
all_data

Unnamed: 0,location,day,minimum,maximum
0,Dublin,2021-05-18 12:00:00,6,15
1,Dublin,2021-05-19 12:00:00,7,15
2,Dublin,2021-05-20 12:00:00,9,14
3,Wexford,2021-05-18 12:00:00,5,13
4,Wexford,2021-05-19 12:00:00,10,14
5,Wexford,2021-05-20 12:00:00,9,13
6,Cork,2021-05-18 12:00:00,5,15
7,Cork,2021-05-19 12:00:00,9,15
8,Cork,2021-05-20 12:00:00,9,12
9,Kerry,2021-05-18 12:00:00,5,13


In [39]:
# Summary statistics again (same call as the earlier describe() cell)
all_data.describe()

Unnamed: 0,minimum,maximum
count,27.0,27.0
mean,7.148148,13.37037
std,1.747607,1.471476
min,5.0,11.0
25%,5.0,12.0
50%,7.0,13.0
75%,9.0,14.5
max,10.0,16.0
