## Weather Data US

In [None]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
from functools import reduce

### Import Data through API

In [None]:


# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
	"latitude": [40.7143, 34.0522, 41.85, 37.7749, 43.6102, 32.7831, 38.8951, 33.749, 47.6062],
	"longitude": [-74.006, -118.2437, -87.65, -122.4194, -116.7832, -96.8067, -77.0364, -84.388, -122.3321],
	"start_date": "1982-01-01",
	"end_date": "2024-06-01",
	"daily": ["temperature_2m_mean", "daylight_duration", "rain_sum", "snowfall_sum", "wind_speed_10m_max"],
	"timezone": "GMT"
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process daily data. The order of variables needs to be the same as requested.
daily = response.Daily()
daily_temperature_2m_mean = daily.Variables(0).ValuesAsNumpy()
daily_daylight_duration = daily.Variables(1).ValuesAsNumpy()
daily_rain_sum = daily.Variables(2).ValuesAsNumpy()
daily_snowfall_sum = daily.Variables(3).ValuesAsNumpy()
daily_wind_speed_10m_max = daily.Variables(4).ValuesAsNumpy()

daily_data = {"date": pd.date_range(
	start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
	end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = daily.Interval()),
	inclusive = "left"
)}
daily_data["temperature_2m_mean"] = daily_temperature_2m_mean
daily_data["daylight_duration"] = daily_daylight_duration
daily_data["rain_sum"] = daily_rain_sum
daily_data["snowfall_sum"] = daily_snowfall_sum
daily_data["wind_speed_10m_max"] = daily_wind_speed_10m_max

daily_dataframe = pd.DataFrame(data = daily_data)
print(daily_dataframe)


### EDA Weather Data

In [55]:
weather_df = pd.read_csv("open-meteo-40.74N74.04W51m.csv", skiprows=11, header=0)
weather_df

Unnamed: 0,location_id,time,temperature_2m_mean (°C),daylight_duration (s),rain_sum (mm),snowfall_sum (cm),wind_speed_10m_max (km/h)
0,0,1982-01-01,4.1,33523.23,11.6,0.0,25.0
1,0,1982-01-02,-0.4,33568.30,0.0,0.0,24.7
2,0,1982-01-03,-2.1,33617.07,0.0,0.0,9.5
3,0,1982-01-04,6.7,33669.43,26.7,0.0,34.2
4,0,1982-01-05,3.6,33725.31,2.0,0.0,35.8
...,...,...,...,...,...,...,...
139432,8,2024-05-28,11.9,56252.25,1.4,0.0,22.2
139433,8,2024-05-29,10.5,56357.75,2.3,0.0,20.1
139434,8,2024-05-30,11.3,56458.74,0.2,0.0,11.7
139435,8,2024-05-31,11.9,56555.02,0.0,0.0,16.1


In [46]:
weather_df.isna().sum()

location_id                  0
time                         0
temperature_2m_mean (°C)     0
daylight_duration (s)        0
rain_sum (mm)                0
snowfall_sum (cm)            0
wind_speed_10m_max (km/h)    0
dtype: int64

In [47]:
weather_df.describe()

Unnamed: 0,location_id,temperature_2m_mean (°C),daylight_duration (s),rain_sum (mm),snowfall_sum (cm),wind_speed_10m_max (km/h)
count,139437.0,139437.0,139437.0,139437.0,139437.0,139437.0
mean,4.0,13.869074,43914.147204,2.377446,0.072921,17.732793
std,2.581998,8.867124,6985.541839,6.575525,0.68862,7.173826
min,0.0,-27.9,30307.64,0.0,0.0,3.9
25%,2.0,7.8,37488.74,0.0,0.0,12.4
50%,4.0,14.2,43988.23,0.0,0.0,16.3
75%,6.0,20.7,50305.2,1.1,0.0,21.9
max,8.0,37.1,57563.83,183.8,37.94,67.5


In [56]:
weather_df.rename(columns={'time': 'Date'}, inplace=True)
weather_df

Unnamed: 0,location_id,Date,temperature_2m_mean (°C),daylight_duration (s),rain_sum (mm),snowfall_sum (cm),wind_speed_10m_max (km/h)
0,0,1982-01-01,4.1,33523.23,11.6,0.0,25.0
1,0,1982-01-02,-0.4,33568.30,0.0,0.0,24.7
2,0,1982-01-03,-2.1,33617.07,0.0,0.0,9.5
3,0,1982-01-04,6.7,33669.43,26.7,0.0,34.2
4,0,1982-01-05,3.6,33725.31,2.0,0.0,35.8
...,...,...,...,...,...,...,...
139432,8,2024-05-28,11.9,56252.25,1.4,0.0,22.2
139433,8,2024-05-29,10.5,56357.75,2.3,0.0,20.1
139434,8,2024-05-30,11.3,56458.74,0.2,0.0,11.7
139435,8,2024-05-31,11.9,56555.02,0.0,0.0,16.1


In [57]:
weather_df = weather_df[weather_df['Date'] >= '1990-01-01']
weather_df

Unnamed: 0,location_id,Date,temperature_2m_mean (°C),daylight_duration (s),rain_sum (mm),snowfall_sum (cm),wind_speed_10m_max (km/h)
2922,0,1990-01-01,4.9,33526.58,7.6,0.0,29.9
2923,0,1990-01-02,-0.9,33571.49,0.0,0.0,21.6
2924,0,1990-01-03,0.6,33620.02,0.0,0.0,15.7
2925,0,1990-01-04,1.8,33672.07,0.0,0.0,22.9
2926,0,1990-01-05,5.2,33727.59,0.0,0.0,18.7
...,...,...,...,...,...,...,...
139432,8,2024-05-28,11.9,56252.25,1.4,0.0,22.2
139433,8,2024-05-29,10.5,56357.75,2.3,0.0,20.1
139434,8,2024-05-30,11.3,56458.74,0.2,0.0,11.7
139435,8,2024-05-31,11.9,56555.02,0.0,0.0,16.1


In [58]:
weather_df = weather_df[weather_df['Date'] <= '2024-04-01']
weather_df

Unnamed: 0,location_id,Date,temperature_2m_mean (°C),daylight_duration (s),rain_sum (mm),snowfall_sum (cm),wind_speed_10m_max (km/h)
2922,0,1990-01-01,4.9,33526.58,7.6,0.0,29.9
2923,0,1990-01-02,-0.9,33571.49,0.0,0.0,21.6
2924,0,1990-01-03,0.6,33620.02,0.0,0.0,15.7
2925,0,1990-01-04,1.8,33672.07,0.0,0.0,22.9
2926,0,1990-01-05,5.2,33727.59,0.0,0.0,18.7
...,...,...,...,...,...,...,...
139371,8,2024-03-28,7.4,45536.32,7.1,0.0,22.1
139372,8,2024-03-29,7.9,45742.93,3.5,0.0,25.2
139373,8,2024-03-30,8.6,45948.61,0.0,0.0,21.4
139374,8,2024-03-31,7.5,46153.21,0.0,0.0,21.3


In [59]:
# Create a dictionary to store the separate datasets
weather_datasets = {}

# Group the dataframe by 'location_id'
grouped_weather = weather_df.groupby('location_id')

# Iterate over each group and store the datasets in the dictionary
for location_id, group in grouped_weather:
    weather_datasets[location_id] = group

# Access the separate datasets using the location_id as the key
New_York = weather_datasets.get(0)
Los_Angeles = weather_datasets.get(1)
Chicago = weather_datasets.get(2)
San_Francisco = weather_datasets.get(3)
Houston = weather_datasets.get(4)
Dallas = weather_datasets.get(5)
Washington = weather_datasets.get(6)
Atlanta = weather_datasets.get(7)
Seattle = weather_datasets.get(8)

### City Columns rename, Location column drop

In [64]:
New_York.rename(columns={'temperature_2m_mean (°C)': 'temperature_2m_mean (°C) NY', 'daylight_duration (s)': 'daylight_duration (s) NY', 'rain_sum (mm)' : 'rain_sum (mm) NY', 'snowfall_sum (cm)': 'snowfall_sum (cm) NY', 'wind_speed_10m_max (km/h)': 'wind_speed_10m_max (km/h) NY'}, inplace=True)
New_York.drop(columns=['location_id'], inplace=True)
New_York

Unnamed: 0,Date,temperature_2m_mean (°C) NY,daylight_duration (s) NY,rain_sum (mm) NY,snowfall_sum (cm) NY,wind_speed_10m_max (km/h) NY
2922,1990-01-01,4.9,33526.58,7.6,0.0,29.9
2923,1990-01-02,-0.9,33571.49,0.0,0.0,21.6
2924,1990-01-03,0.6,33620.02,0.0,0.0,15.7
2925,1990-01-04,1.8,33672.07,0.0,0.0,22.9
2926,1990-01-05,5.2,33727.59,0.0,0.0,18.7
...,...,...,...,...,...,...
15427,2024-03-28,8.4,45096.66,19.5,0.0,18.4
15428,2024-03-29,7.5,45258.80,2.4,0.0,34.5
15429,2024-03-30,7.0,45420.17,0.0,0.0,24.3
15430,2024-03-31,10.5,45580.66,0.3,0.0,20.3


In [65]:
Los_Angeles.rename(columns={'temperature_2m_mean (°C)': 'temperature_2m_mean (°C) LA', 'daylight_duration (s)': 'daylight_duration (s) LA', 'rain_sum (mm)' : 'rain_sum (mm) LA', 'snowfall_sum (cm)': 'snowfall_sum (cm) LA', 'wind_speed_10m_max (km/h)': 'wind_speed_10m_max (km/h) LA'}, inplace=True)
Los_Angeles.drop(columns=['location_id'], inplace=True)
Los_Angeles

Unnamed: 0,Date,temperature_2m_mean (°C) LA,daylight_duration (s) LA,rain_sum (mm) LA,snowfall_sum (cm) LA,wind_speed_10m_max (km/h) LA
18415,1990-01-01,11.1,35746.42,0.0,0.0,13.9
18416,1990-01-02,10.4,35780.72,0.8,0.0,16.3
18417,1990-01-03,8.6,35817.79,0.0,0.0,14.8
18418,1990-01-04,9.0,35857.57,0.0,0.0,11.3
18419,1990-01-05,11.0,35900.01,0.0,0.0,9.0
...,...,...,...,...,...,...
30920,2024-03-28,13.3,44756.91,0.0,0.0,19.6
30921,2024-03-29,13.0,44884.10,0.4,0.0,20.9
30922,2024-03-30,12.5,45010.66,28.3,0.0,24.5
30923,2024-03-31,11.3,45136.52,8.1,0.0,20.9


In [66]:
Chicago.rename(columns={'temperature_2m_mean (°C)': 'temperature_2m_mean (°C) CH', 'daylight_duration (s)': 'daylight_duration (s) CH', 'rain_sum (mm)' : 'rain_sum (mm) CH', 'snowfall_sum (cm)': 'snowfall_sum (cm) CH', 'wind_speed_10m_max (km/h)': 'wind_speed_10m_max (km/h) CH'}, inplace=True)
Chicago.drop(columns=['location_id'], inplace=True)
Chicago

Unnamed: 0,Date,temperature_2m_mean (°C) CH,daylight_duration (s) CH,rain_sum (mm) CH,snowfall_sum (cm) CH,wind_speed_10m_max (km/h) CH
33908,1990-01-01,-2.4,33103.68,0.0,0.0,20.3
33909,1990-01-02,-1.6,33150.68,0.0,0.0,26.6
33910,1990-01-03,1.5,33201.46,0.0,0.0,27.5
33911,1990-01-04,3.7,33255.94,8.1,0.0,27.2
33912,1990-01-05,-2.9,33314.03,0.0,0.0,17.1
...,...,...,...,...,...,...
46413,2024-03-28,3.0,45161.20,0.0,0.0,27.3
46414,2024-03-29,3.4,45329.92,0.0,0.0,16.4
46415,2024-03-30,8.6,45497.84,11.4,0.0,25.8
46416,2024-03-31,4.2,45664.85,0.3,0.0,19.2


In [67]:
San_Francisco.rename(columns={'temperature_2m_mean (°C)': 'temperature_2m_mean (°C) SF', 'daylight_duration (s)': 'daylight_duration (s) SF', 'rain_sum (mm)' : 'rain_sum (mm) SF', 'snowfall_sum (cm)': 'snowfall_sum (cm) SF', 'wind_speed_10m_max (km/h)': 'wind_speed_10m_max (km/h) SF'}, inplace=True)
San_Francisco.drop(columns=['location_id'], inplace=True)
San_Francisco

Unnamed: 0,Date,temperature_2m_mean (°C) SF,daylight_duration (s) SF,rain_sum (mm) SF,snowfall_sum (cm) SF,wind_speed_10m_max (km/h) SF
49401,1990-01-01,9.9,34563.46,5.6,0.0,18.4
49402,1990-01-02,7.8,34603.34,0.9,0.0,29.9
49403,1990-01-03,7.7,34646.44,0.0,0.0,26.7
49404,1990-01-04,7.3,34692.68,0.0,0.0,14.1
49405,1990-01-05,8.6,34742.00,0.0,0.0,16.9
...,...,...,...,...,...,...
61906,2024-03-28,12.5,44938.03,3.1,0.0,31.0
61907,2024-03-29,10.4,45083.94,17.1,0.0,39.1
61908,2024-03-30,11.3,45229.13,4.1,0.0,29.5
61909,2024-03-31,12.0,45373.52,0.7,0.0,21.1


In [68]:
Houston.rename(columns={'temperature_2m_mean (°C)': 'temperature_2m_mean (°C) HU', 'daylight_duration (s)': 'daylight_duration (s) HU', 'rain_sum (mm)' : 'rain_sum (mm) HU', 'snowfall_sum (cm)': 'snowfall_sum (cm) HU', 'wind_speed_10m_max (km/h)': 'wind_speed_10m_max (km/h) HU'}, inplace=True)
Houston.drop(columns=['location_id'], inplace=True)
Houston

Unnamed: 0,Date,temperature_2m_mean (°C) HU,daylight_duration (s) HU,rain_sum (mm) HU,snowfall_sum (cm) HU,wind_speed_10m_max (km/h) HU
64894,1990-01-01,0.5,32407.14,1.3,0.00,9.2
64895,1990-01-02,-0.7,32457.64,0.9,0.00,21.7
64896,1990-01-03,-3.7,32512.21,0.0,0.00,11.6
64897,1990-01-04,-3.5,32570.73,0.0,0.63,7.7
64898,1990-01-05,0.7,32633.13,0.0,0.00,7.4
...,...,...,...,...,...,...
77399,2024-03-28,7.1,45267.20,8.3,0.28,14.8
77400,2024-03-29,5.8,45446.67,0.2,0.00,13.0
77401,2024-03-30,8.0,45625.31,0.3,0.00,9.7
77402,2024-03-31,8.2,45802.99,0.0,0.00,28.3


In [69]:
Dallas.rename(columns={'temperature_2m_mean (°C)': 'temperature_2m_mean (°C) DL', 'daylight_duration (s)': 'daylight_duration (s) DL', 'rain_sum (mm)' : 'rain_sum (mm) DL', 'snowfall_sum (cm)': 'snowfall_sum (cm) DL', 'wind_speed_10m_max (km/h)': 'wind_speed_10m_max (km/h) DL'}, inplace=True)
Dallas.drop(columns=['location_id'], inplace=True)
Dallas

Unnamed: 0,Date,temperature_2m_mean (°C) DL,daylight_duration (s) DL,rain_sum (mm) DL,snowfall_sum (cm) DL,wind_speed_10m_max (km/h) DL
80387,1990-01-01,5.6,36121.81,0.0,0.0,11.9
80388,1990-01-02,9.2,36154.38,0.0,0.0,28.6
80389,1990-01-03,13.2,36189.57,2.3,0.0,30.2
80390,1990-01-04,10.0,36227.34,0.2,0.0,20.5
80391,1990-01-05,5.8,36267.62,0.6,0.0,8.3
...,...,...,...,...,...,...
92892,2024-03-28,13.2,44699.52,0.0,0.0,16.2
92893,2024-03-29,17.9,44820.73,0.0,0.0,36.0
92894,2024-03-30,20.0,44941.35,0.0,0.0,27.8
92895,2024-03-31,21.4,45061.29,0.2,0.0,27.8


In [70]:
Washington.rename(columns={'temperature_2m_mean (°C)': 'temperature_2m_mean (°C) WA', 'daylight_duration (s)': 'daylight_duration (s) WA', 'rain_sum (mm)' : 'rain_sum (mm) WA', 'snowfall_sum (cm)': 'snowfall_sum (cm) WA', 'wind_speed_10m_max (km/h)': 'wind_speed_10m_max (km/h) WA'}, inplace=True)
Washington.drop(columns=['location_id'], inplace=True)
Washington

Unnamed: 0,Date,temperature_2m_mean (°C) WA,daylight_duration (s) WA,rain_sum (mm) WA,snowfall_sum (cm) WA,wind_speed_10m_max (km/h) WA
95880,1990-01-01,5.7,34180.06,9.8,0.0,31.7
95881,1990-01-02,0.4,34221.79,0.0,0.0,18.1
95882,1990-01-03,1.0,34266.88,0.0,0.0,9.5
95883,1990-01-04,4.0,34315.25,2.0,0.0,26.3
95884,1990-01-05,6.5,34366.84,0.0,0.0,18.2
...,...,...,...,...,...,...
108385,2024-03-28,9.1,44996.73,7.2,0.0,18.2
108386,2024-03-29,9.0,45148.66,0.0,0.0,27.0
108387,2024-03-30,10.0,45299.85,0.2,0.0,15.8
108388,2024-03-31,12.7,45450.21,3.9,0.0,14.9


In [71]:
Atlanta.rename(columns={'temperature_2m_mean (°C)': 'temperature_2m_mean (°C) AT', 'daylight_duration (s)': 'daylight_duration (s) AT', 'rain_sum (mm)' : 'rain_sum (mm) AT', 'snowfall_sum (cm)': 'snowfall_sum (cm) AT', 'wind_speed_10m_max (km/h)': 'wind_speed_10m_max (km/h) AT'}, inplace=True)
Atlanta.drop(columns=['location_id'], inplace=True)
Atlanta

Unnamed: 0,Date,temperature_2m_mean (°C) AT,daylight_duration (s) AT,rain_sum (mm) AT,snowfall_sum (cm) AT,wind_speed_10m_max (km/h) AT
111373,1990-01-01,5.1,35830.89,0.0,0.0,24.5
111374,1990-01-02,1.5,35864.80,0.0,0.0,12.1
111375,1990-01-03,6.2,35901.45,0.0,0.0,9.2
111376,1990-01-04,10.8,35940.77,8.5,0.0,14.9
111377,1990-01-05,13.2,35982.72,7.4,0.0,13.4
...,...,...,...,...,...,...
123878,2024-03-28,13.0,44743.99,0.0,0.0,26.7
123879,2024-03-29,11.1,44869.84,0.0,0.0,14.1
123880,2024-03-30,13.8,44995.06,0.0,0.0,19.1
123881,2024-03-31,16.8,45119.59,0.0,0.0,18.3


In [72]:
Seattle.rename(columns={'temperature_2m_mean (°C)': 'temperature_2m_mean (°C) SE', 'daylight_duration (s)': 'daylight_duration (s) SE', 'rain_sum (mm)' : 'rain_sum (mm) SE', 'snowfall_sum (cm)': 'snowfall_sum (cm) SE', 'wind_speed_10m_max (km/h)': 'wind_speed_10m_max (km/h) SE'}, inplace=True)
Seattle.drop(columns=['location_id'], inplace=True)
Seattle

Unnamed: 0,Date,temperature_2m_mean (°C) SE,daylight_duration (s) SE,rain_sum (mm) SE,snowfall_sum (cm) SE,wind_speed_10m_max (km/h) SE
126866,1990-01-01,5.7,30621.57,6.3,0.00,23.3
126867,1990-01-02,0.7,30681.43,1.9,2.03,13.8
126868,1990-01-03,3.7,30746.07,7.0,0.00,22.4
126869,1990-01-04,6.1,30815.38,12.8,0.00,23.2
126870,1990-01-05,7.8,30889.25,9.7,0.00,30.3
...,...,...,...,...,...,...
139371,2024-03-28,7.4,45536.32,7.1,0.00,22.1
139372,2024-03-29,7.9,45742.93,3.5,0.00,25.2
139373,2024-03-30,8.6,45948.61,0.0,0.00,21.4
139374,2024-03-31,7.5,46153.21,0.0,0.00,21.3


### Merge all City Dataframes

In [None]:
# Assuming you have the dataframes named as follows:
# New_York, Los_Angeles, Chicago, San_Francisco, Houston, Dallas, Washington, Atlanta, Seattle

# List of dataframes to merge
dataframes = [New_York, Los_Angeles, Chicago, San_Francisco, Houston, Dallas, Washington, Atlanta, Seattle]

# Merge all dataframes on 'Date'
merged_df = reduce(lambda left, right: pd.merge(left, right, on='Date', suffixes=('', '_dup')), dataframes)

# Drop duplicate columns if any
merged_weather_df = merged_df.loc[:, ~merged_df.columns.str.endswith('_dup')]

# Display the merged dataframe
merged_weather_df

Unnamed: 0,Date,temperature_2m_mean (°C) NY,daylight_duration (s) NY,rain_sum (mm) NY,snowfall_sum (cm) NY,wind_speed_10m_max (km/h) NY,temperature_2m_mean (°C) LA,daylight_duration (s) LA,rain_sum (mm) LA,snowfall_sum (cm) LA,...,temperature_2m_mean (°C) AT,daylight_duration (s) AT,rain_sum (mm) AT,snowfall_sum (cm) AT,wind_speed_10m_max (km/h) AT,temperature_2m_mean (°C) SE,daylight_duration (s) SE,rain_sum (mm) SE,snowfall_sum (cm) SE,wind_speed_10m_max (km/h) SE
0,1990-01-01,4.9,33526.58,7.6,0.0,29.9,11.1,35746.42,0.0,0.0,...,5.1,35830.89,0.0,0.0,24.5,5.7,30621.57,6.3,0.00,23.3
1,1990-01-02,-0.9,33571.49,0.0,0.0,21.6,10.4,35780.72,0.8,0.0,...,1.5,35864.80,0.0,0.0,12.1,0.7,30681.43,1.9,2.03,13.8
2,1990-01-03,0.6,33620.02,0.0,0.0,15.7,8.6,35817.79,0.0,0.0,...,6.2,35901.45,0.0,0.0,9.2,3.7,30746.07,7.0,0.00,22.4
3,1990-01-04,1.8,33672.07,0.0,0.0,22.9,9.0,35857.57,0.0,0.0,...,10.8,35940.77,8.5,0.0,14.9,6.1,30815.38,12.8,0.00,23.2
4,1990-01-05,5.2,33727.59,0.0,0.0,18.7,11.0,35900.01,0.0,0.0,...,13.2,35982.72,7.4,0.0,13.4,7.8,30889.25,9.7,0.00,30.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12505,2024-03-28,8.4,45096.66,19.5,0.0,18.4,13.3,44756.91,0.0,0.0,...,13.0,44743.99,0.0,0.0,26.7,7.4,45536.32,7.1,0.00,22.1
12506,2024-03-29,7.5,45258.80,2.4,0.0,34.5,13.0,44884.10,0.4,0.0,...,11.1,44869.84,0.0,0.0,14.1,7.9,45742.93,3.5,0.00,25.2
12507,2024-03-30,7.0,45420.17,0.0,0.0,24.3,12.5,45010.66,28.3,0.0,...,13.8,44995.06,0.0,0.0,19.1,8.6,45948.61,0.0,0.00,21.4
12508,2024-03-31,10.5,45580.66,0.3,0.0,20.3,11.3,45136.52,8.1,0.0,...,16.8,45119.59,0.0,0.0,18.3,7.5,46153.21,0.0,0.00,21.3


In [76]:
merged_weather_df.isna().sum()

Date                            0
temperature_2m_mean (°C) NY     0
daylight_duration (s) NY        0
rain_sum (mm) NY                0
snowfall_sum (cm) NY            0
wind_speed_10m_max (km/h) NY    0
temperature_2m_mean (°C) LA     0
daylight_duration (s) LA        0
rain_sum (mm) LA                0
snowfall_sum (cm) LA            0
wind_speed_10m_max (km/h) LA    0
temperature_2m_mean (°C) CH     0
daylight_duration (s) CH        0
rain_sum (mm) CH                0
snowfall_sum (cm) CH            0
wind_speed_10m_max (km/h) CH    0
temperature_2m_mean (°C) SF     0
daylight_duration (s) SF        0
rain_sum (mm) SF                0
snowfall_sum (cm) SF            0
wind_speed_10m_max (km/h) SF    0
temperature_2m_mean (°C) HU     0
daylight_duration (s) HU        0
rain_sum (mm) HU                0
snowfall_sum (cm) HU            0
wind_speed_10m_max (km/h) HU    0
temperature_2m_mean (°C) DL     0
daylight_duration (s) DL        0
rain_sum (mm) DL                0
snowfall_sum (

### Merger of Econ and Weather Data

In [77]:
econ_data = pd.read_csv("/Users/maxquarder/Documents/Max_Studium/Data Science and Business Analytics/Class_Lectures/Final Project/Master_Thesis_Python/economic_data_daily_level_v1.csv")
econ_data

Unnamed: 0,Date,GDP : check unit,GPDI : check unit,NETEXP : check unit,GovTotExp : check unit,ImpGoServ : check unit,ExpGoServ : check unit,Unempl : check unit,PCE : check unit,ProPriceInd : check unit,10yIntRate : check unit,GloWheat : check unit,GloUranium : check unit,GloPalmOil : check unit,GloBrentCrude : check unit
0,1990-01-01,5872.701,1010.838,-88.536,1998.201,626.767,538.232,5.4,3730.7,114.900,4.127623,167.918579,9.000000,230.555951,20.989130
1,1990-01-02,5872.701,1010.838,-88.536,1998.201,626.767,538.232,5.4,3730.7,114.900,4.127623,167.918579,9.000000,230.555951,20.989130
2,1990-01-03,5872.701,1010.838,-88.536,1998.201,626.767,538.232,5.4,3730.7,114.900,4.127623,167.918579,9.000000,230.555951,20.989130
3,1990-01-04,5872.701,1010.838,-88.536,1998.201,626.767,538.232,5.4,3730.7,114.900,4.127623,167.918579,9.000000,230.555951,20.989130
4,1990-01-05,5872.701,1010.838,-88.536,1998.201,626.767,538.232,5.4,3730.7,114.900,4.127623,167.918579,9.000000,230.555951,20.989130
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12505,2024-03-28,28269.174,5020.538,-834.896,9925.034,3919.689,3084.793,3.8,19263.7,254.963,1.925969,211.841017,71.814275,899.043923,84.961429
12506,2024-03-29,28269.174,5020.538,-834.896,9925.034,3919.689,3084.793,3.8,19263.7,254.963,1.925969,211.841017,71.814275,899.043923,84.961429
12507,2024-03-30,28269.174,5020.538,-834.896,9925.034,3919.689,3084.793,3.8,19263.7,254.963,1.925969,211.841017,71.814275,899.043923,84.961429
12508,2024-03-31,28269.174,5020.538,-834.896,9925.034,3919.689,3084.793,3.8,19263.7,254.963,1.925969,211.841017,71.814275,899.043923,84.961429


In [78]:
# Assuming econ_data_cut and data_additional_day_cut are your dataframes

# Ensure 'Date' columns are in datetime format
econ_data['Date'] = pd.to_datetime(econ_data['Date'])
merged_weather_df['Date'] = pd.to_datetime(merged_weather_df['Date'])

# Merge the dataframes on the 'Date' column
merged_data_daily = pd.merge(econ_data, merged_weather_df, on='Date', how='outer')

# Display the merged dataframe
merged_data_daily

Unnamed: 0,Date,GDP : check unit,GPDI : check unit,NETEXP : check unit,GovTotExp : check unit,ImpGoServ : check unit,ExpGoServ : check unit,Unempl : check unit,PCE : check unit,ProPriceInd : check unit,...,temperature_2m_mean (°C) AT,daylight_duration (s) AT,rain_sum (mm) AT,snowfall_sum (cm) AT,wind_speed_10m_max (km/h) AT,temperature_2m_mean (°C) SE,daylight_duration (s) SE,rain_sum (mm) SE,snowfall_sum (cm) SE,wind_speed_10m_max (km/h) SE
0,1990-01-01,5872.701,1010.838,-88.536,1998.201,626.767,538.232,5.4,3730.7,114.900,...,5.1,35830.89,0.0,0.0,24.5,5.7,30621.57,6.3,0.00,23.3
1,1990-01-02,5872.701,1010.838,-88.536,1998.201,626.767,538.232,5.4,3730.7,114.900,...,1.5,35864.80,0.0,0.0,12.1,0.7,30681.43,1.9,2.03,13.8
2,1990-01-03,5872.701,1010.838,-88.536,1998.201,626.767,538.232,5.4,3730.7,114.900,...,6.2,35901.45,0.0,0.0,9.2,3.7,30746.07,7.0,0.00,22.4
3,1990-01-04,5872.701,1010.838,-88.536,1998.201,626.767,538.232,5.4,3730.7,114.900,...,10.8,35940.77,8.5,0.0,14.9,6.1,30815.38,12.8,0.00,23.2
4,1990-01-05,5872.701,1010.838,-88.536,1998.201,626.767,538.232,5.4,3730.7,114.900,...,13.2,35982.72,7.4,0.0,13.4,7.8,30889.25,9.7,0.00,30.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12505,2024-03-28,28269.174,5020.538,-834.896,9925.034,3919.689,3084.793,3.8,19263.7,254.963,...,13.0,44743.99,0.0,0.0,26.7,7.4,45536.32,7.1,0.00,22.1
12506,2024-03-29,28269.174,5020.538,-834.896,9925.034,3919.689,3084.793,3.8,19263.7,254.963,...,11.1,44869.84,0.0,0.0,14.1,7.9,45742.93,3.5,0.00,25.2
12507,2024-03-30,28269.174,5020.538,-834.896,9925.034,3919.689,3084.793,3.8,19263.7,254.963,...,13.8,44995.06,0.0,0.0,19.1,8.6,45948.61,0.0,0.00,21.4
12508,2024-03-31,28269.174,5020.538,-834.896,9925.034,3919.689,3084.793,3.8,19263.7,254.963,...,16.8,45119.59,0.0,0.0,18.3,7.5,46153.21,0.0,0.00,21.3


In [79]:
merged_data_daily.to_csv("data_daily_econ_weather.csv", index=False)