In [109]:
import pandas as pd
import numpy as np
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry

In [110]:
# Fetch daily historical weather for 2022-01-01 .. 2023-12-31 from the
# Open-Meteo archive API and print the metadata of the returned location.

# Setup the Open-Meteo API client with cache and retry on error
# NOTE(review): expire_after = -1 presumably means cached responses never expire
# (requests_cache convention) — confirm; delete '.cache' to force a fresh download.
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
	"latitude": 43.7001,
	"longitude": -79.4163,
	"start_date": "2022-01-01",
	"end_date": "2023-12-31",
	# The daily series are read back by position (0..7) in the processing code below.
	"daily": ["weather_code", "temperature_2m_mean", "precipitation_sum", "rain_sum", "snowfall_sum", "precipitation_hours", "wind_speed_10m_max", "wind_gusts_10m_max"],
	# NOTE(review): the response reports one UTC offset for the whole period
	# (-14400 s in the recorded output), and the resulting 'date' values are all
	# at 04:00 UTC, January rows included — keep this in mind when deriving local days.
	"timezone": "America/New_York"
}
responses = openmeteo.weather_api(url, params=params)
# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
# Echo the location metadata the API actually resolved for the request.
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Unpack the daily series by position. Index i is the i-th entry of the
# "daily" list sent in the request, so the target names below must stay in
# the same order as that list.
daily = response.Daily()
(
    daily_weather_code,
    daily_temperature_2m_mean,
    daily_precipitation_sum,
    daily_rain_sum,
    daily_snowfall_sum,
    daily_precipitation_hours,
    daily_wind_speed_10m_max,
    daily_wind_gusts_10m_max,
) = (daily.Variables(position).ValuesAsNumpy() for position in range(8))

# One timestamp per value: the half-open range [Time, TimeEnd) stepped by
# Interval seconds, expressed as timezone-aware UTC instants.
period_start = pd.to_datetime(daily.Time(), unit="s", utc=True)
period_end = pd.to_datetime(daily.TimeEnd(), unit="s", utc=True)
step = pd.Timedelta(seconds=daily.Interval())

daily_data = {
    "date": pd.date_range(start=period_start, end=period_end, freq=step, inclusive="left"),
    "weather_code": daily_weather_code,
    "temperature_2m_mean": daily_temperature_2m_mean,
    "precipitation_sum": daily_precipitation_sum,
    "rain_sum": daily_rain_sum,
    "snowfall_sum": daily_snowfall_sum,
    "precipitation_hours": daily_precipitation_hours,
    "wind_speed_10m_max": daily_wind_speed_10m_max,
    "wind_gusts_10m_max": daily_wind_gusts_10m_max,
}

daily_dataframe = pd.DataFrame(data=daily_data)

Coordinates 43.6906852722168°N -79.4117431640625°E
Elevation 175.0 m asl
Timezone b'America/New_York' b'EDT'
Timezone difference to GMT+0 -14400 s


In [111]:
# Optional: uncomment the lines below to display DataFrames without truncating rows or column widths
#pd.set_option('display.max_rows', None)
#pd.set_option('display.max_colwidth', None)

In [112]:
# Data dictionary for the TTC bus-delay dataset: one row per field with its
# description and an example value.
readme_path = 'ttc-bus-delay-data-readme.xlsx'
df_readme = pd.read_excel(readme_path)
df_readme

Unnamed: 0,Field Name,Description,Example
0,Report Date,The date (YYYY/MM/DD) when the delay-causing incident occurred,2017-06-20 00:00:00
1,Route,The number of the bus route,51
2,Time,The time (hh:mm:ss AM/PM) when the delay-causing incident occurred,00:35:00
3,Day,The name of the day,Monday
4,Location,The location of the delay-causing incident,York Mills Station
5,Incident,The description of the delay-causing incident,Mechanical
6,Min Delay,"The delay, in minutes, to the schedule for the following bus",10
7,Min Gap,"The total scheduled time, in minutes, from the bus ahead of the following bus",20
8,Direction,"The direction of the bus route where B,b or BW indicates both ways. (On an east west route, it includes both east and west) NB - northbound, SB - southbound, EB - eastbound, WB - westbound",N
9,Vehicle,Vehicle number,1057


In [113]:
# Raw TTC bus-delay records, one workbook per year.
df22, df23 = (
    pd.read_excel(workbook)
    for workbook in ('ttc-bus-delay-data-2022.xlsx', 'ttc-bus-delay-data-2023.xlsx')
)

In [114]:
# Preview the first 20 rows of the 2022 delay records.
df22.head(20)

Unnamed: 0,Date,Route,Time,Day,Location,Incident,Min Delay,Min Gap,Direction,Vehicle
0,2022-01-01,320,02:00,Saturday,YONGE AND DUNDAS,General Delay,0,0,,8531
1,2022-01-01,325,02:00,Saturday,OVERLEA AND THORCLIFFE,Diversion,131,161,W,8658
2,2022-01-01,320,02:00,Saturday,YONGE AND STEELES,Operations - Operator,17,20,S,0
3,2022-01-01,320,02:07,Saturday,YONGE AND STEELES,Operations - Operator,4,11,S,0
4,2022-01-01,320,02:13,Saturday,YONGE AND STEELES,Operations - Operator,4,8,S,0
5,2022-01-01,363,02:16,Saturday,KING AND SHAW,Operations - Operator,30,60,,0
6,2022-01-01,96,02:18,Saturday,HUMBERLINE LOOP,Security,0,0,N,3536
7,2022-01-01,320,02:38,Saturday,STEELES AND YONGE,Operations - Operator,4,8,,0
8,2022-01-01,320,02:55,Saturday,YONGE AND STEELES,Operations - Operator,4,8,,0
9,2022-01-01,300,03:18,Saturday,KENNEDY STATION,Emergency Services,0,0,E,8094


In [115]:
# Preview the first 20 rows of the 2023 delay records.
df23.head(20)

Unnamed: 0,Date,Route,Time,Day,Location,Incident,Min Delay,Min Gap,Direction,Vehicle
0,2023-01-01,91,02:30,Sunday,WOODBINE AND MORTIMER,Diversion,81,111,,8772
1,2023-01-01,69,02:34,Sunday,WARDEN STATION,Security,22,44,S,8407
2,2023-01-01,35,03:06,Sunday,JANE STATION,Cleaning - Unsanitary,30,60,N,1051
3,2023-01-01,900,03:14,Sunday,KIPLING STATION,Security,17,17,,3334
4,2023-01-01,85,03:43,Sunday,MEADOWALE LOOP,Security,1,1,,1559
5,2023-01-01,40,03:47,Sunday,KIPLING STATION,Emergency Services,0,0,,0
6,2023-01-01,336,03:52,Sunday,FINCH AND ALNESS,Diversion,138,168,,9220
7,2023-01-01,52,04:25,Sunday,LAWRENCE AND YONGE,Emergency Services,30,60,E,3520
8,2023-01-01,24,04:35,Sunday,DANFORTH AND MAIN,Cleaning - Unsanitary,20,40,W,8404
9,2023-01-01,36,05:18,Sunday,FINCH AND ALNESS,Diversion,334,344,,3524


In [116]:
# Column dtypes and non-null counts for 2022; 'Route' and 'Direction' are the
# only columns with missing values in the recorded output.
df22.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 58707 entries, 0 to 58706
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       58707 non-null  datetime64[ns]
 1   Route      58323 non-null  object        
 2   Time       58707 non-null  object        
 3   Day        58707 non-null  object        
 4   Location   58707 non-null  object        
 5   Incident   58707 non-null  object        
 6   Min Delay  58707 non-null  int64         
 7   Min Gap    58707 non-null  int64         
 8   Direction  48227 non-null  object        
 9   Vehicle    58707 non-null  int64         
dtypes: datetime64[ns](1), int64(3), object(6)
memory usage: 4.5+ MB


In [117]:
# Column dtypes and non-null counts for 2023, to compare against the 2022 schema.
df23.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56207 entries, 0 to 56206
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       56207 non-null  datetime64[ns]
 1   Route      55637 non-null  object        
 2   Time       56207 non-null  object        
 3   Day        56207 non-null  object        
 4   Location   56207 non-null  object        
 5   Incident   56207 non-null  object        
 6   Min Delay  56207 non-null  int64         
 7   Min Gap    56207 non-null  int64         
 8   Direction  46667 non-null  object        
 9   Vehicle    56207 non-null  int64         
dtypes: datetime64[ns](1), int64(3), object(6)
memory usage: 4.3+ MB


**Same columns, same formats**

In [118]:
# Summary statistics of the datetime and numeric columns for 2022.
# NOTE(review): the recorded maximum of 999 for both 'Min Delay' and 'Min Gap'
# looks like a placeholder value rather than a real duration — confirm.
df22.describe()

Unnamed: 0,Date,Min Delay,Min Gap,Vehicle
count,58707,58707.0,58707.0,58707.0
mean,2022-07-03 11:48:01.066993664,20.115353,32.676154,5467.459298
min,2022-01-01 00:00:00,0.0,0.0,0.0
25%,2022-04-06 00:00:00,9.0,17.0,1553.0
50%,2022-07-13 00:00:00,11.0,22.0,7959.0
75%,2022-09-27 00:00:00,20.0,36.0,8546.0
max,2022-12-31 00:00:00,999.0,999.0,93561.0
std,,48.945121,50.636856,4356.685772


In [119]:
# Summary statistics of the datetime and numeric columns for 2023.
df23.describe()

Unnamed: 0,Date,Min Delay,Min Gap,Vehicle
count,56207,56207.0,56207.0,56207.0
mean,2023-07-13 15:27:51.870051840,20.251606,32.897557,5576.54737
min,2023-01-01 00:00:00,0.0,0.0,0.0
25%,2023-04-15 00:00:00,9.0,17.0,3155.0
50%,2023-07-23 00:00:00,11.0,21.0,7270.0
75%,2023-10-12 00:00:00,20.0,40.0,8545.0
max,2023-12-31 00:00:00,998.0,992.0,91024.0
std,,50.170167,52.287433,3741.874633


In [120]:
# Number of distinct incident categories per year, as [2022, 2023].
# dropna=False counts a missing value as its own category, like unique() does.
[frame['Incident'].nunique(dropna=False) for frame in (df22, df23)]

[15, 13]

**Shared categories will go into ANOVA**

In [121]:
# Incident categories present in each year, in order of first appearance.
delay_categories_22 = list(df22['Incident'].unique())
delay_categories_23 = list(df23['Incident'].unique())

categories_22, categories_23 = set(delay_categories_22), set(delay_categories_23)

# Sorted so the lists (and the displayed output) are identical on every run:
# the iteration order of a set of strings is not stable across kernel restarts.
# 'Incident' has no missing values in either year (see .info() above), so all
# elements are strings and sort cleanly.
shared_categories_22_23 = sorted(categories_22 & categories_23)
# Categories that occur in exactly one of the two years.
not_shared_categories_22_23 = sorted(categories_22 ^ categories_23)
shared_categories_22_23

['Collision - TTC',
 'Cleaning - Unsanitary',
 'Diversion',
 'General Delay',
 'Security',
 'Held By',
 'Operations - Operator',
 'Emergency Services',
 'Mechanical',
 'Road Blocked - NON-TTC Collision',
 'Vision',
 'Investigation',
 'Utilized Off Route']

**'Late Entering Service' and 'Cleaning - Disinfection' are the two categories that are not shared: both occur in the 2022 data (rows shown below) but not in 2023, whose 13 categories are all in the shared list. I will create dummies for them as well**

In [122]:
# 2022 records logged as 'Cleaning - Disinfection'.
is_disinfection = df22['Incident'].eq('Cleaning - Disinfection')
df22.loc[is_disinfection]

Unnamed: 0,Date,Route,Time,Day,Location,Incident,Min Delay,Min Gap,Direction,Vehicle
2488,2022-01-12,47,13:41,Wednesday,CALEDONIA AND BRIDGELA,Cleaning - Disinfection,247,259,W,8368
5138,2022-01-26,165,16:24,Wednesday,WESTON AND STEELES,Cleaning - Disinfection,244,253,N,3714
6872,2022-02-07,34,18:31,Monday,EGLINTON STATION,Cleaning - Disinfection,9,17,W,8790
14963,2022-04-08,52,23:48,Friday,LAWRENCE WEST STATION,Cleaning - Disinfection,20,40,E,8134
26442,2022-06-24,168,21:10,Friday,DAVENPORT AND SYMINGTO,Cleaning - Disinfection,7,14,,1259
33363,2022-07-31,35,16:51,Sunday,JANE AND SHPPARD,Cleaning - Disinfection,9,17,N,1056


In [123]:
# 2022 records logged as 'Late Entering Service'.
is_late_entering = df22['Incident'].eq('Late Entering Service')
df22.loc[is_late_entering]

Unnamed: 0,Date,Route,Time,Day,Location,Incident,Min Delay,Min Gap,Direction,Vehicle
3224,2022-01-17,95,02:28,Monday,SHEPPARD STATION,Late Entering Service,14,0,E,3448
4586,2022-01-23,86,14:11,Sunday,KENNEDY STATION,Late Entering Service,9,18,E,3462
5050,2022-01-26,954,06:17,Wednesday,LAWRENCE EAST STATION,Late Entering Service,8,0,E,0
5132,2022-01-26,36,15:37,Wednesday,FINCH WEST STATION,Late Entering Service,25,50,,3608
7722,2022-02-14,91,05:00,Monday,2 ALVARADO PLACE,Late Entering Service,24,48,W,8655
9854,2022-03-01,90,05:11,Tuesday,WILSON GARAGE,Late Entering Service,20,20,S,8192
10494,2022-03-06,80,15:08,Sunday,THE QUEENSWAY AND KIPL,Late Entering Service,24,48,E,8070
10683,2022-03-08,98,05:28,Tuesday,WILSON GARAGE,Late Entering Service,30,30,S,8163
11147,2022-03-11,320,01:43,Friday,YONGE AND STEELES,Late Entering Service,20,26,S,8354
13866,2022-03-31,74,17:50,Thursday,ST CLAIR STATION,Late Entering Service,20,40,W,8702


In [124]:
# Display the daily weather frame built from the API response; note that
# 'date' holds timezone-aware UTC timestamps, not plain calendar dates.
daily_dataframe

Unnamed: 0,date,weather_code,temperature_2m_mean,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,wind_speed_10m_max,wind_gusts_10m_max
0,2022-01-01 04:00:00+00:00,73.0,2.319333,2.800000,0.400000,1.68,9.0,20.907913,36.000000
1,2022-01-02 04:00:00+00:00,73.0,-6.297333,6.000000,0.000000,4.20,14.0,22.264771,39.239998
2,2022-01-03 04:00:00+00:00,3.0,-9.074418,0.000000,0.000000,0.00,0.0,16.981165,29.160000
3,2022-01-04 04:00:00+00:00,3.0,-2.722333,0.000000,0.000000,0.00,0.0,21.398056,37.079998
4,2022-01-05 04:00:00+00:00,51.0,1.611000,0.800000,0.800000,0.00,5.0,35.221176,64.079994
...,...,...,...,...,...,...,...,...,...
725,2023-12-27 04:00:00+00:00,63.0,6.240167,20.000002,20.000002,0.00,19.0,17.253731,31.319998
726,2023-12-28 04:00:00+00:00,53.0,6.121417,3.800000,3.800000,0.00,14.0,15.790833,28.440001
727,2023-12-29 04:00:00+00:00,55.0,5.406834,5.800000,5.800000,0.00,18.0,15.986595,29.879999
728,2023-12-30 04:00:00+00:00,51.0,0.463083,0.200000,0.200000,0.00,2.0,16.965895,30.960001


What I need to do:

Merge 22 and 23 into one

get dummies for merged df

group by days

merge with weather data

In [125]:
# Stack the 2022 and 2023 records into one frame; ignore_index=True renumbers
# the rows 0..n-1 so the 2023 rows follow directly after the last 2022 row.
merged_delays_df = pd.concat([df22, df23], ignore_index=True)

In [126]:
# Show the rows around the seam between the two years to check the concat.
# The seam is derived from len(df22) instead of hard-coded row numbers, so the
# check stays valid if the 2022 workbook is refreshed.
seam = len(df22)  # position of the first 2023 row after concat(ignore_index=True)
merged_delays_df.iloc[seam - 3 : seam + 13]

Unnamed: 0,Date,Route,Time,Day,Location,Incident,Min Delay,Min Gap,Direction,Vehicle
58704,2022-12-31,72,23:56,Saturday,LOWER SHERBOURNE AND L,Security,18,36,N,8493
58705,2022-12-31,22,00:17,Saturday,BINGHAM LOOP,Cleaning - Unsanitary,10,20,S,8582
58706,2022-12-31,7,00:28,Saturday,BATHURST STATION,General Delay,10,20,N,8354
58707,2023-01-01,91,02:30,Sunday,WOODBINE AND MORTIMER,Diversion,81,111,,8772
58708,2023-01-01,69,02:34,Sunday,WARDEN STATION,Security,22,44,S,8407
58709,2023-01-01,35,03:06,Sunday,JANE STATION,Cleaning - Unsanitary,30,60,N,1051
58710,2023-01-01,900,03:14,Sunday,KIPLING STATION,Security,17,17,,3334
58711,2023-01-01,85,03:43,Sunday,MEADOWALE LOOP,Security,1,1,,1559
58712,2023-01-01,40,03:47,Sunday,KIPLING STATION,Emergency Services,0,0,,0
58713,2023-01-01,336,03:52,Sunday,FINCH AND ALNESS,Diversion,138,168,,9220


In [127]:
# Confirm the stacked frame has the combined row count and the same schema as
# the yearly frames.
merged_delays_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114914 entries, 0 to 114913
Data columns (total 10 columns):
 #   Column     Non-Null Count   Dtype         
---  ------     --------------   -----         
 0   Date       114914 non-null  datetime64[ns]
 1   Route      113960 non-null  object        
 2   Time       114914 non-null  object        
 3   Day        114914 non-null  object        
 4   Location   114914 non-null  object        
 5   Incident   114914 non-null  object        
 6   Min Delay  114914 non-null  int64         
 7   Min Gap    114914 non-null  int64         
 8   Direction  94894 non-null   object        
 9   Vehicle    114914 non-null  int64         
dtypes: datetime64[ns](1), int64(3), object(6)
memory usage: 8.8+ MB


In [128]:
# One-hot encode 'Incident': the column is replaced by one boolean
# 'Incident_<category>' column per category; all other columns are kept.
merged_df_dummies = pd.get_dummies(merged_delays_df, columns=['Incident'])

In [129]:
# Preview the encoded frame (first five rows).
merged_df_dummies.head()

Unnamed: 0,Date,Route,Time,Day,Location,Min Delay,Min Gap,Direction,Vehicle,Incident_Cleaning - Disinfection,...,Incident_General Delay,Incident_Held By,Incident_Investigation,Incident_Late Entering Service,Incident_Mechanical,Incident_Operations - Operator,Incident_Road Blocked - NON-TTC Collision,Incident_Security,Incident_Utilized Off Route,Incident_Vision
0,2022-01-01,320,02:00,Saturday,YONGE AND DUNDAS,0,0,,8531,False,...,True,False,False,False,False,False,False,False,False,False
1,2022-01-01,325,02:00,Saturday,OVERLEA AND THORCLIFFE,131,161,W,8658,False,...,False,False,False,False,False,False,False,False,False,False
2,2022-01-01,320,02:00,Saturday,YONGE AND STEELES,17,20,S,0,False,...,False,False,False,False,False,True,False,False,False,False
3,2022-01-01,320,02:07,Saturday,YONGE AND STEELES,4,11,S,0,False,...,False,False,False,False,False,True,False,False,False,False
4,2022-01-01,320,02:13,Saturday,YONGE AND STEELES,4,8,S,0,False,...,False,False,False,False,False,True,False,False,False,False


In [130]:
# List every column name to see which 'Incident_*' dummies were generated.
merged_df_dummies.columns

Index(['Date', 'Route', 'Time', 'Day', 'Location', 'Min Delay', 'Min Gap',
       'Direction', 'Vehicle', 'Incident_Cleaning - Disinfection',
       'Incident_Cleaning - Unsanitary', 'Incident_Collision - TTC',
       'Incident_Diversion', 'Incident_Emergency Services',
       'Incident_General Delay', 'Incident_Held By', 'Incident_Investigation',
       'Incident_Late Entering Service', 'Incident_Mechanical',
       'Incident_Operations - Operator',
       'Incident_Road Blocked - NON-TTC Collision', 'Incident_Security',
       'Incident_Utilized Off Route', 'Incident_Vision'],
      dtype='object')

In [131]:
# Dummy columns generated for 'Incident', read from the encoded frame itself
# (in column order) rather than hand-copied from the output above, so the list
# cannot drift out of sync when a category is added, removed or renamed.
incident_columns = [
    column
    for column in merged_df_dummies.columns
    if str(column).startswith('Incident_')
]

In [132]:
# Count incidents per calendar day: summing the boolean dummies within each
# 'Date' group gives the number of records of each incident type on that day.
# A day with no records at all produces no row here.
df_daily = merged_df_dummies.groupby('Date')[incident_columns].sum().reset_index()

In [133]:
# Total number of incidents per day, i.e. the row-wise sum over all incident-type counts.
df_daily = df_daily.assign(
    **{'Total Incidents': df_daily[incident_columns].sum(axis=1)}
)

In [134]:
# Join weather and incident counts on the calendar day. Gluing the two frames
# together by row position only lines up when both have exactly one row for
# every day in the same order; a single missing day would silently pair
# weather with the wrong day's incidents.
#
# The weather 'date' values are UTC instants of local midnight under the fixed
# offset reported by the API, so adding that offset and dropping the timezone
# gives the local calendar day that the TTC 'Date' column uses.
weather_day = (
    (daily_dataframe['date'] + pd.Timedelta(seconds=response.UtcOffsetSeconds()))
    .dt.tz_localize(None)
    .dt.normalize()
    .astype('datetime64[ns]')
)

# Left join: every weather day is kept; a day without TTC records shows up
# with missing 'Date' and counts instead of shifting the rows below it.
# validate raises if either side has more than one row for the same day.
merged_df = (
    daily_dataframe
    .assign(_weather_day=weather_day)
    .merge(
        df_daily,
        how='left',
        left_on='_weather_day',
        right_on='Date',
        validate='one_to_one',
    )
    .drop(columns='_weather_day')
)

In [135]:
# Make the origin of the weather timestamp explicit in the merged table.
merged_df = merged_df.rename({'date': 'date_from_weather'}, axis='columns')
merged_df

Unnamed: 0,date_from_weather,weather_code,temperature_2m_mean,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,wind_speed_10m_max,wind_gusts_10m_max,Date,...,Incident_Held By,Incident_Investigation,Incident_Late Entering Service,Incident_Mechanical,Incident_Operations - Operator,Incident_Road Blocked - NON-TTC Collision,Incident_Security,Incident_Utilized Off Route,Incident_Vision,Total Incidents
0,2022-01-01 04:00:00+00:00,73.0,2.319333,2.800000,0.400000,1.68,9.0,20.907913,36.000000,2022-01-01,...,0,1,0,21,87,1,10,7,1,144
1,2022-01-02 04:00:00+00:00,73.0,-6.297333,6.000000,0.000000,4.20,14.0,22.264771,39.239998,2022-01-02,...,0,1,0,41,64,1,6,6,1,131
2,2022-01-03 04:00:00+00:00,3.0,-9.074418,0.000000,0.000000,0.00,0.0,16.981165,29.160000,2022-01-03,...,0,4,0,55,182,5,3,5,0,282
3,2022-01-04 04:00:00+00:00,3.0,-2.722333,0.000000,0.000000,0.00,0.0,21.398056,37.079998,2022-01-04,...,0,0,0,36,157,1,9,1,1,226
4,2022-01-05 04:00:00+00:00,51.0,1.611000,0.800000,0.800000,0.00,5.0,35.221176,64.079994,2022-01-05,...,0,1,0,48,205,0,17,6,1,298
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,2023-12-27 04:00:00+00:00,63.0,6.240167,20.000002,20.000002,0.00,19.0,17.253731,31.319998,2023-12-27,...,0,5,0,62,70,1,11,5,2,190
726,2023-12-28 04:00:00+00:00,53.0,6.121417,3.800000,3.800000,0.00,14.0,15.790833,28.440001,2023-12-28,...,0,1,0,55,31,0,13,1,0,143
727,2023-12-29 04:00:00+00:00,55.0,5.406834,5.800000,5.800000,0.00,18.0,15.986595,29.879999,2023-12-29,...,0,3,0,60,70,1,8,4,0,183
728,2023-12-30 04:00:00+00:00,51.0,0.463083,0.200000,0.200000,0.00,2.0,16.965895,30.960001,2023-12-30,...,2,5,0,65,60,1,17,7,4,195


In [136]:
# Likewise label the TTC date column with its source.
merged_df = merged_df.rename({'Date': 'date_from_ttc'}, axis='columns')
merged_df

Unnamed: 0,date_from_weather,weather_code,temperature_2m_mean,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,wind_speed_10m_max,wind_gusts_10m_max,date_from_ttc,...,Incident_Held By,Incident_Investigation,Incident_Late Entering Service,Incident_Mechanical,Incident_Operations - Operator,Incident_Road Blocked - NON-TTC Collision,Incident_Security,Incident_Utilized Off Route,Incident_Vision,Total Incidents
0,2022-01-01 04:00:00+00:00,73.0,2.319333,2.800000,0.400000,1.68,9.0,20.907913,36.000000,2022-01-01,...,0,1,0,21,87,1,10,7,1,144
1,2022-01-02 04:00:00+00:00,73.0,-6.297333,6.000000,0.000000,4.20,14.0,22.264771,39.239998,2022-01-02,...,0,1,0,41,64,1,6,6,1,131
2,2022-01-03 04:00:00+00:00,3.0,-9.074418,0.000000,0.000000,0.00,0.0,16.981165,29.160000,2022-01-03,...,0,4,0,55,182,5,3,5,0,282
3,2022-01-04 04:00:00+00:00,3.0,-2.722333,0.000000,0.000000,0.00,0.0,21.398056,37.079998,2022-01-04,...,0,0,0,36,157,1,9,1,1,226
4,2022-01-05 04:00:00+00:00,51.0,1.611000,0.800000,0.800000,0.00,5.0,35.221176,64.079994,2022-01-05,...,0,1,0,48,205,0,17,6,1,298
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,2023-12-27 04:00:00+00:00,63.0,6.240167,20.000002,20.000002,0.00,19.0,17.253731,31.319998,2023-12-27,...,0,5,0,62,70,1,11,5,2,190
726,2023-12-28 04:00:00+00:00,53.0,6.121417,3.800000,3.800000,0.00,14.0,15.790833,28.440001,2023-12-28,...,0,1,0,55,31,0,13,1,0,143
727,2023-12-29 04:00:00+00:00,55.0,5.406834,5.800000,5.800000,0.00,18.0,15.986595,29.879999,2023-12-29,...,0,3,0,60,70,1,8,4,0,183
728,2023-12-30 04:00:00+00:00,51.0,0.463083,0.200000,0.200000,0.00,2.0,16.965895,30.960001,2023-12-30,...,2,5,0,65,60,1,17,7,4,195


In [137]:
# Copy of the merged table with both date columns converted to plain strings.
# NOTE(review): presumably needed because the Excel export cannot write
# timezone-aware datetimes — confirm.
merged_df_no_datetime = merged_df.astype(
    {'date_from_weather': str, 'date_from_ttc': str}
)

In [138]:
# Check the final table: both date columns should now be object (string)
# dtype and no column should contain missing values.
merged_df_no_datetime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 730 entries, 0 to 729
Data columns (total 26 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   date_from_weather                          730 non-null    object 
 1   weather_code                               730 non-null    float32
 2   temperature_2m_mean                        730 non-null    float32
 3   precipitation_sum                          730 non-null    float32
 4   rain_sum                                   730 non-null    float32
 5   snowfall_sum                               730 non-null    float32
 6   precipitation_hours                        730 non-null    float32
 7   wind_speed_10m_max                         730 non-null    float32
 8   wind_gusts_10m_max                         730 non-null    float32
 9   date_from_ttc                              730 non-null    object 
 10  Incident_Cleaning - Disinf

In [139]:
# Write the string-dated merged table to 'fully_merged.xlsx' in the working directory.
# NOTE(review): to_excel also writes the row index as the first column by
# default — pass index=False if that column is not wanted.
merged_df_no_datetime.to_excel('fully_merged.xlsx')