## Initializing Weather Data

In [21]:
import pandas as pd

### Loading weather & trips

In [22]:
weather_original = pd.read_csv(r'C:\Users\etaiw\Desktop\Projects\aviya_project\Data\Data Weather\2024-02_data_weather.csv')
weather_raw = weather_original

In [23]:
weather_raw.head(5)

Unnamed: 0,תחנה,תאריך ושעה (שעון חורף),טמפרטורה (C°),טמפרטורת מקסימום (C°),טמפרטורת מינימום (C°),כיוון הרוח (מעלות),מהירות רוח (מטר לשניה),"כמות גשם (מ""מ)"
0,"תל-אביב, חוף",01/02/2024 00:00,9.8,9.9,9.8,126,2.4,0.0
1,"תל-אביב, חוף",01/02/2024 00:10,9.7,9.8,9.6,125,2.4,0.0
2,"תל-אביב, חוף",01/02/2024 00:20,9.6,9.6,9.6,122,2.6,0.0
3,"תל-אביב, חוף",01/02/2024 00:30,9.5,9.6,9.4,121,2.8,0.0
4,"תל-אביב, חוף",01/02/2024 00:40,9.4,9.4,9.3,118,3.1,0.0


### Convert weather data resolution to 30 min intervals

In [24]:
# Hebrew -> English 
col_rename_map = {
    'תחנה': 'station',
    'תאריך ושעה (שעון חורף)': 'datetime',
    'טמפרטורה (C°)': 'temp_c',
    'טמפרטורת מקסימום (C°)': 'temp_max_c',
    'טמפרטורת מינימום (C°)': 'temp_min_c',
    'כיוון הרוח (מעלות)': 'wind_direction_deg',
    'מהירות רוח (מטר לשניה)': 'wind_speed_ms',
    'כמות גשם (מ"מ)': 'rain_mm'
}

# Column rename to English
weather_raw = weather_raw.rename(columns=col_rename_map)
weather_raw.drop(['station'], axis=1, inplace=True)


In [25]:
weather_raw.head(2)

Unnamed: 0,datetime,temp_c,temp_max_c,temp_min_c,wind_direction_deg,wind_speed_ms,rain_mm
0,01/02/2024 00:00,9.8,9.9,9.8,126,2.4,0.0
1,01/02/2024 00:10,9.7,9.8,9.6,125,2.4,0.0


In [26]:
import pandas as pd

# Convert the 'datetime' column from string to a proper datetime object
weather_raw['datetime'] = pd.to_datetime(weather_raw['datetime'], dayfirst=True)

# Create a new column 'datetime_30' that rounds down each timestamp
# to the start of a 30-minute interval (00:00, 00:30, 01:00, etc.).
weather_raw['datetime_30'] = weather_raw['datetime'].dt.floor('30T')  # '30T' = 30 minutes

# Aggregate the data to 30-minute resolution:
#  - Group by 'datetime_30' (each 30-minute window)
#  - For each group of 3 rows (10-min steps):
#        * temp_c:      average temperature
#        * temp_max_c:  maximum temperature
#        * temp_min_c:  minimum temperature
#        * wind_direction_deg: average wind direction
#        * wind_speed_ms:      average wind speed
#        * rain_mm:            sum of rain in that 30-minute window
weather = (
    weather_raw
    .groupby('datetime_30')
    .agg(
        temp_c=('temp_c', 'mean'),                     # average temperature over 30 minutes
        temp_max_c=('temp_max_c', 'max'),              # highest temp in the 30-min window
        temp_min_c=('temp_min_c', 'min'),              # lowest temp in the 30-min window
        wind_direction_deg=('wind_direction_deg', 'mean'),  # average wind direction
        wind_speed_ms=('wind_speed_ms', 'mean'),       # average wind speed
        rain_mm=('rain_mm', 'sum')                     # total rain in 30 minutes
    )
    .reset_index()
    .rename(columns={'datetime_30': 'datetime'})       # rename back to 'datetime' for clarity
)

# Quick check: show the first 5 rows of the 30-minute resolution dataset
print(weather.head())


             datetime    temp_c  temp_max_c  temp_min_c  wind_direction_deg  \
0 2024-02-01 00:00:00  9.700000         9.9         9.6          124.333333   
1 2024-02-01 00:30:00  9.433333         9.6         9.3          120.000000   
2 2024-02-01 01:00:00  9.333333         9.5         9.2          118.333333   
3 2024-02-01 01:30:00  9.200000         9.3         9.1          126.333333   
4 2024-02-01 02:00:00  9.033333         9.9         8.8          152.666667   

   wind_speed_ms  rain_mm  
0       2.466667      0.0  
1       3.033333      0.0  
2       2.733333      0.0  
3       2.866667      0.0  
4       2.466667      0.0  


  weather_raw['datetime_30'] = weather_raw['datetime'].dt.floor('30T')  # '30T' = 30 minutes


In [27]:
# save as csv to clean data 
weather.to_csv(r'C:\Users\etaiw\Desktop\Projects\aviya_project\Data\Clean Data\Final Weather\2024-02_Final_Weather.csv', index=False)