In [1]:
import pandas as pd
from datetime import datetime, date, timedelta
import weather_scrap as w
import alerts_in_ua as a
from sklearn.preprocessing import LabelEncoder
import holidays

### Merging regions and weather

In [2]:
df_regions = pd.read_csv(f"../data/regions.csv")

In [3]:
df_regions.head(5)

Unnamed: 0,region,center_city_ua,center_city_en,region_alt,region_id
0,АР Крим,Сімферополь,Simferopol,Крим,1
1,Вінницька,Вінниця,Vinnytsia,Вінниччина,2
2,Волинська,Луцьк,Lutsk,Волинь,3
3,Дніпропетровська,Дніпро,Dnipro,Дніпропетровщина,4
4,Донецька,Донецьк,Donetsk,Донеччина,5


In [4]:
df_regions["region"].unique()

array(['АР Крим', 'Вінницька', 'Волинська', 'Дніпропетровська',
       'Донецька', 'Житомирська', 'Закарпатська', 'Запорізька',
       'Івано-Франківська', 'Київська', 'Кіровоградська', 'Луганська',
       'Львівська', 'Миколаївська', 'Одеська', 'Полтавська', 'Рівненська',
       'Сумська', 'Тернопільська', 'Харківська', 'Херсонська',
       'Хмельницька', 'Черкаська', 'Чернівецька', 'Чернігівська'],
      dtype=object)

In [5]:
df_regions["region"] = df_regions["region"].apply(lambda x: x + " обл.")

In [6]:
weather_regions = [
    "Vinnytsia, Ukraine",
    "Lutsk, Ukraine",
    "Dnipro, Ukraine",
    "Donetsk, Ukraine",
    "Zhytomyr, Ukraine",
    "Uzhhorod, Ukraine",
    "Zaporizhia, Ukraine",
    "Ivano-Frankivsk, Ukraine",
    "Kyiv, Ukraine",
    "Kropyvnytskyi, Ukraine",
    "Lviv, Ukraine",
    "Mykolaiv, Ukraine",
    "Odessa, Ukraine",
    "Poltava, Ukraine",
    "Rivne, Ukraine",
    "Sumy, Ukraine",
    "Ternopil, Ukraine",
    "Kharkiv, Ukraine",
    "Kherson, Ukraine",
    "Khmelnytskyi, Ukraine",
    "Cherkasy, Ukraine",
    "Chernivtsi, Ukraine",
    "Chernigiv, Ukraine"
]

In [7]:
all_data = []

for region in weather_regions:
    forecast = w.generate_forecast(region)
    
    df_region = pd.DataFrame(forecast)
    all_data.append(df_region)

df_weather = pd.concat(all_data, ignore_index=True)
df_weather.head()

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,...,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_uvindex,hour_conditions
0,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,0.0,,5.4,1.8,190.8,1014.0,24.1,40.8,1.0,Partially cloudy
1,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,0.0,,2.2,1.4,113.4,1014.0,24.1,58.2,0.0,Partially cloudy
2,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,0.0,,6.1,4.3,69.0,1015.0,24.1,60.5,0.0,Partially cloudy
3,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,0.0,,7.6,4.3,90.3,1015.0,24.1,61.4,0.0,Partially cloudy
4,"Вінниця, Україна",2025-04-23,1745355600,25.5,10.6,18.2,5.1,45.8,0.0,0.0,...,0.0,,10.4,5.4,118.8,1015.0,24.1,52.7,0.0,Partially cloudy


In [8]:
df_weather.columns

Index(['city_resolvedAddress', 'day_datetime', 'day_datetimeEpoch',
       'day_tempmax', 'day_tempmin', 'day_temp', 'day_dew', 'day_humidity',
       'day_precip', 'day_precipcover', 'day_solarradiation',
       'day_solarenergy', 'day_uvindex', 'day_sunrise', 'day_sunset',
       'day_moonphase', 'hour_datetime', 'hour_datetimeEpoch', 'hour_temp',
       'hour_humidity', 'hour_dew', 'hour_precip', 'hour_precipprob',
       'hour_snow', 'hour_snowdepth', 'hour_preciptype', 'hour_windgust',
       'hour_windspeed', 'hour_winddir', 'hour_pressure', 'hour_visibility',
       'hour_cloudcover', 'hour_uvindex', 'hour_conditions'],
      dtype='object')

In [9]:
df_weather["city"] = df_weather["city_resolvedAddress"].apply(lambda x: x.split(",")[0])
df_weather["city"] = df_weather["city"].replace('Хмельницька область', "Хмельницький")

In [10]:
df_weather.head(5)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,...,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_uvindex,hour_conditions,city
0,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,,5.4,1.8,190.8,1014.0,24.1,40.8,1.0,Partially cloudy,Вінниця
1,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,,2.2,1.4,113.4,1014.0,24.1,58.2,0.0,Partially cloudy,Вінниця
2,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,,6.1,4.3,69.0,1015.0,24.1,60.5,0.0,Partially cloudy,Вінниця
3,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,,7.6,4.3,90.3,1015.0,24.1,61.4,0.0,Partially cloudy,Вінниця
4,"Вінниця, Україна",2025-04-23,1745355600,25.5,10.6,18.2,5.1,45.8,0.0,0.0,...,,10.4,5.4,118.8,1015.0,24.1,52.7,0.0,Partially cloudy,Вінниця


In [11]:
df_weather_reg = pd.merge(df_weather, df_regions, left_on="city",right_on="center_city_ua")

In [12]:
df_weather_reg.head(5)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,...,hour_visibility,hour_cloudcover,hour_uvindex,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id
0,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,24.1,40.8,1.0,Partially cloudy,Вінниця,Вінницька обл.,Вінниця,Vinnytsia,Вінниччина,2
1,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,24.1,58.2,0.0,Partially cloudy,Вінниця,Вінницька обл.,Вінниця,Vinnytsia,Вінниччина,2
2,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,24.1,60.5,0.0,Partially cloudy,Вінниця,Вінницька обл.,Вінниця,Vinnytsia,Вінниччина,2
3,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,24.1,61.4,0.0,Partially cloudy,Вінниця,Вінницька обл.,Вінниця,Vinnytsia,Вінниччина,2
4,"Вінниця, Україна",2025-04-23,1745355600,25.5,10.6,18.2,5.1,45.8,0.0,0.0,...,24.1,52.7,0.0,Partially cloudy,Вінниця,Вінницька обл.,Вінниця,Vinnytsia,Вінниччина,2


In [13]:
df_weather_reg["region"] = df_weather_reg["region"].apply(lambda x: x + " область")

In [14]:
kyiv_reg = df_weather_reg[df_weather_reg["region"] == "Київська область"].copy()
kyiv_reg["region"] = "Київ"
kyiv_reg["region_id"] = 1

df_weather_reg = pd.concat([df_weather_reg, kyiv_reg])

### Merging weather, alarms and regions together

In [15]:
df_alarms = pd.DataFrame(a.get_alerts())
df_alarms.head()

Unnamed: 0,region,status
0,Автономна Республіка Крим,A
1,Волинська область,N
2,Вінницька область,N
3,Дніпропетровська область,N
4,Донецька область,N


In [16]:
df_alarms["status"] = df_alarms["status"].apply(lambda x: 1 if x == 'A' else 0)

In [17]:
df_weather_reg_al = df_weather_reg.merge(df_alarms, how="left", left_on="region", right_on="region")

In [18]:
df_weather_reg_al["day_datetime"] = pd.to_datetime(df_weather_reg_al["day_datetime"])

### Merging vectorized ISW to the rest

In [19]:
df_isw_vect = pd.read_csv("../data/ISW_vector.csv")

In [20]:
df_isw_vect.tail()

Unnamed: 0,date,content,lemma_content,stem_content,keywords
1080,2025-02-28,russian offensive campaign assessment 2025 oli...,russian offensive campaign assessment 2025 oli...,russian offens campaign assess 2025 olivia gib...,0.51 0.508 0.877 0.787 0.518 0.538 0.509 0.501...
1081,2025-03-01,russian offensive campaign assessment 2025 dav...,russian offensive campaign assessment 2025 dav...,russian offens campaign assess 2025 davit gasp...,0.475 0.472 0.786 0.43 0.714 0.439 0.473 0.509...
1082,2025-04-17,russian offensive campaign assessment 2025 gra...,russian offensive campaign assessment 2025 gra...,russian offens campaign assess 2025 grace mapp...,0.494 0.493 0.758 0.791 0.484 0.557 0.509 0.48...
1083,2025-04-20,russian offensive campaign assessment 2025 dar...,russian offensive campaign assessment 2025 dar...,russian offens campaign assess 2025 daria novi...,0.52 0.515 0.492 0.699 0.782 0.526 0.508 0.473...
1084,2025-04-21,russian offensive campaign assessment 2025 nic...,russian offensive campaign assessment 2025 nic...,russian offens campaign assess 2025 nicol wolk...,0.49 0.474 0.474 0.693 0.81 0.463 0.544 0.531 ...


In [21]:
yesterday_str = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
df_isw_vect = df_isw_vect[df_isw_vect['date'] == yesterday_str]

In [22]:
#df_isw_vect["date"] = pd.to_datetime(df_isw_vect["date"])
#df_isw_vect["date_tomorrow_datetime"] = df_isw_vect["date"] + datetime.timedelta(days=1)

In [23]:
#df_ready = df_weather_reg_al.merge(df_isw_vect, how="left", left_on="day_datetime", right_on="date_tomorrow_datetime")
df_isw_vect = pd.concat([df_isw_vect] * len(df_weather_reg_al), ignore_index=True)
df_ready = pd.concat([df_weather_reg_al.reset_index(drop=True), df_isw_vect.reset_index(drop=True)], axis=1)

In [24]:
df_ready.head()

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,...,center_city_ua,center_city_en,region_alt,region_id,status,date,content,lemma_content,stem_content,keywords
0,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,Вінниця,Vinnytsia,Вінниччина,2,,2025-04-21,russian offensive campaign assessment 2025 nic...,russian offensive campaign assessment 2025 nic...,russian offens campaign assess 2025 nicol wolk...,0.49 0.474 0.474 0.693 0.81 0.463 0.544 0.531 ...
1,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,Вінниця,Vinnytsia,Вінниччина,2,,2025-04-21,russian offensive campaign assessment 2025 nic...,russian offensive campaign assessment 2025 nic...,russian offens campaign assess 2025 nicol wolk...,0.49 0.474 0.474 0.693 0.81 0.463 0.544 0.531 ...
2,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,Вінниця,Vinnytsia,Вінниччина,2,,2025-04-21,russian offensive campaign assessment 2025 nic...,russian offensive campaign assessment 2025 nic...,russian offens campaign assess 2025 nicol wolk...,0.49 0.474 0.474 0.693 0.81 0.463 0.544 0.531 ...
3,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,Вінниця,Vinnytsia,Вінниччина,2,,2025-04-21,russian offensive campaign assessment 2025 nic...,russian offensive campaign assessment 2025 nic...,russian offens campaign assess 2025 nicol wolk...,0.49 0.474 0.474 0.693 0.81 0.463 0.544 0.531 ...
4,"Вінниця, Україна",2025-04-23,1745355600,25.5,10.6,18.2,5.1,45.8,0.0,0.0,...,Вінниця,Vinnytsia,Вінниччина,2,,2025-04-21,russian offensive campaign assessment 2025 nic...,russian offensive campaign assessment 2025 nic...,russian offens campaign assess 2025 nicol wolk...,0.49 0.474 0.474 0.693 0.81 0.463 0.544 0.531 ...


In [25]:
df_ready.columns

Index(['city_resolvedAddress', 'day_datetime', 'day_datetimeEpoch',
       'day_tempmax', 'day_tempmin', 'day_temp', 'day_dew', 'day_humidity',
       'day_precip', 'day_precipcover', 'day_solarradiation',
       'day_solarenergy', 'day_uvindex', 'day_sunrise', 'day_sunset',
       'day_moonphase', 'hour_datetime', 'hour_datetimeEpoch', 'hour_temp',
       'hour_humidity', 'hour_dew', 'hour_precip', 'hour_precipprob',
       'hour_snow', 'hour_snowdepth', 'hour_preciptype', 'hour_windgust',
       'hour_windspeed', 'hour_winddir', 'hour_pressure', 'hour_visibility',
       'hour_cloudcover', 'hour_uvindex', 'hour_conditions', 'city', 'region',
       'center_city_ua', 'center_city_en', 'region_alt', 'region_id', 'status',
       'date', 'content', 'lemma_content', 'stem_content', 'keywords'],
      dtype='object')

In [26]:
df_ready.rename({"status": "event_all_region"}, axis=1, inplace=True)

In [27]:
df_ready.head()

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,...,center_city_ua,center_city_en,region_alt,region_id,event_all_region,date,content,lemma_content,stem_content,keywords
0,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,Вінниця,Vinnytsia,Вінниччина,2,,2025-04-21,russian offensive campaign assessment 2025 nic...,russian offensive campaign assessment 2025 nic...,russian offens campaign assess 2025 nicol wolk...,0.49 0.474 0.474 0.693 0.81 0.463 0.544 0.531 ...
1,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,Вінниця,Vinnytsia,Вінниччина,2,,2025-04-21,russian offensive campaign assessment 2025 nic...,russian offensive campaign assessment 2025 nic...,russian offens campaign assess 2025 nicol wolk...,0.49 0.474 0.474 0.693 0.81 0.463 0.544 0.531 ...
2,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,Вінниця,Vinnytsia,Вінниччина,2,,2025-04-21,russian offensive campaign assessment 2025 nic...,russian offensive campaign assessment 2025 nic...,russian offens campaign assess 2025 nicol wolk...,0.49 0.474 0.474 0.693 0.81 0.463 0.544 0.531 ...
3,"Вінниця, Україна",2025-04-22,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,...,Вінниця,Vinnytsia,Вінниччина,2,,2025-04-21,russian offensive campaign assessment 2025 nic...,russian offensive campaign assessment 2025 nic...,russian offens campaign assess 2025 nicol wolk...,0.49 0.474 0.474 0.693 0.81 0.463 0.544 0.531 ...
4,"Вінниця, Україна",2025-04-23,1745355600,25.5,10.6,18.2,5.1,45.8,0.0,0.0,...,Вінниця,Vinnytsia,Вінниччина,2,,2025-04-21,russian offensive campaign assessment 2025 nic...,russian offensive campaign assessment 2025 nic...,russian offens campaign assess 2025 nicol wolk...,0.49 0.474 0.474 0.693 0.81 0.463 0.544 0.531 ...


In [28]:
df = df_ready.fillna(df_ready.median(numeric_only=True))
df['hour_preciptype'] = df['hour_preciptype'].astype(str)

In [29]:
df_encoded = pd.get_dummies(df, columns=['hour_preciptype'], prefix='hour_preciptype')

In [30]:
bool_columns = df_encoded.select_dtypes(include=['bool']).columns
df_encoded[bool_columns] = df_encoded[bool_columns].astype(int)

In [31]:
preciptype = [
    "hour_preciptype_['freezingrain']",
    "hour_preciptype_['ice']",
    "hour_preciptype_['rain', 'snow']",
    "hour_preciptype_['rain']",
    "hour_preciptype_['snow']"
]

for col in preciptype:
    if col not in df_encoded.columns:
        df_encoded[col] = 0

In [32]:
todrop = [
    'city_resolvedAddress',
    'day_datetime', 
    'city', 
    'region',
    'center_city_ua', 
    'center_city_en', 
    'region_alt', 
    'date', 
    'content', 
    'lemma_content', 
    'stem_content']

In [33]:
df_encoded = df_encoded.drop(todrop, axis=1)

In [34]:
df_encoded.columns

Index(['day_datetimeEpoch', 'day_tempmax', 'day_tempmin', 'day_temp',
       'day_dew', 'day_humidity', 'day_precip', 'day_precipcover',
       'day_solarradiation', 'day_solarenergy', 'day_uvindex', 'day_sunrise',
       'day_sunset', 'day_moonphase', 'hour_datetime', 'hour_datetimeEpoch',
       'hour_temp', 'hour_humidity', 'hour_dew', 'hour_precip',
       'hour_precipprob', 'hour_snow', 'hour_snowdepth', 'hour_windgust',
       'hour_windspeed', 'hour_winddir', 'hour_pressure', 'hour_visibility',
       'hour_cloudcover', 'hour_uvindex', 'hour_conditions', 'region_id',
       'event_all_region', 'keywords', 'hour_preciptype_None',
       'hour_preciptype_['rain']', 'hour_preciptype_['freezingrain']',
       'hour_preciptype_['ice']', 'hour_preciptype_['rain', 'snow']',
       'hour_preciptype_['snow']'],
      dtype='object')

In [35]:
df_encoded.shape

(644, 40)

In [36]:
temp_df = df_encoded['keywords'].str.split(expand=True)
temp_df = temp_df.astype('float64')
df_encoded_v2 = pd.concat([df_encoded.drop('keywords', axis=1), temp_df], axis=1)

In [37]:
le = LabelEncoder()
df_encoded_v2['hour_conditions'] = le.fit_transform(df['hour_conditions'])

In [38]:
df_encoded_v2['hour_datetime'] = pd.to_datetime(df_encoded_v2['hour_datetime'], format='%H:%M:%S')
df_encoded_v2['day_sunrise'] = pd.to_datetime(df_encoded_v2['day_sunrise'], format='%H:%M:%S')
df_encoded_v2['day_sunset'] = pd.to_datetime(df_encoded_v2['day_sunset'], format='%H:%M:%S')
df_encoded_v2['hour_datetime'] = (df_encoded_v2['hour_datetime'].dt.hour * 3600 +
                                  df_encoded_v2['hour_datetime'].dt.minute * 60 +
                                  df_encoded_v2['hour_datetime'].dt.second) / 86400

df_encoded_v2['day_sunrise'] = (df_encoded_v2['day_sunrise'].dt.hour * 3600 +
                                df_encoded_v2['day_sunrise'].dt.minute * 60 +
                                df_encoded_v2['day_sunrise'].dt.second) / 86400

df_encoded_v2['day_sunset'] = (df_encoded_v2['day_sunset'].dt.hour * 3600 +
                               df_encoded_v2['day_sunset'].dt.minute * 60 +
                               df_encoded_v2['day_sunset'].dt.second) / 86400

In [39]:
df_encoded_v2.head()

Unnamed: 0,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,...,990,991,992,993,994,995,996,997,998,999
0,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,284.7,24.9,...,0.564,0.839,0.631,0.644,0.547,0.573,0.582,0.547,0.568,0.7
1,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,284.7,24.9,...,0.564,0.839,0.631,0.644,0.547,0.573,0.582,0.547,0.568,0.7
2,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,284.7,24.9,...,0.564,0.839,0.631,0.644,0.547,0.573,0.582,0.547,0.568,0.7
3,1745269200,25.0,10.5,18.1,4.0,41.0,0.0,0.0,284.7,24.9,...,0.564,0.839,0.631,0.644,0.547,0.573,0.582,0.547,0.568,0.7
4,1745355600,25.5,10.6,18.2,5.1,45.8,0.0,0.0,276.8,24.1,...,0.564,0.839,0.631,0.644,0.547,0.573,0.582,0.547,0.568,0.7


In [40]:
df_encoded_v2['timestamp'] = pd.to_datetime(df_encoded_v2['hour_datetimeEpoch'], unit='s').dt.strftime('%Y-%m-%d %H:%M:%S')
df_encoded_v2['timestamp'] = pd.to_datetime(df_encoded_v2['timestamp'])

In [41]:
df_encoded_v2 = df_encoded_v2.set_index(pd.DatetimeIndex(df_encoded_v2['timestamp']))

In [42]:
hourly_has_event = df_encoded_v2.groupby('region_id')['event_all_region'].resample('h').sum().gt(0).astype(int)
hours_with_events_per_day = hourly_has_event.groupby('region_id').resample('D', level=1).sum()

hours_with_events_per_day = hours_with_events_per_day.reset_index()

hours_with_events_per_day.rename(columns={'event_all_region': 'event_lastDay_region'}, inplace=True)

df_encoded_v2['date'] = df_encoded_v2['timestamp'].dt.date
hours_with_events_per_day['date'] = hours_with_events_per_day['timestamp'].dt.date

df_encoded_v2 = df_encoded_v2.merge(
    hours_with_events_per_day[['region_id', 'date', 'event_lastDay_region']],
    on=['region_id', 'date'],
    how='left'
)

In [43]:
df_encoded_v2 = df_encoded_v2.set_index(['timestamp', 'region_id'])
duplicates = df_encoded_v2.index.duplicated(keep=False)
df_encoded_v2 = df_encoded_v2[~df_encoded_v2.index.duplicated(keep='first')]
df_encoded_v2.reset_index(inplace=True, drop=False)

In [44]:
hours = [1, 2, 3, 6, 12, 18, 24]

In [45]:
for h in hours:
    df_encoded_v2 = df_encoded_v2.sort_values(by=['region_id', 'timestamp'])

    df_shifted = df_encoded_v2[['region_id', 'timestamp', 'event_all_region']].copy()
    df_shifted['timestamp'] = df_shifted['timestamp'] + pd.Timedelta(hours=h)
    df_shifted = df_shifted.rename(columns={'event_all_region': f'event_{h}h_ago'})

    df_encoded_v2 = df_encoded_v2.merge(df_shifted, on=['region_id', 'timestamp'], how='left')

    df_encoded_v2[f'event_{h}h_ago'] = df_encoded_v2[f'event_{h}h_ago'].fillna(0).astype(int)

In [46]:
class UkrainianECBHolidays(holidays.HolidayBase):
    def __init__(self, years=None, **kwargs):
        self.country = "UA"
        super().__init__(years=years, **kwargs)

    def _populate(self, year):
        self[date(year, 1, 1)] = "New Year's Day"
        self[date(year, 1, 7)] = "Orthodox Christmas"
        self[date(year, 3, 8)] = "International Women's Day"
        self[date(year, 5, 1)] = "Labour Day"
        self[date(year, 5, 8)] = "Day of Remembrance and Reconciliation"
        self[date(year, 5, 9)] = "Victory Day"
        self[date(year, 6, 28)] = "Constitution Day of Ukraine"
        self[date(year, 8, 24)] = "Independence Day of Ukraine"
        self[date(year, 12, 25)] = "Christmas (Western)"


class RussianECBHolidays(holidays.HolidayBase):
    def __init__(self, years=None, **kwargs):
        self.country = "RU"
        super().__init__(years=years, **kwargs)

    def _populate(self, year):
        self[date(year, 1, 1)] = "New Year's Day (Russia)"
        self[date(year, 1, 7)] = "Orthodox Christmas (Russia)"
        self[date(year, 2, 23)] = "Defender of the Fatherland Day (Russia)"
        self[date(year, 3, 8)] = "International Women's Day (Russia)"
        self[date(year, 5, 1)] = "Spring and Labor Day (Russia)"
        self[date(year, 5, 9)] = "Victory Day (Russia)"
        self[date(year, 6, 12)] = "Russia Day"
        self[date(year, 11, 4)] = "Unity Day (Russia)"

In [47]:
ua_holidays = UkrainianECBHolidays(years=range(2020, 2032))
ru_holidays = RussianECBHolidays(years=range(2020, 2032))

In [48]:
df_encoded_v2['ru_holiday'] = df_encoded_v2['date'].apply(lambda x: 1 if x in ru_holidays else 0)
df_encoded_v2['ua_holiday'] = df_encoded_v2['date'].apply(lambda x: 1 if x in ua_holidays else 0)

In [49]:
df_encoded_v2.drop('date', axis=1, inplace=True)

In [50]:
df_encoded_v2.drop('timestamp', axis=1, inplace=True)