In [2]:
import datetime
import numpy as np
import pandas as pd

import pickle

from sklearn.feature_extraction.text import TfidfVectorizer as tf_idf


In [3]:
# Show every column and every row when a DataFrame is rendered
# (None removes pandas' truncation limit for that option).
pd.set_option(
    'display.max_columns', None,
    'display.max_rows', None,
)

In [4]:
# --- Input: folder and file holding the ISW reports --------------------
INPUT_DATA_FOLDER, REPORTS_DATA_FILE = "data", "ISW.parquet"

# --- Output: folder and file names for the exported tables --------------
OUTPUT_FOLDER = "output"
ISW_OUTPUT_DATA_FILE, WEATHER_EVENTS_OUTPUT_DATA_FILE = (
    "all_isw.csv",
    "all_weather_by_hour_2023-2025_v1.csv",
)

# --- Pickled models: "<MODEL_FOLDER>/<model>_<version>.pkl" -------------
MODEL_FOLDER = "model"

tfidf_transformer_model, tfidf_transformer_version = "tfidf_transformer", "v1"
count_vectorizer_model, count_vectorizer_version = "count_vectorizer", "v1"

In [5]:
def isNaN(num):
    """Tell whether ``num`` is a "not a value" marker.

    Relies on the fact that NaN-like values (float NaN, pandas NaT) are the
    only ones that compare unequal to themselves.

    Returns whatever ``num != num`` evaluates to (a bool for scalars).
    """
    differs_from_itself = num != num
    return differs_from_itself

## reading data

In [8]:
# Load the ISW reports table from the parent directory's data folder.
# NOTE(review): this is the only path in the notebook prefixed with "../";
# the model/events/weather cells read relative to the current directory - confirm.
df_isw = pd.read_parquet("/".join(("..", INPUT_DATA_FOLDER, REPORTS_DATA_FILE)))

In [9]:
df_isw.head(5)

Unnamed: 0,date,content
0,2022-02-24,"Mason Clark, George Barros, and Kateryna Stepa..."
1,2022-02-25,"Mason Clark, George Barros, and Kateryna Stepa..."
2,2022-02-26,"Mason Clark, George Barros, and Katya Stepanen..."
3,2022-02-27,"Mason Clark, George Barros, and Kateryna Stepa..."
4,2022-02-28,"Mason Clark, George Barros, and Kateryna Stepa..."


## preparing ISW reports

## reading models

In [7]:
# Load the pickled text models from "<MODEL_FOLDER>/<model>_<version>.pkl".
# Each file is opened in a `with` block so the handle is closed right after
# unpickling (the bare `open(...)` call leaked it).
# NOTE: pickle.load can execute arbitrary code - only load trusted model files.
with open(f"{MODEL_FOLDER}/{tfidf_transformer_model}_{tfidf_transformer_version}.pkl", "rb") as model_file:
    tfidf = pickle.load(model_file)
with open(f"{MODEL_FOLDER}/{count_vectorizer_model}_{count_vectorizer_version}.pkl", "rb") as model_file:
    cv = pickle.load(model_file)

In [8]:
def _doc_to_keywords(doc, feature_names, count_vectorizer, tfidf_transformer, top_n=10):
    """Return the ``top_n`` highest-weighted TF-IDF terms of ``doc`` as ``{term: score}``.

    Replaces the former ``tf_idf.conver_doc_to_vector`` call: in this notebook
    ``tf_idf`` is scikit-learn's ``TfidfVectorizer`` class, which has no such
    attribute, so the cell raised AttributeError on a fresh kernel.

    Parameters
    ----------
    doc : str
        Pre-processed document text.
    feature_names : sequence of str
        Vocabulary of ``count_vectorizer``, indexed by column.
    count_vectorizer, tfidf_transformer
        The unpickled models (``cv`` / ``tfidf``); presumably a fitted
        CountVectorizer and TfidfTransformer - verify against the training notebook.
    top_n : int
        Number of terms to keep. NOTE(review): 10 is an assumption; the
        displayed output is truncated, so confirm the original cut-off.

    Scores are rounded to 3 decimals, matching the values shown in the saved output.
    """
    weights = tfidf_transformer.transform(count_vectorizer.transform([doc])).tocoo()
    # Highest score first; ties are broken by the larger column index.
    ranked = sorted(zip(weights.col, weights.data), key=lambda pair: (pair[1], pair[0]), reverse=True)
    return {feature_names[col]: round(float(score), 3) for col, score in ranked[:top_n]}


# Resolve the vocabulary once instead of once per document.
try:
    _cv_feature_names = cv.get_feature_names_out()
except AttributeError:  # scikit-learn < 1.0
    _cv_feature_names = cv.get_feature_names()

df_isw['keywords'] = df_isw['text_preprocessed_lemm'].apply(
    lambda x: _doc_to_keywords(x, _cv_feature_names, cv, tfidf)
)

In [9]:
df_isw.head(5)

Unnamed: 0,date,short_url,title,text_title,full_url,main_html,main_html_v2,main_text,text_preprocessed,text_preprocessed_lemm,text_preprocessed_stemm,keywords
0,2022-02-25,RusCampaignFeb25,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,/backgrounder/russia-ukraine-warning-update-ru...,"<div class=""field field-name-body field-type-t...","<div class=""field field-name-body field-type-t...",\nRussian forces entered major Ukrainian citie...,russian forces entered major ukrainian cities...,russian force entered major ukrainian city in...,russian forc enter major ukrainian citi inclu...,"{'kyiv': 0.355, 'zero': 0.322, 'pm': 0.24, 'lo..."
1,2022-02-26,RusCampaignFeb26,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,/backgrounder/russia-ukraine-warning-update-ru...,"<div class=""field field-name-body field-type-t...","<div class=""field field-name-body field-type-t...",\nRussian forces’ main axes of advance in the ...,russian forces main axes advance last twenty ...,russian force main ax advance last twenty fou...,russian forc main axe advanc last twenti four...,"{'kyiv': 0.422, 'february': 0.189, '11am': 0.1..."
2,2022-02-27,RusCampaignFeb27,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,/backgrounder/russia-ukraine-warning-update-ru...,"<div class=""field field-name-body field-type-t...","<div class=""field field-name-body field-type-t...",\nThe Russian military has likely recognized t...,russian military likely recognized initial ex...,russian military likely recognized initial ex...,russian militari like recogn initi expect lim...,"{'kyiv': 0.309, 'february': 0.223, 'twenty': 0..."
3,2022-02-28,RusCampaignFeb28,"Russian Offensive Campaign Assessment, Februar...","Russian Offensive Campaign Assessment, Februar...",/backgrounder/russian-offensive-campaign-asses...,"<div class=""field field-name-body field-type-t...","<div class=""field field-name-body field-type-t...",\nThe Russian military is reorganizing its mil...,russian military reorganizing military effort...,russian military reorganizing military effort...,russian militari reorgan militari effort atte...,"{'kyiv': 0.288, 'february': 0.235, 'twenty': 0..."
4,2022-03-01,RusCampaignMar1,"Russian Offensive Campaign Assessment, March 1...","Russian Offensive Campaign Assessment, March 1",/backgrounder/russian-offensive-campaign-asses...,"<div class=""field field-name-body field-type-t...","<div class=""field field-name-body field-type-t...",\nRussian forces are completing the reinforcem...,russian forces completing reinforcement resup...,russian force completing reinforcement resupp...,russian forc complet reinforc resuppli troop ...,"{'kyiv': 0.365, 'chernihiv': 0.228, 'hour': 0...."


In [10]:
# Parse the textual report date into a datetime column.
df_isw = df_isw.assign(date_datetime=pd.to_datetime(df_isw["date"]))

In [11]:
# Day after the report date. Vectorised datetime arithmetic replaces the
# per-row Python lambda; missing dates stay NaT.
df_isw['date_tomorrow_datetime'] = df_isw['date_datetime'] + pd.Timedelta(days=1)

In [12]:
# Publish the parsed date under its final column name, then write the table
# as a ';'-separated CSV without the index column.
df_isw = df_isw.rename(columns={"date_datetime": "report_date"})
df_isw.to_csv(
    f"{OUTPUT_FOLDER}/{ISW_OUTPUT_DATA_FILE}",
    index=False,
    sep=";",
)

In [13]:
df_isw.head(5)

Unnamed: 0,date,short_url,title,text_title,full_url,main_html,main_html_v2,main_text,text_preprocessed,text_preprocessed_lemm,text_preprocessed_stemm,keywords,report_date,date_tomorrow_datetime
0,2022-02-25,RusCampaignFeb25,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,/backgrounder/russia-ukraine-warning-update-ru...,"<div class=""field field-name-body field-type-t...","<div class=""field field-name-body field-type-t...",\nRussian forces entered major Ukrainian citie...,russian forces entered major ukrainian cities...,russian force entered major ukrainian city in...,russian forc enter major ukrainian citi inclu...,"{'kyiv': 0.355, 'zero': 0.322, 'pm': 0.24, 'lo...",2022-02-25,2022-02-26
1,2022-02-26,RusCampaignFeb26,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,/backgrounder/russia-ukraine-warning-update-ru...,"<div class=""field field-name-body field-type-t...","<div class=""field field-name-body field-type-t...",\nRussian forces’ main axes of advance in the ...,russian forces main axes advance last twenty ...,russian force main ax advance last twenty fou...,russian forc main axe advanc last twenti four...,"{'kyiv': 0.422, 'february': 0.189, '11am': 0.1...",2022-02-26,2022-02-27
2,2022-02-27,RusCampaignFeb27,Russia-Ukraine Warning Update: Russian Offensi...,Russia-Ukraine Warning Update: Russian Offensi...,/backgrounder/russia-ukraine-warning-update-ru...,"<div class=""field field-name-body field-type-t...","<div class=""field field-name-body field-type-t...",\nThe Russian military has likely recognized t...,russian military likely recognized initial ex...,russian military likely recognized initial ex...,russian militari like recogn initi expect lim...,"{'kyiv': 0.309, 'february': 0.223, 'twenty': 0...",2022-02-27,2022-02-28
3,2022-02-28,RusCampaignFeb28,"Russian Offensive Campaign Assessment, Februar...","Russian Offensive Campaign Assessment, Februar...",/backgrounder/russian-offensive-campaign-asses...,"<div class=""field field-name-body field-type-t...","<div class=""field field-name-body field-type-t...",\nThe Russian military is reorganizing its mil...,russian military reorganizing military effort...,russian military reorganizing military effort...,russian militari reorgan militari effort atte...,"{'kyiv': 0.288, 'february': 0.235, 'twenty': 0...",2022-02-28,2022-03-01
4,2022-03-01,RusCampaignMar1,"Russian Offensive Campaign Assessment, March 1...","Russian Offensive Campaign Assessment, March 1",/backgrounder/russian-offensive-campaign-asses...,"<div class=""field field-name-body field-type-t...","<div class=""field field-name-body field-type-t...",\nRussian forces are completing the reinforcem...,russian forces completing reinforcement resup...,russian force completing reinforcement resupp...,russian forc complet reinforc resuppli troop ...,"{'kyiv': 0.365, 'chernihiv': 0.228, 'hour': 0....",2022-03-01,2022-03-02


## prepare events data

In [14]:
# Location of the raw events table.
EVENTS_DATA_FOLDER, EVENTS_DATA_FILE = "data/1_events", "all_events.csv"

In [15]:
# Read the raw events table.
df_events = pd.read_csv("/".join((EVENTS_DATA_FOLDER, EVENTS_DATA_FILE)))

In [16]:
# Work on a copy without the identifier columns; df_events stays untouched.
df_events_v2 = df_events.drop(columns=["id", "region_id"])

In [17]:
df_events_v2.head(5)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,type,event_time
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,alarm,
1,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,alarm,
2,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,alarm,
3,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,alarm,
4,Вінниччина,Вінниця,0,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 10:42:41,,alarm,


In [18]:
# df_events_v2["start_time"] = df_events_v2.apply(lambda x: x["start"] if not isNaN(x["start"]) else x["event_time"] , axis=1)
# df_events_v2["end_time"] = df_events_v2.apply(lambda x: x["end"] if not isNaN(x["end"]) else x["event_time"], axis=1)

In [19]:
# Parse the textual timestamps: start/end go to new *_time columns,
# event_time is converted in place.
df_events_v2 = df_events_v2.assign(
    start_time=pd.to_datetime(df_events_v2["start"]),
    end_time=pd.to_datetime(df_events_v2["end"]),
    event_time=pd.to_datetime(df_events_v2["event_time"]),
)

In [20]:
# Snap timestamps to whole hours: start is rounded down, end is rounded up,
# and the single event_time is rounded to the nearest hour.
# The lowercase 'h' alias is used because uppercase 'H' is deprecated in
# recent pandas releases.
df_events_v2["start_hour"] = df_events_v2['start_time'].dt.floor('h')
df_events_v2["end_hour"] = df_events_v2['end_time'].dt.ceil('h')
df_events_v2["event_hour"] = df_events_v2['event_time'].dt.round('h')

In [21]:
# Rows without a start/end interval fall back to the rounded event hour.
# fillna does the same coalescing as the former row-wise apply, but vectorised.
df_events_v2["start_hour"] = df_events_v2["start_hour"].fillna(df_events_v2["event_hour"])
df_events_v2["end_hour"] = df_events_v2["end_hour"].fillna(df_events_v2["event_hour"])

In [22]:
# NOTE(review): day_date is derived from start_time only, so rows that carry
# just an event_time (the non-alarm rows shown below) get NaT here - confirm
# that is intended.
df_events_v2["day_date"] = df_events_v2["start_time"].dt.date


def _local_hours_to_epoch(local_hours, tz="Europe/Kiev"):
    """Convert naive wall-clock timestamps in ``tz`` to Unix epoch seconds.

    Replaces ``int(x.strftime('%s'))``: ``%s`` is a platform-specific
    extension (not available on every OS) and interprets the naive timestamp
    in the *machine's* local timezone, so results changed with the host
    configuration. The explicit zone reproduces the values shown in the saved
    output (2022-02-25 22:00 -> 1645819200) and matches the ``Europe/Kiev``
    zone listed in the weather data.

    Missing timestamps become NaN. Ambiguous wall-clock times (autumn DST
    change) are treated as standard time; non-existent ones (spring DST
    change) are shifted forward to the next valid instant.
    """
    localized = local_hours.dt.tz_localize(
        tz,
        ambiguous=np.zeros(len(local_hours), dtype=bool),
        nonexistent="shift_forward",
    )
    return (localized - pd.Timestamp(0, tz="UTC")) // pd.Timedelta(seconds=1)


df_events_v2["start_hour_datetimeEpoch"] = _local_hours_to_epoch(df_events_v2["start_hour"])
df_events_v2["end_hour_datetimeEpoch"] = _local_hours_to_epoch(df_events_v2["end_hour"])

df_events_v2.head(10)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,type,event_time,start_time,end_time,start_hour,end_hour,event_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,alarm,NaT,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,NaT,2022-02-25,1645819200,1645826400
1,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,alarm,NaT,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645848000,1645855200
2,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,alarm,NaT,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645851600,1645855200
3,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,alarm,NaT,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,NaT,2022-02-26,1645855200,1645862400
4,Вінниччина,Вінниця,0,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 10:42:41,,alarm,NaT,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 08:00:00,2022-02-26 11:00:00,NaT,2022-02-26,1645855200,1645866000
5,Вінниччина,Вінниця,0,2022-02-26 10:58:23,2022-02-26 11:59:40,2022-02-26 11:59:40,,alarm,NaT,2022-02-26 10:58:23,2022-02-26 11:59:40,2022-02-26 10:00:00,2022-02-26 12:00:00,NaT,2022-02-26,1645862400,1645869600
6,Львівщина,Львів,0,2022-02-26 13:44:44,2022-02-26 14:27:25,2022-02-26 14:27:25,,alarm,NaT,2022-02-26 13:44:44,2022-02-26 14:27:25,2022-02-26 13:00:00,2022-02-26 15:00:00,NaT,2022-02-26,1645873200,1645880400
7,Рівненщина,Рівненська обл.,1,2022-02-26 15:54:53,2022-02-26 16:14:46,2022-02-26 16:14:46,,alarm,NaT,2022-02-26 15:54:53,2022-02-26 16:14:46,2022-02-26 15:00:00,2022-02-26 17:00:00,NaT,2022-02-26,1645880400,1645887600
8,Волинь,Волинська обл.,1,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:39:26,,alarm,NaT,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:00:00,2022-02-26 17:00:00,NaT,2022-02-26,1645884000,1645887600
9,Хмельниччина,Деражня,0,2022-02-26 16:10:29,2022-02-26 17:19:57,2022-02-26 17:19:57,,alarm,NaT,2022-02-26 16:10:29,2022-02-26 17:19:57,2022-02-26 16:00:00,2022-02-26 18:00:00,NaT,2022-02-26,1645884000,1645891200


In [23]:
# Size of the subset of events whose type is anything other than "alarm".
df_events_v2[df_events_v2["type"] != "alarm"].shape

(5944, 17)

In [24]:
# Preview the events whose type is anything other than "alarm".
df_events_v2[df_events_v2["type"] != "alarm"].head(5)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,type,event_time,start_time,end_time,start_hour,end_hour,event_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch
19933,Сумщина,Лебедин,0,,,,,art_attack,2022-02-26 07:51:10,NaT,NaT,2022-02-26 08:00:00,2022-02-26 08:00:00,2022-02-26 08:00:00,NaT,1645855200,1645855200
19934,Чернігівщина,Седнів,0,,,,,art_attack,2022-02-26 11:05:18,NaT,NaT,2022-02-26 11:00:00,2022-02-26 11:00:00,2022-02-26 11:00:00,NaT,1645866000,1645866000
19935,Сумщина,Охтирка,0,,,,,art_attack,2022-02-26 12:58:10,NaT,NaT,2022-02-26 13:00:00,2022-02-26 13:00:00,2022-02-26 13:00:00,NaT,1645873200,1645873200
19936,Херсонщина,Херсон,0,,,,,art_attack,2022-02-26 13:56:10,NaT,NaT,2022-02-26 14:00:00,2022-02-26 14:00:00,2022-02-26 14:00:00,NaT,1645876800,1645876800
19937,Херсонщина,Херсон,0,,,,,art_attack,2022-02-26 14:47:10,NaT,NaT,2022-02-26 15:00:00,2022-02-26 15:00:00,2022-02-26 15:00:00,NaT,1645880400,1645880400


In [25]:
df_events_v2[df_events_v2["type"]=="alarm"].shape

(19933, 17)

## prepare weather

In [26]:
# Location of the hourly weather table.
WEATHER_DATA_FOLDER, WEATHER_DATA_FILE = "data/1_weather", "all_weather_by_hour.csv"

In [27]:
# Read the hourly weather table and parse its day column into datetimes.
df_weather = pd.read_csv("/".join((WEATHER_DATA_FOLDER, WEATHER_DATA_FILE)))
df_weather = df_weather.assign(day_datetime=pd.to_datetime(df_weather["day_datetime"]))

In [28]:
df_weather.shape

(190656, 67)

In [29]:
df_weather.head(15)

Unnamed: 0,city_latitude,city_longitude,city_resolvedAddress,city_address,city_timezone,city_tzoffset,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_feelslikemax,day_feelslikemin,day_feelslike,day_dew,day_humidity,day_precip,day_precipprob,day_precipcover,day_snow,day_snowdepth,day_windgust,day_windspeed,day_winddir,day_pressure,day_cloudcover,day_visibility,day_solarradiation,day_solarenergy,day_uvindex,day_severerisk,day_sunrise,day_sunriseEpoch,day_sunset,day_sunsetEpoch,day_moonphase,day_conditions,day_description,day_icon,day_source,day_preciptype,day_stations,hour_datetime,hour_datetimeEpoch,hour_temp,hour_feelslike,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,hour_icon,hour_source,hour_stations
0,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,00:00:00,1645653600,0.9,0.9,75.73,-2.9,0.0,0.0,0.0,0.0,,7.2,4.0,132.0,1020.0,24.1,89.0,0.0,,0.0,10.0,Partially cloudy,partly-cloudy-night,obs,remote
1,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,01:00:00,1645657200,0.6,0.6,77.96,-2.8,0.0,0.0,0.0,0.0,,6.5,3.6,147.9,1020.0,24.1,71.1,0.0,,0.0,10.0,Partially cloudy,partly-cloudy-night,obs,remote
2,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,02:00:00,1645660800,0.4,0.4,82.08,-2.3,0.0,0.0,0.0,0.0,,8.3,3.2,198.2,1020.0,24.1,95.4,0.0,,0.0,10.0,Overcast,cloudy,obs,remote
3,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,03:00:00,1645664400,0.2,0.2,83.28,-2.3,0.0,0.0,0.0,0.0,,5.0,2.9,202.3,1020.0,24.1,74.6,0.0,,0.0,10.0,Partially cloudy,partly-cloudy-night,obs,remote
4,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,04:00:00,1645668000,0.0,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,5.0,2.9,203.6,1021.0,24.1,67.8,0.0,,0.0,10.0,Partially cloudy,partly-cloudy-night,obs,remote
5,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,05:00:00,1645671600,-0.1,-0.1,83.86,-2.5,0.0,0.0,0.0,0.0,,4.3,1.8,197.5,1021.0,24.1,94.1,0.0,,0.0,10.0,Overcast,cloudy,obs,remote
6,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,06:00:00,1645675200,0.0,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,2.9,1.1,199.2,1022.0,24.1,100.0,0.0,0.0,0.0,10.0,Overcast,cloudy,obs,remote
7,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,07:00:00,1645678800,0.0,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,2.2,1.4,221.7,1023.0,0.2,97.9,0.0,0.3,0.0,10.0,Overcast,fog,obs,remote
8,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,08:00:00,1645682400,0.1,0.1,87.68,-1.7,0.0,0.0,0.0,0.0,,5.4,3.2,245.3,1023.0,0.1,90.8,179.4,0.6,2.0,10.0,Overcast,fog,obs,remote
9,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,09:00:00,1645686000,1.4,1.4,85.91,-0.7,0.0,0.0,0.0,0.0,,5.0,2.5,247.0,1024.0,0.1,73.3,42.0,0.2,0.0,10.0,Partially cloudy,fog,obs,remote


In [30]:
# len(clmns)

In [31]:
# Weather columns dropped before merging (order kept as originally listed).
weather_exclude = [
    # day-level "feels like" extremes, sun epochs and free-text description
    "day_feelslikemax", "day_feelslikemin",
    "day_sunriseEpoch", "day_sunsetEpoch",
    "day_description",
    # city metadata
    "city_latitude", "city_longitude",
    "city_address", "city_timezone", "city_tzoffset",
    # remaining day-level aggregates
    "day_feelslike", "day_precipprob",
    "day_snow", "day_snowdepth",
    "day_windgust", "day_windspeed", "day_winddir",
    "day_pressure", "day_cloudcover", "day_visibility",
    "day_severerisk", "day_conditions",
    "day_icon", "day_source", "day_preciptype", "day_stations",
    # hour-level columns
    "hour_icon", "hour_source", "hour_stations", "hour_feelslike",
]

In [32]:
# new_list = [x for x in clmns if (x not in weather_exclude)]
# new_list

In [33]:
# Copy of the weather table without the excluded columns.
df_weather_v2 = df_weather.drop(columns=weather_exclude)

In [34]:
# City name = text before the first comma of the resolved address; the one
# address resolved to a region name is mapped to its city.
df_weather_v2["city"] = (
    df_weather_v2["city_resolvedAddress"]
    .apply(lambda address: address.partition(",")[0])
    .replace('Хмельницька область', "Хмельницький")
)

In [35]:
df_weather_v2.head(5)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city
0,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,00:00:00,1645653600,0.9,75.73,-2.9,0.0,0.0,0.0,0.0,,7.2,4.0,132.0,1020.0,24.1,89.0,0.0,,0.0,10.0,Partially cloudy,Черкаси
1,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,01:00:00,1645657200,0.6,77.96,-2.8,0.0,0.0,0.0,0.0,,6.5,3.6,147.9,1020.0,24.1,71.1,0.0,,0.0,10.0,Partially cloudy,Черкаси
2,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,02:00:00,1645660800,0.4,82.08,-2.3,0.0,0.0,0.0,0.0,,8.3,3.2,198.2,1020.0,24.1,95.4,0.0,,0.0,10.0,Overcast,Черкаси
3,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,03:00:00,1645664400,0.2,83.28,-2.3,0.0,0.0,0.0,0.0,,5.0,2.9,202.3,1020.0,24.1,74.6,0.0,,0.0,10.0,Partially cloudy,Черкаси
4,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,04:00:00,1645668000,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,5.0,2.9,203.6,1021.0,24.1,67.8,0.0,,0.0,10.0,Partially cloudy,Черкаси


In [36]:
df_weather_v2.shape

(190656, 38)

## merging data

In [37]:
# Region lookup table (plain string: the f-prefix had no placeholders).
df_regions = pd.read_csv("data/0_meta/regions.csv")

In [38]:
df_regions.head(5)

Unnamed: 0,region,center_city_ua,center_city_en,region_alt,region_id
0,АР Крим,Сімферополь,Simferopol,Крим,1
1,Вінницька,Вінниця,Vinnytsia,Вінниччина,2
2,Волинська,Луцьк,Lutsk,Волинь,3
3,Дніпропетровська,Дніпро,Dnipro,Дніпропетровщина,4
4,Донецька,Донецьк,Donetsk,Донеччина,5


In [39]:
# Attach region metadata to each weather row via the region's centre city.
# validate="many_to_one" fails fast if a centre city appears twice in the
# region table (which would duplicate weather rows); the assertion catches the
# opposite failure of the inner join, i.e. weather rows silently dropped
# because their city has no match.
df_weather_reg = pd.merge(
    df_weather_v2,
    df_regions,
    left_on="city",
    right_on="center_city_ua",
    how="inner",
    validate="many_to_one",
)
assert len(df_weather_reg) == len(df_weather_v2), (
    "weather rows lost in region merge; unmatched cities: "
    f"{set(df_weather_v2['city']) - set(df_regions['center_city_ua'])}"
)

In [40]:
df_weather_reg.head(10)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id
0,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,00:00:00,1645653600,0.9,75.73,-2.9,0.0,0.0,0.0,0.0,,7.2,4.0,132.0,1020.0,24.1,89.0,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
1,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,01:00:00,1645657200,0.6,77.96,-2.8,0.0,0.0,0.0,0.0,,6.5,3.6,147.9,1020.0,24.1,71.1,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
2,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,02:00:00,1645660800,0.4,82.08,-2.3,0.0,0.0,0.0,0.0,,8.3,3.2,198.2,1020.0,24.1,95.4,0.0,,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
3,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,03:00:00,1645664400,0.2,83.28,-2.3,0.0,0.0,0.0,0.0,,5.0,2.9,202.3,1020.0,24.1,74.6,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
4,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,04:00:00,1645668000,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,5.0,2.9,203.6,1021.0,24.1,67.8,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
5,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,05:00:00,1645671600,-0.1,83.86,-2.5,0.0,0.0,0.0,0.0,,4.3,1.8,197.5,1021.0,24.1,94.1,0.0,,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
6,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,06:00:00,1645675200,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,2.9,1.1,199.2,1022.0,24.1,100.0,0.0,0.0,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
7,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,07:00:00,1645678800,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,2.2,1.4,221.7,1023.0,0.2,97.9,0.0,0.3,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
8,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,08:00:00,1645682400,0.1,87.68,-1.7,0.0,0.0,0.0,0.0,,5.4,3.2,245.3,1023.0,0.1,90.8,179.4,0.6,2.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
9,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,09:00:00,1645686000,1.4,85.91,-0.7,0.0,0.0,0.0,0.0,,5.0,2.5,247.0,1024.0,0.1,73.3,42.0,0.2,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23


In [41]:
df_weather_reg.shape

(190656, 43)

In [42]:
df_weather_v2.shape

(190656, 38)

### Merging weather and events

In [43]:
# df_events_v2["start_hour_datetimeEpoch"] = df_events_v2['start_hour'].apply(lambda x: int(x.strftime('%s'))  if not isNaN(x) else 0)
# df_events_v2["end_hour_datetimeEpoch"] = df_events_v2['end_hour'].apply(lambda x: int(x.strftime('%s'))  if not isNaN(x) else 0)

In [44]:
df_events_v2.dtypes

region_title                        object
region_city                         object
all_region                           int64
start                               object
end                                 object
clean_end                           object
intersection_alarm_id              float64
type                                object
event_time                  datetime64[ns]
start_time                  datetime64[ns]
end_time                    datetime64[ns]
start_hour                  datetime64[ns]
end_hour                    datetime64[ns]
event_hour                  datetime64[ns]
day_date                            object
start_hour_datetimeEpoch             int64
end_hour_datetimeEpoch               int64
dtype: object

In [45]:
df_events_v2.shape

(25877, 17)

In [46]:
df_events_v2.head(10)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,type,event_time,start_time,end_time,start_hour,end_hour,event_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,alarm,NaT,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,NaT,2022-02-25,1645819200,1645826400
1,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,alarm,NaT,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645848000,1645855200
2,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,alarm,NaT,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645851600,1645855200
3,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,alarm,NaT,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,NaT,2022-02-26,1645855200,1645862400
4,Вінниччина,Вінниця,0,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 10:42:41,,alarm,NaT,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 08:00:00,2022-02-26 11:00:00,NaT,2022-02-26,1645855200,1645866000
5,Вінниччина,Вінниця,0,2022-02-26 10:58:23,2022-02-26 11:59:40,2022-02-26 11:59:40,,alarm,NaT,2022-02-26 10:58:23,2022-02-26 11:59:40,2022-02-26 10:00:00,2022-02-26 12:00:00,NaT,2022-02-26,1645862400,1645869600
6,Львівщина,Львів,0,2022-02-26 13:44:44,2022-02-26 14:27:25,2022-02-26 14:27:25,,alarm,NaT,2022-02-26 13:44:44,2022-02-26 14:27:25,2022-02-26 13:00:00,2022-02-26 15:00:00,NaT,2022-02-26,1645873200,1645880400
7,Рівненщина,Рівненська обл.,1,2022-02-26 15:54:53,2022-02-26 16:14:46,2022-02-26 16:14:46,,alarm,NaT,2022-02-26 15:54:53,2022-02-26 16:14:46,2022-02-26 15:00:00,2022-02-26 17:00:00,NaT,2022-02-26,1645880400,1645887600
8,Волинь,Волинська обл.,1,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:39:26,,alarm,NaT,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:00:00,2022-02-26 17:00:00,NaT,2022-02-26,1645884000,1645887600
9,Хмельниччина,Деражня,0,2022-02-26 16:10:29,2022-02-26 17:19:57,2022-02-26 17:19:57,,alarm,NaT,2022-02-26 16:10:29,2022-02-26 17:19:57,2022-02-26 16:00:00,2022-02-26 18:00:00,NaT,2022-02-26,1645884000,1645891200


In [47]:
# df_events_v2_sample = df_events_v2.sample(10)
# df_events_v2_sample.shape

# One dict per event row, plus the accumulator filled by the expansion cell.
events_dict = df_events_v2.to_dict(orient='records')
events_by_hour = list()

In [48]:
events_dict[0]

{'region_title': 'Вінниччина',
 'region_city': 'Вінниця',
 'all_region': 0,
 'start': '2022-02-25 22:55:42',
 'end': '2022-02-25 23:41:53',
 'clean_end': '2022-02-25 23:41:53',
 'intersection_alarm_id': nan,
 'type': 'alarm',
 'event_time': NaT,
 'start_time': Timestamp('2022-02-25 22:55:42'),
 'end_time': Timestamp('2022-02-25 23:41:53'),
 'start_hour': Timestamp('2022-02-25 22:00:00'),
 'end_hour': Timestamp('2022-02-26 00:00:00'),
 'event_hour': NaT,
 'day_date': datetime.date(2022, 2, 25),
 'start_hour_datetimeEpoch': 1645819200,
 'end_hour_datetimeEpoch': 1645826400}

In [49]:
# Expand every event into one record per hour between start_hour and
# end_hour (both inclusive), tagging each copy with hour_level_event_time.
# The list is rebuilt from scratch here, so re-running this cell no longer
# appends a second copy of every record to a list created in another cell.
# The lowercase 'h' alias replaces the deprecated '1H'.
# NOTE(review): end_hour is already rounded up and the range includes it, so
# an alarm ending at 23:41 also yields a 00:00 record - confirm this is wanted.
events_by_hour = [
    {**event, "hour_level_event_time": hour}
    for event in events_dict
    for hour in pd.date_range(start=event["start_hour"], end=event["end_hour"], freq='h')
]

In [50]:
# Hour-level events table built from the expanded records.
df_events_v3 = pd.DataFrame(events_by_hour)

In [51]:
# Unix epoch seconds of each hour-level timestamp, read as Europe/Kiev wall
# time. Replaces int(x.strftime('%s')): '%s' is a platform-specific extension
# and uses the machine's local timezone, so the result depended on the host.
# The explicit zone reproduces the values in the saved output
# (2022-02-25 22:00 -> 1645819200). Missing timestamps become NaN; ambiguous
# DST times are treated as standard time, non-existent ones shift forward.
df_events_v3["hour_level_event_datetimeEpoch"] = (
    df_events_v3["hour_level_event_time"].dt.tz_localize(
        "Europe/Kiev",
        ambiguous=np.zeros(len(df_events_v3), dtype=bool),
        nonexistent="shift_forward",
    )
    - pd.Timestamp(0, tz="UTC")
) // pd.Timedelta(seconds=1)

In [52]:
df_events_v3.shape

(64804, 19)

In [53]:
df_events_v3.head(15)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,type,event_time,start_time,end_time,start_hour,end_hour,event_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch,hour_level_event_time,hour_level_event_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,alarm,NaT,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,NaT,2022-02-25,1645819200,1645826400,2022-02-25 22:00:00,1645819200
1,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,alarm,NaT,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,NaT,2022-02-25,1645819200,1645826400,2022-02-25 23:00:00,1645822800
2,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,alarm,NaT,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,NaT,2022-02-25,1645819200,1645826400,2022-02-26 00:00:00,1645826400
3,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,alarm,NaT,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645848000,1645855200,2022-02-26 06:00:00,1645848000
4,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,alarm,NaT,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645848000,1645855200,2022-02-26 07:00:00,1645851600
5,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,alarm,NaT,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645848000,1645855200,2022-02-26 08:00:00,1645855200
6,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,alarm,NaT,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645851600,1645855200,2022-02-26 07:00:00,1645851600
7,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,alarm,NaT,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645851600,1645855200,2022-02-26 08:00:00,1645855200
8,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,alarm,NaT,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,NaT,2022-02-26,1645855200,1645862400,2022-02-26 08:00:00,1645855200
9,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,alarm,NaT,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,NaT,2022-02-26,1645855200,1645862400,2022-02-26 09:00:00,1645858800


In [54]:
# Preview the hourly weather frame that the events are merged into below.
df_weather_reg.head(5)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id
0,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,00:00:00,1645653600,0.9,75.73,-2.9,0.0,0.0,0.0,0.0,,7.2,4.0,132.0,1020.0,24.1,89.0,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
1,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,01:00:00,1645657200,0.6,77.96,-2.8,0.0,0.0,0.0,0.0,,6.5,3.6,147.9,1020.0,24.1,71.1,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
2,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,02:00:00,1645660800,0.4,82.08,-2.3,0.0,0.0,0.0,0.0,,8.3,3.2,198.2,1020.0,24.1,95.4,0.0,,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
3,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,03:00:00,1645664400,0.2,83.28,-2.3,0.0,0.0,0.0,0.0,,5.0,2.9,202.3,1020.0,24.1,74.6,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
4,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,04:00:00,1645668000,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,5.0,2.9,203.6,1021.0,24.1,67.8,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23


In [55]:
# (rows, columns) of the weather frame before merging; compare with the merged shape.
df_weather_reg.shape

(190656, 43)

In [56]:
# Preview of the hour-level events again (same frame as previewed earlier in the notebook).
df_events_v3.head(10)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,type,event_time,start_time,end_time,start_hour,end_hour,event_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch,hour_level_event_time,hour_level_event_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,alarm,NaT,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,NaT,2022-02-25,1645819200,1645826400,2022-02-25 22:00:00,1645819200
1,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,alarm,NaT,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,NaT,2022-02-25,1645819200,1645826400,2022-02-25 23:00:00,1645822800
2,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,alarm,NaT,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,NaT,2022-02-25,1645819200,1645826400,2022-02-26 00:00:00,1645826400
3,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,alarm,NaT,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645848000,1645855200,2022-02-26 06:00:00,1645848000
4,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,alarm,NaT,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645848000,1645855200,2022-02-26 07:00:00,1645851600
5,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,alarm,NaT,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645848000,1645855200,2022-02-26 08:00:00,1645855200
6,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,alarm,NaT,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645851600,1645855200,2022-02-26 07:00:00,1645851600
7,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,alarm,NaT,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645851600,1645855200,2022-02-26 08:00:00,1645855200
8,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,alarm,NaT,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,NaT,2022-02-26,1645855200,1645862400,2022-02-26 08:00:00,1645855200
9,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,alarm,NaT,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,NaT,2022-02-26,1645855200,1645862400,2022-02-26 09:00:00,1645858800


In [57]:
# Prefix every event column with "event_" so the names stay distinguishable
# from the weather columns once both frames are merged. `rename` returns a
# new frame, so `df_events_v3` is left untouched.
df_events_v4 = df_events_v3.rename(columns=lambda column: f"event_{column}")

In [58]:
# Attach alarm events to the hourly weather rows by (region, hour epoch).
# A left join keeps every weather row; hours without an event get empty
# event_* columns. A weather row matching several event rows is repeated once
# per match, which is why the merged frame has more rows than the weather frame.
weather_join_keys = ["region_alt", "hour_datetimeEpoch"]
event_join_keys = ["event_region_title", "event_hour_level_event_datetimeEpoch"]
df_weather_v4 = pd.merge(
    df_weather_reg,
    df_events_v4,
    how="left",
    left_on=weather_join_keys,
    right_on=event_join_keys,
)

In [59]:
# Preview the merged weather + events frame (event_* columns are empty where no event matched).
df_weather_v4.head(10)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id,event_region_title,event_region_city,event_all_region,event_start,event_end,event_clean_end,event_intersection_alarm_id,event_type,event_event_time,event_start_time,event_end_time,event_start_hour,event_end_hour,event_event_hour,event_day_date,event_start_hour_datetimeEpoch,event_end_hour_datetimeEpoch,event_hour_level_event_time,event_hour_level_event_datetimeEpoch
0,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,00:00:00,1645653600,0.9,75.73,-2.9,0.0,0.0,0.0,0.0,,7.2,4.0,132.0,1020.0,24.1,89.0,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,,,,NaT,
1,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,01:00:00,1645657200,0.6,77.96,-2.8,0.0,0.0,0.0,0.0,,6.5,3.6,147.9,1020.0,24.1,71.1,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,,,,NaT,
2,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,02:00:00,1645660800,0.4,82.08,-2.3,0.0,0.0,0.0,0.0,,8.3,3.2,198.2,1020.0,24.1,95.4,0.0,,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,,,,NaT,
3,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,03:00:00,1645664400,0.2,83.28,-2.3,0.0,0.0,0.0,0.0,,5.0,2.9,202.3,1020.0,24.1,74.6,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,,,,NaT,
4,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,04:00:00,1645668000,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,5.0,2.9,203.6,1021.0,24.1,67.8,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,,,,NaT,
5,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,05:00:00,1645671600,-0.1,83.86,-2.5,0.0,0.0,0.0,0.0,,4.3,1.8,197.5,1021.0,24.1,94.1,0.0,,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,,,,NaT,
6,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,06:00:00,1645675200,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,2.9,1.1,199.2,1022.0,24.1,100.0,0.0,0.0,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,,,,NaT,
7,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,07:00:00,1645678800,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,2.2,1.4,221.7,1023.0,0.2,97.9,0.0,0.3,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,,,,NaT,
8,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,08:00:00,1645682400,0.1,87.68,-1.7,0.0,0.0,0.0,0.0,,5.4,3.2,245.3,1023.0,0.1,90.8,179.4,0.6,2.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,,,,NaT,
9,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,09:00:00,1645686000,1.4,85.91,-0.7,0.0,0.0,0.0,0.0,,5.0,2.5,247.0,1024.0,0.1,73.3,42.0,0.2,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,,,,NaT,


In [60]:
# (rows, columns) after the merge; more rows than the weather frame means some hours matched several event rows.
df_weather_v4.shape

(207368, 62)

In [61]:
# Persist the merged weather + events table as a semicolon-separated CSV
# without the index column.
weather_events_csv_path = "/".join([OUTPUT_FOLDER, WEATHER_EVENTS_OUTPUT_DATA_FILE])
df_weather_v4.to_csv(weather_events_csv_path, sep=";", index=False)