# <span style="color:#ff5f27"> 📝 Imports </span>

In [1]:
import pandas as pd
from datetime import datetime

# <span style="color:#ff5f27"> 🏙️ City coordinates </span>

In [2]:
# [latitude, longitude] pairs in decimal degrees, keyed by city identifier.
# Southern latitudes and western longitudes are negative.
coordinates = {
    'Kyiv': [50.5, 30.5],  # latitude, longitude
    'London': [51.5, -0.099990845],
    'Paris': [48.90001, 2.4000092],
    'Stockholm': [59.300003, 18.100006],
    'New_York': [40.699997, -74],
    'Los_Angeles': [34.1, -118.2],
    'Singapore': [1.4000015, 103.80002],
    # Fixed: the previous pair [40.300003, 84.2] lies in the northern
    # hemisphere at 84°E, far from Sydney, Australia (about 33.9°S, 151.2°E).
    # The 'Sidney' spelling of the key is kept so lookups by that key still work.
    # NOTE(review): assumes the intended city is Sydney, Australia, like the
    # other major world cities listed here - confirm, and re-fetch any data
    # that was downloaded with the old pair.
    'Sidney': [-33.9, 151.2],
    'Hong_Kong': [22.300003, 114.20001],
    'Rome': [41.90001, 12.5]
}

# <span style="color:#ff5f27"> 👩🏻‍🔬 Data Preparation </span>

In [3]:
def data_review(data, city_name):
    """Clean one city's raw hourly weather frame and tag it with the city.

    Columns are renamed by position, so ``data`` must hold exactly seven
    columns in this order: ``time``, temperature, humidity, precipitation,
    weather code, wind speed, wind direction.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw readings. ``time`` holds strings such as ``2022-01-01T00:00``
        (a space instead of ``T`` is accepted too). Temperature,
        precipitation and wind speed may be strings with a decimal comma
        or already-numeric columns.
    city_name : str
        Value written to the ``city_name`` column of every row.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``city_name``, ``date``,
        ``weathercode``, ``temperature``, ``humidity``, ``precipitation``,
        ``wind_speed``, ``wind_direction``. ``date`` is datetime,
        temperature / precipitation / wind speed are float, humidity and
        wind direction are int.

    Notes
    -----
    Prints the smallest and largest raw ``time`` value as a quick range
    check. The caller's frame is not modified.

    NOTE(review): the historical CSV loaded later in this notebook names
    the weather-code column ``weather_condition`` while this function emits
    ``weathercode`` - confirm which name downstream consumers expect.
    """
    # Work on a copy. Renaming the caller's columns in place made a second
    # call on the same raw frame fail, because `time` no longer existed.
    cleaned = data.copy()

    # A list (not a set) keeps the printed min/max rows in a stable order.
    print(cleaned['time'].agg(['min', 'max']))

    # Normalise the ISO "T" separator, then parse with an explicit format.
    cleaned['time'] = pd.to_datetime(
        cleaned['time'].str.replace('T', ' ', regex=False),
        format='%Y-%m-%d %H:%M',
    )

    cleaned['city'] = city_name

    # Positional rename: seven input columns plus the city column added above.
    cleaned.columns = [
        'date',
        'temperature',
        'humidity',
        'precipitation',
        'weathercode',
        'wind_speed',
        'wind_direction',
        'city_name',
    ]

    # Decimal commas only occur in text columns; columns that were already
    # parsed as numbers have no `.str` accessor and are left as they are.
    decimal_columns = ['temperature', 'precipitation', 'wind_speed']
    for column in decimal_columns:
        if not pd.api.types.is_numeric_dtype(cleaned[column]):
            cleaned[column] = cleaned[column].str.replace(',', '.', regex=False)

    cleaned = cleaned.astype({
        'temperature': 'float',
        'precipitation': 'float',
        'wind_speed': 'float',
        'humidity': 'int',
        'wind_direction': 'int',
    })

    return cleaned[
        [
            'city_name',
            'date',
            'weathercode',
            'temperature',
            'humidity',
            'precipitation',
            'wind_speed',
            'wind_direction',
        ]
    ]

---
# <span style="color:#ff5f27"> 🕵🏻 Data Exploration </span>

In [4]:
# Read the historical weather CSV and convert its `date` column to datetime
# in a single chained expression.
data_historical = (
    pd.read_csv('data/historical_weather_data.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

# Last expression of the cell: render the frame.
data_historical

Unnamed: 0,city_name,date,weather_condition,temperature,humidity,precipitation,wind_speed,wind_direction
0,Rome,2000-01-01 00:00:00,0,1.3,77,0.0,10.1,17
1,Rome,2000-01-01 01:00:00,0,1.0,78,0.0,9.7,22
2,Rome,2000-01-01 02:00:00,0,0.8,79,0.0,9.2,26
3,Rome,2000-01-01 03:00:00,0,0.6,79,0.0,8.2,29
4,Rome,2000-01-01 04:00:00,0,0.5,78,0.0,8.5,28
...,...,...,...,...,...,...,...,...
2016235,Kyiv,2022-12-31 19:00:00,1,5.3,86,0.0,16.9,220
2016236,Kyiv,2022-12-31 20:00:00,3,5.2,86,0.0,16.8,222
2016237,Kyiv,2022-12-31 21:00:00,3,5.2,87,0.0,18.1,221
2016238,Kyiv,2022-12-31 22:00:00,3,5.6,86,0.0,18.6,226


In [5]:
# Show the dtype of every column in the loaded historical frame.
data_historical.dtypes

city_name                    object
date                 datetime64[ns]
weather_condition             int64
temperature                 float64
humidity                      int64
precipitation               float64
wind_speed                  float64
wind_direction                int64
dtype: object

---
# <span style="color:#ff5f27"> 🔮 Connecting to Hopsworks Feature Store </span>

In [6]:
import hopsworks

# Log in to Hopsworks; the returned object is the project handle.
project = hopsworks.login()

# Feature store handle of that project, used below to create feature groups.
fs = project.get_feature_store() 

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/3346




Connected. Call `.close()` to terminate connection gracefully.


## <span style="color:#ff5f27">🪄 🌦️ Creating Historical Weather Feature Group </span>

In [7]:
# Fetch the feature group's metadata object, or create it if it does not
# exist yet; `date` is declared as the event-time column.
weather_historical_fg = fs.get_or_create_feature_group(
    name='weather_historical_fg',
    version=1,
    description='Hourly weather data for 10 cities since January 2000',
    primary_key=['city_name'],
    event_time='date',
    online_enabled=True,
)

# Fixed: the insert was called on `weather_historical`, a name this notebook
# never defines (NameError on a fresh kernel). The handle created above is
# `weather_historical_fg`.
weather_historical_fg.insert(data_historical)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/3346/fs/3293/fg/20839


Uploading Dataframe: 0.00% |          | Rows 0/2016240 | Elapsed Time: 00:00 | Remaining Time: ?

Launching offline feature group backfill job...
Backfill Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/3346/jobs/named/weather_historical_fg_1_offline_fg_backfill/executions


(<hsfs.core.job.Job at 0x7fecdfd534c0>, None)

---