## <span style='color:#ff5f27'> 📝 Imports </span>

In [1]:
import json
import os
from datetime import datetime

import pandas as pd
import requests

## <span style='color:#ff5f27'> 👮🏻‍♂️ API Key </span>

In [2]:
# Token sent to the air-quality API by `get_json`.
# Read from the WAQI_API_KEY environment variable when it is set, so the real token
# does not have to be saved inside the notebook; otherwise the original placeholder
# value is kept and must be replaced by hand.
API_KEY = os.environ.get('WAQI_API_KEY', 'YOUR_API_KEY')

## <span style='color:#ff5f27'> 🧑🏻‍🏫 Functions </span>

In [3]:
def get_json(city_name, timeout=30):
    """Request the feed for `city_name` and return the `data` member of the reply.

    Args:
        city_name: Value interpolated into the feed URL path.
        timeout: Seconds to wait for the server before `requests` gives up.
            Without it a stalled connection would block the notebook forever.

    Returns:
        Whatever the decoded JSON reply holds under the 'data' key.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no answer arrives within `timeout` seconds.
        ValueError: If the reply reports status 'error' (for example a
            rejected token); in that case 'data' holds the error message
            rather than station readings.
    """
    response = requests.get(
        f'https://api.waqi.info/feed/{city_name}/?token={API_KEY}',
        timeout=timeout,
    )
    response.raise_for_status()

    body = response.json()
    # Fail here with the API's own message instead of letting the caller hit an
    # opaque TypeError when it tries to index into an error string.
    if body.get('status') == 'error':
        raise ValueError(
            f"Air-quality request for {city_name!r} failed: {body.get('data')}"
        )
    return body['data']


def get_data(city_name):
    """Fetch the feed for `city_name` and flatten it into one row of 20 values.

    Args:
        city_name: Passed to `get_json` and stored as the first value of the row.

    Returns:
        A list laid out as:
        city name, AQI, date string, location (the geo value rendered with
        `str`), the `v` values of the 'h', 'p', 'pm10' and 't' entries of
        `iaqi`, then (avg, max, min) of the first daily-forecast entry for
        'o3', 'pm10', 'pm25' and 'uvi', in that order.

    Raises:
        KeyError, IndexError: If the feed lacks any of the fields read here.
    """
    # Named `payload` rather than `json` so the imported json module is not shadowed.
    payload = get_json(city_name)
    iaqi = payload['iaqi']
    forecast = payload['forecast']['daily']

    row = [
        city_name,
        payload['aqi'],                 # AQI
        payload['time']['s'],           # Date
        str(payload['city']['geo']),    # Location
        iaqi['h']['v'],
        iaqi['p']['v'],
        iaqi['pm10']['v'],
        iaqi['t']['v'],
    ]

    # Only the first entry of each forecast series is used. Every series
    # contributes avg, max, min; previously 'uvi' emitted avg three times,
    # so the max and min slots carried the wrong value.
    for series in ('o3', 'pm10', 'pm25', 'uvi'):
        first_entry = forecast[series][0]
        row.extend(first_entry[stat] for stat in ('avg', 'max', 'min'))

    return row


def timestamp_2_time(x):
    """Convert a 'YYYY-MM-DD HH:MM:SS' value to integer epoch milliseconds.

    `x` is rendered with `str` before parsing. The parsed datetime carries no
    time zone, so `timestamp()` interprets it in the machine's local zone.

    Raises:
        ValueError: If the text does not match the expected format.
    """
    parsed = datetime.strptime(str(x), '%Y-%m-%d %H:%M:%S')
    return int(parsed.timestamp() * 1000)

## <span style='color:#ff5f27'> 👩🏻‍🔬 Parsing </span>

In [4]:
# Cities whose feeds are fetched; one row is produced per entry.
cities = [
    'Kyiv',
    'Lviv',
    'Stockholm',
    'Sundsvall',
    'Malmo',
]

# Rows come back in the same order as `cities`.
data_parsed = list(map(get_data, cities))

## <span style='color:#ff5f27'> 🧑🏻‍🏫 Dataset Preparation </span>

In [5]:
# Column labels for the parsed rows, one label per value and in the same order.
col_names = [
    # identification
    'city', 'aqi', 'date', 'location',
    # current `iaqi` readings
    'iaqi_h', 'iaqi_p', 'iaqi_pm10', 'iaqi_t',
    # first daily-forecast entry per series: avg, max, min
    'o3_avg', 'o3_max', 'o3_min',
    'pm10_avg', 'pm10_max', 'pm10_min',
    'pm25_avg', 'pm25_max', 'pm25_min',
    'uvi_avg', 'uvi_max', 'uvi_min',
]

In [6]:
# One row per city, labelled with `col_names`.
new_data = pd.DataFrame(data_parsed, columns=col_names)

# Replace the date strings with integer epoch milliseconds.
new_data['date'] = new_data['date'].apply(timestamp_2_time)

new_data.head()

Unnamed: 0,city,aqi,date,location,iaqi_h,iaqi_p,iaqi_pm10,iaqi_t,o3_avg,o3_max,o3_min,pm10_avg,pm10_max,pm10_min,pm25_avg,pm25_max,pm25_min,uvi_avg,uvi_max,uvi_min
0,Kyiv,6,1662487200000,"[50.444, 30.54]",98.78,1022.7,2,13.91,19,27,11,7,11,4,23,39,14,0,0,0
1,Lviv,10,1655830800000,"[49.8475, 24.0155]",93.0,97615.0,9,16.3,30,34,29,5,11,5,18,38,15,1,1,1
2,Stockholm,13,1662476400000,"[59.34082695028323, 18.05825368127835]",59.0,1021.0,13,17.0,17,25,6,6,12,3,14,30,9,0,0,0
3,Sundsvall,11,1662476400000,"[62.38855964418764, 17.308889830123555]",34.3,1020.5,10,15.8,19,25,13,3,4,2,7,8,6,0,0,0
4,Malmo,23,1662476400000,"[55.606388092041016, 13.00196361541748]",54.5,1019.0,4,17.3,27,32,24,5,5,3,10,11,8,1,1,1


## <span style="color:#ff5f27;"> 🔮 Connecting to Hopsworks Feature Store </span>

In [7]:
import hopsworks

# Log in to Hopsworks; the returned project handle is kept for later use.
# NOTE(review): how credentials are supplied is not visible in this notebook - confirm.
project = hopsworks.login()

# Feature store handle of that project; used below as the default `fs` argument.
fs = project.get_feature_store() 

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/167
Connected. Call `.close()` to terminate connection gracefully.


## <span style="color:#ff5f27;">🪄 👩🏻‍🔬 Retrieving or Creating Feature Group</span>

In [8]:
def get_or_create_feature_group(name='air_quality',fs=fs):
    """Return version 1 of the feature group called `name`, creating it if needed.

    Args:
        name: Feature group name.
        fs: Feature store handle to call. The default is the module-level `fs`
            as it was when this cell ran (default values are bound once, at
            definition time), so re-run this cell after reconnecting.

    Returns:
        The object returned by `fs.get_or_create_feature_group`.
    """
    return fs.get_or_create_feature_group(
        name=name,
        description='Characteristics of each day',
        version=1,
        # The inserted frame must therefore carry an 'index' column.
        primary_key=['index'],
        online_enabled=True,
        # A single feature name, passed as a string rather than a one-element list.
        event_time='date',
    )

In [9]:
# Fetch (or create) the 'air_quality' feature group using the default arguments.
feature_group = get_or_create_feature_group()

# reset_index() turns the frame's row index into an 'index' column, which is the
# column declared as the feature group's primary key.
# NOTE(review): the index restarts at 0 on every run, so repeated runs reuse the
# same key values - confirm this is intended.
feature_group.insert(new_data.reset_index())

Uploading Dataframe: 0.00% |          | Rows 0/5 | Elapsed Time: 00:00 | Remaining Time: ?

Launching offline feature group backfill job...
Backfill Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/167/jobs/named/air_quality_1_offline_fg_backfill/executions


(<hsfs.core.job.Job at 0x7f91d3682400>, None)

---