# Setup

In [26]:
import datetime
import time
import requests
import pandas as pd
import hopsworks
import json
import os
import warnings
import openmeteo_requests
warnings.filterwarnings("ignore")

In [8]:
# Load the Hopsworks API key from a local file; trailing whitespace
# (e.g. the final newline) is removed before the key is used.
with open("data/hopsworks-token.txt") as token_file:
    hopsworks_api_key = token_file.read().rstrip()

In [9]:
# Log in to Hopsworks and get a handle on the project's feature store.
project = hopsworks.login(api_key_value=hopsworks_api_key)
fs = project.get_feature_store()

# The secrets API is reached through an explicit connection to the hosted
# Hopsworks instance, authenticated with the same API key.
secrets_connection = hopsworks.connection(
    host="c.app.hopsworks.ai",
    api_key_value=hopsworks_api_key,
)
secrets = secrets_connection.get_secrets_api()

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1160342
Connected. Call `.close()` to terminate connection gracefully.
Connected. Call `.close()` to terminate connection gracefully.


In [10]:
# Fetch the AQICN API token and the sensor description from the secret store.
aqi_token = secrets.get_secret("AQI_API_KEY").value
location_str = secrets.get_secret("SENSOR_LOCATION_JSON").value

# The sensor description is a JSON object; unpack the fields used further down.
location = json.loads(location_str)
country, city, street = (location[field] for field in ("country", "city", "street"))
aqicn_url = location["aqicn_url"]
latitude, longitude = location["latitude"], location["longitude"]

today = datetime.date.today()

# Show the raw JSON string as the cell output.
location_str

'{"country": "germany", "city": "reutlingen", "street": "zaisentalstra\\u00dfe", "aqicn_url": "https://api.waqi.info/feed/A54451", "latitude": 48.50052694662806, "longitude": 9.194111640132975}'

In [11]:
# Look up version 1 of the air-quality and weather feature groups.
air_quality_fg = fs.get_feature_group(name="air_quality", version=1)
weather_fg = fs.get_feature_group(name="weather", version=1)

# Current air quality

In [45]:
# Fetch the latest reading for the configured sensor from the AQICN feed.
# - The token goes through `params` so requests URL-encodes it.
# - The timeout stops the cell from hanging forever if the API is unreachable.
# - HTTP-level failures are raised before the body is parsed.
aqi_response = requests.get(aqicn_url, params={"token": aqi_token}, timeout=30)
aqi_response.raise_for_status()
air_quality_raw = aqi_response.json()

# Explicit check instead of a bare `assert`: asserts carry no message and are
# stripped when Python runs with -O.
if air_quality_raw.get("status") != "ok":
    raise RuntimeError(
        f"AQICN request failed with status {air_quality_raw.get('status')!r}: "
        f"{air_quality_raw.get('data')!r}"
    )
data = air_quality_raw["data"]

# Build the single-row frame in one go (layout adapted from the book).
# A missing pm25 reading becomes NaN once the column is cast to float64.
air_quality_df = pd.DataFrame(
    {
        "pm25": [data["iaqi"].get("pm25", {}).get("v", None)],
        "country": [country],
        "city": [city],
        "street": [street],
        "date": [today],
        "url": [aqicn_url],
    }
).astype({"pm25": "float64"})
# Convert the plain date object to a pandas datetime column.
air_quality_df["date"] = pd.to_datetime(air_quality_df["date"])
air_quality_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   pm25     1 non-null      float64       
 1   country  1 non-null      object        
 2   city     1 non-null      object        
 3   street   1 non-null      object        
 4   date     1 non-null      datetime64[ns]
 5   url      1 non-null      object        
dtypes: datetime64[ns](1), float64(1), object(4)
memory usage: 180.0+ bytes


# Current weather forecast

In [46]:
# Open-Meteo client and the forecast endpoint queried below.
openmeteo_url = "https://api.open-meteo.com/v1/ecmwf"
om = openmeteo_requests.Client()

# Re-read the current date for this section.
today = datetime.date.today()

In [47]:
# Request the hourly forecast for the sensor's coordinates. The order of the
# variables here fixes the positional indices used with `hourly.Variables(...)`.
hourly_variables = ["temperature_2m", "precipitation", "wind_speed_10m", "wind_direction_10m"]
params = {
    "latitude": latitude,
    "longitude": longitude,
    "hourly": hourly_variables,
}
responses = om.weather_api(openmeteo_url, params=params)
response = responses[0]

# Echo the metadata of the location the API actually resolved.
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Extraction pattern adapted from the openmeteo_requests examples:
# one numpy array per requested variable, in request order.
hourly = response.Hourly()
(
    hourly_temperature_2m,
    hourly_precipitation,
    hourly_wind_speed_10m,
    hourly_wind_direction_10m,
) = (hourly.Variables(position).ValuesAsNumpy() for position in range(4))

# Timestamps for the hourly samples: [Time, TimeEnd) stepped by Interval seconds.
hourly_timestamps = pd.date_range(
    start=pd.to_datetime(hourly.Time(), unit="s"),
    end=pd.to_datetime(hourly.TimeEnd(), unit="s"),
    freq=pd.Timedelta(seconds=hourly.Interval()),
    inclusive="left",
)
hourly_data = {
    "date": hourly_timestamps,
    "temperature_2m_mean": hourly_temperature_2m,
    "precipitation_sum": hourly_precipitation,
    "wind_speed_10m_max": hourly_wind_speed_10m,
    "wind_direction_10m_dominant": hourly_wind_direction_10m,
}

# Drop incomplete rows and index by timestamp so the frame can be filtered by time of day.
hourly_dataframe = pd.DataFrame(data=hourly_data).dropna().set_index("date")

# Keep only the sample around 12:00 of each day, giving one row per forecast day.
daily_df = hourly_dataframe.between_time("11:59", "12:01").reset_index()

# Truncate the timestamps to the calendar day while keeping a datetime dtype.
daily_df["date"] = pd.to_datetime(pd.to_datetime(daily_df["date"]).dt.date)
daily_df["city"] = city
daily_df.info()

Coordinates 48.75°N 9.25°E
Elevation 378.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 6 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   date                         10 non-null     datetime64[ns]
 1   temperature_2m_mean          10 non-null     float32       
 2   precipitation_sum            10 non-null     float32       
 3   wind_speed_10m_max           10 non-null     float32       
 4   wind_direction_10m_dominant  10 non-null     float32       
 5   city                         10 non-null     object        
dtypes: datetime64[ns](1), float32(4), object(1)
memory usage: 452.0+ bytes


# Update feature store

In [48]:
# Write today's air-quality row to the feature group. The returned value
# (a job handle together with the validation report) is shown as the cell output.
air_quality_fg.insert(air_quality_df)

2024-11-06 18:03:11,587 INFO: 	1 expectation(s) included in expectation_suite.
Validation succeeded.
Validation Report saved successfully, explore a summary at https://c.app.hopsworks.ai:443/p/1160342/fs/1151045/fg/1338721


Uploading Dataframe: 0.00% |          | Rows 0/1 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: air_quality_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/1160342/jobs/named/air_quality_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x109959df0>,
 {
   "success": true,
   "results": [
     {
       "success": true,
       "expectation_config": {
         "expectation_type": "expect_column_min_to_be_between",
         "kwargs": {
           "column": "pm25",
           "min_value": -0.1,
           "max_value": 500.0,
           "strict_min": true
         },
         "meta": {
           "expectationId": 662535
         }
       },
       "result": {
         "observed_value": 55.0,
         "element_count": 1,
         "missing_count": null,
         "missing_percent": null
       },
       "meta": {
         "ingestionResult": "INGESTED",
         "validationTime": "2024-11-06T05:03:11.000587Z"
       },
       "exception_info": {
         "raised_exception": false,
         "exception_message": null,
         "exception_traceback": null
       }
     }
   ],
   "evaluation_parameters": {},
   "statistics": {
     "evaluated_expectations": 1,
     "successful_expectations": 1,
     "unsucc

In [49]:
# Write the noon forecast rows to the weather feature group. The returned value
# (a job handle together with the validation report) is shown as the cell output.
weather_fg.insert(daily_df)

2024-11-06 18:03:29,575 INFO: 	2 expectation(s) included in expectation_suite.
Validation succeeded.
Validation Report saved successfully, explore a summary at https://c.app.hopsworks.ai:443/p/1160342/fs/1151045/fg/1337696


Uploading Dataframe: 0.00% |          | Rows 0/10 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: weather_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/1160342/jobs/named/weather_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x10a5e34d0>,
 {
   "success": true,
   "results": [
     {
       "success": true,
       "expectation_config": {
         "expectation_type": "expect_column_min_to_be_between",
         "kwargs": {
           "column": "precipitation_sum",
           "min_value": -0.1,
           "max_value": 1000.0,
           "strict_min": true
         },
         "meta": {
           "expectationId": 661512
         }
       },
       "result": {
         "observed_value": 0.0,
         "element_count": 10,
         "missing_count": null,
         "missing_percent": null
       },
       "meta": {
         "ingestionResult": "INGESTED",
         "validationTime": "2024-11-06T05:03:29.000575Z"
       },
       "exception_info": {
         "raised_exception": false,
         "exception_message": null,
         "exception_traceback": null
       }
     },
     {
       "success": true,
       "expectation_config": {
         "expectation_type": "expect_column_min_to_be_between