# 2. Update data daily

In [None]:
import pickle
import pandas as pd
import hopsworks
import os

# API credentials come from environment variables (instead of a local `keys`
# module); any variable that is not set yields None.
TOMTOM_API_KEY, TOMTOM_API_KEY2, TOMTOM_API_KEY3, CALENDAR_API_KEY = (
    os.environ.get(env_name)
    for env_name in (
        'TOMTOM_API_KEY',
        'TOMTOM_API_KEY2',
        'TOMTOM_API_KEY3',
        'CALENDAR_API_KEY',
    )
)

from functions.TomTomAPI import get_traffic_map_from_grid
from functions.HolidaysnWeather import get_weather, is_holiday

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Location passed to the weather lookup: Stockholm, near Odenplan.
coordinates = (59.34318, 18.05141)
# Zoom level handed to get_traffic_map_from_grid when collecting traffic.
zoom = 20

# TomTom keys, in the order in which they are tried.
TOMTOM_keys = [
    TOMTOM_API_KEY,
    TOMTOM_API_KEY2,
    TOMTOM_API_KEY3,
]

### 2.1. Collect traffic data

In [3]:
# Repository root: GITHUB_WORKSPACE when that variable is set, otherwise the
# current working directory.
repo_root = os.environ.get("GITHUB_WORKSPACE", os.getcwd())

# Location of the pickled grid: <repo_root>/variables/grid.pickle
pickle_path = os.path.join(repo_root, "variables", "grid.pickle")

# Restore the grid object from its pickle file.
# NOTE: unpickling can execute arbitrary code, so this file must only ever come
# from a trusted source.
with open(pickle_path, mode='rb') as grid_file:
    grid = pickle.load(grid_file)

In [4]:
# Collect the traffic flow for the grid, trying each TomTom key in turn and
# stopping at the first one that works.
success = False
for i, api_key in enumerate(TOMTOM_keys):
    try:
        traffic_map = get_traffic_map_from_grid(api_key, grid, zoom=zoom)
    except Exception as err:
        # Report only the exception type: the message of an HTTP error may
        # contain the request URL, and with it the API key.
        print('Failed with key:', i, f'({type(err).__name__})')
    else:
        success = True
        break

# Fail here, with a clear message, rather than with a NameError on
# `traffic_map` in a later cell.
if not success:
    raise RuntimeError(
        f'Could not collect traffic data: all {len(TOMTOM_keys)} TomTom API keys failed.'
    )

Failed with key: 0
num of requests: 303


In [5]:
from shapely.geometry import LineString
import datetime

# One row per entry of the collected traffic map.
traffic_df = pd.DataFrame(traffic_map)

# Each 'coordinates' cell holds a mapping whose 'coordinate' entry is a list of
# {'latitude': ..., 'longitude': ...} points; turn it into a LineString of
# (longitude, latitude) pairs.
traffic_df['coordinates'] = traffic_df['coordinates'].apply(
    lambda shape: LineString(
        [(point['longitude'], point['latitude']) for point in shape['coordinate']]
    )
)
# Remove exact duplicate rows (presumably the same segment returned for
# several grid points -- TODO confirm).
traffic_df = traffic_df.drop_duplicates()
# Store each geometry as its string form, e.g. "LINESTRING (18.05 59.34, ...)".
traffic_df['coordinates'] = traffic_df['coordinates'].apply(str)

# Difference to the free-flow speed; negative values mean traffic is slower
# than free flow. Plain column arithmetic instead of a row-wise apply.
traffic_df['relativeSpeed'] = traffic_df['currentSpeed'] - traffic_df['freeFlowSpeed']

# Stamp every row with the current time rounded to the nearest full hour
# (minutes >= 30 round up).
# NOTE(review): datetime.now() is naive local time of the machine running the
# notebook -- confirm this matches the timestamps produced by get_weather.
now = datetime.datetime.now()
today = now.replace(minute=0, second=0, microsecond=0) + datetime.timedelta(hours=now.minute // 30)
traffic_df['date'] = [today] * len(traffic_df)

# '@version' is dropped before the frame is uploaded.
traffic_df = traffic_df.drop(columns=['@version'])

print(traffic_df.shape)
traffic_df.head()

(187, 10)


Unnamed: 0,frc,currentSpeed,freeFlowSpeed,currentTravelTime,freeFlowTravelTime,confidence,roadClosure,coordinates,relativeSpeed,date
0,FRC7,12,12,28,28,1.0,False,LINESTRING (18.05720504328079 59.3492455882513...,0,2024-12-18 16:00:00
1,FRC1,26,37,44,31,1.0,False,LINESTRING (18.0666464190154 59.34681285368509...,-11,2024-12-18 16:00:00
2,FRC1,44,63,360,251,1.0,False,LINESTRING (18.047719411097432 59.335373216516...,-19,2024-12-18 16:00:00
3,FRC1,29,44,204,134,1.0,False,LINESTRING (18.055381141150207 59.351231764189...,-15,2024-12-18 16:00:00
4,FRC4,6,25,429,103,1.0,False,LINESTRING (18.060517571414948 59.338708520808...,-19,2024-12-18 16:00:00


### 2.2. Collect weather data

In [6]:
# Fetch the weather features for the configured coordinates.
weather_df = get_weather(coordinates)
# Bare expression so the notebook renders the resulting frame.
weather_df

Unnamed: 0,date,temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max,wind_direction_10m_dominant
0,2024-12-18 16:00:00,5.826,-0.674,4.5,24.48,186.990036


### 2.3. Collect holiday data

In [7]:
# Look up the holiday flag via is_holiday and attach it to the weather frame
# as an extra 'holiday_status' column.
holiday_status = is_holiday(CALENDAR_API_KEY)
weather_df = weather_df.assign(holiday_status=holiday_status)
weather_df

Unnamed: 0,date,temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max,wind_direction_10m_dominant,holiday_status
0,2024-12-18 16:00:00,5.826,-0.674,4.5,24.48,186.990036,0


### 2.4. Joining data and uploading to Hopsworks

In [10]:
# The Hopsworks API key is expected in the HOPSWORKS_API_KEY environment
# variable (e.g. injected from GitHub Secrets).
hopsworks_api_key = os.getenv('HOPSWORKS_API_KEY')
if not hopsworks_api_key:
    # Fail with an explicit message instead of an opaque TypeError/login error.
    raise RuntimeError(
        'HOPSWORKS_API_KEY is not set; export it before running this notebook.'
    )

# Log in to the Hopsworks project that hosts the feature groups.
proj = hopsworks.login(project="ID2223LAB1KTH", api_key_value=hopsworks_api_key)

2024-12-18 16:01:06,417 INFO: Closing external client and cleaning up certificates.
Connection closed.
2024-12-18 16:01:06,421 INFO: Initializing external client
2024-12-18 16:01:06,423 INFO: Base URL: https://c.app.hopsworks.ai:443
2024-12-18 16:01:07,604 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1170583


In [11]:
# Get a handle on the project's feature store.
fs = proj.get_feature_store()

# Both feature groups are addressed at the same version.
FEATURE_GROUP_VERSION = 1

# Append the traffic rows collected in this run to the traffic feature group.
traffic_fg = fs.get_feature_group(name="stockholm_traffic", version=FEATURE_GROUP_VERSION)
traffic_fg.insert(traffic_df)

# Append the weather + holiday row to its feature group.
weather_holiday_fg = fs.get_feature_group(
    name="stockholm_weather_holiday", version=FEATURE_GROUP_VERSION
)
weather_holiday_fg.insert(weather_df)

Uploading Dataframe: 100.00% |██████████| Rows 187/187 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: stockholm_traffic_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1170583/jobs/named/stockholm_traffic_1_offline_fg_materialization/executions


Uploading Dataframe: 100.00% |██████████| Rows 1/1 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: stockholm_weather_holiday_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1170583/jobs/named/stockholm_weather_holiday_1_offline_fg_materialization/executions


(Job('stockholm_weather_holiday_1_offline_fg_materialization', 'SPARK'), None)