# <span style='color:#ff5f27'> Initialization</span>

### Hopsworks Settings

In [60]:
import sys
from pathlib import Path
import warnings
warnings.filterwarnings("ignore", module="IPython")

def is_google_colab() -> bool:
    """Report whether the notebook is running inside Google Colab.

    The check looks for the substring "google.colab" in the string form of
    the object returned by `get_ipython()`.

    Returns:
        True if the substring is present, False otherwise.
    """
    shell_repr = str(get_ipython())
    return "google.colab" in shell_repr

def clone_repository() -> None:
    """Clone the mlfs-book repository and change into the cloned directory.

    Relies on the IPython shell-escape and cd magics, so it only works
    inside an IPython/Jupyter session. In this cell it is invoked when the
    notebook runs on Google Colab.
    """
    !git clone https://github.com/featurestorebook/mlfs-book.git
    %cd mlfs-book

def install_dependencies() -> None:
    """Install the project's Python dependencies through shell commands.

    First installs/upgrades `uv` with pip, then runs `uv pip install` on the
    requirements declared in `pyproject.toml` with all extras enabled and the
    `--system` flag. The `pyproject.toml` path is relative, so the file is
    expected in the current working directory.
    """
    !pip install --upgrade uv
    !uv pip install --all-extras --system --requirement pyproject.toml

# Resolve the project root: locally it is derived from the working directory,
# on Google Colab the repository is cloned and its dependencies installed first
if not is_google_colab():
    root_dir = Path().absolute()
    # Drop a trailing 'airquality' and then a trailing 'notebooks' path component,
    # so the root is found when the notebook is started from one of those subdirectories
    if root_dir.name == 'airquality':
        root_dir = root_dir.parent
    if root_dir.name == 'notebooks':
        root_dir = root_dir.parent
    root_dir = str(root_dir)
    print("Local environment")
else:
    clone_repository()
    install_dependencies()
    root_dir = str(Path().absolute())
    print("Google Colab environment")

print(f"Root dir: {root_dir}")

# Append the root directory to sys.path (once) so modules under it can be imported
if root_dir not in sys.path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

# Build the Hopsworks settings from the environment file <root_dir>/.env
from mlfs import config
settings = config.HopsworksSettings(_env_file=f"{root_dir}/.env")

Local environment
Root dir: c:\Users\nives\AllFolders\SML\sml-bike-sharing
HopsworksSettings initialized!


### Imports

In [61]:
import datetime
import requests
import pandas as pd
import hopsworks
from mlfs import util
import datetime
from pathlib import Path
import json
import re
import os
import warnings
warnings.filterwarnings("ignore")

### Hopsworks Login

In [62]:
# Log in to Hopsworks and keep the returned project handle in `project`
project = hopsworks.login()

2026-01-04 17:58:42,763 INFO: Closing external client and cleaning up certificates.
2026-01-04 17:58:42,766 INFO: Connection closed.
2026-01-04 17:58:42,769 INFO: Initializing external client
2026-01-04 17:58:42,769 INFO: Base URL: https://c.app.hopsworks.ai:443






2026-01-04 17:58:44,487 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1279175


# <span style='color:#ff5f27'> Check CityBikes API</span>

### Set City Data

In [63]:
# Date on which the notebook is executed
today = datetime.date.today()
# City metadata; expected to agree with what the CityBikes API reports for the queried network
city = 'Trento'
country = 'IT'
# Rounded coordinates of the city (presumably decimal degrees; compare with the API response)
latitude = 46.07
longitude = 11.12

### Perform a Request

In [64]:
# No API-key is required for this API

network_id = "e-motion-trento"
url = f"https://api.citybik.es/v2/networks/{network_id}"

# Fetch the network description once and decode the JSON payload into `resp`.
# A timeout keeps the cell from hanging indefinitely, and raise_for_status()
# prevents an HTTP error page from being reported as a success.
try:
    http_response = requests.get(url, timeout=30)
    http_response.raise_for_status()
    resp = http_response.json()
    print("Request Successful!")
except (requests.exceptions.RequestException, ValueError) as err:
    # RequestException covers connection, timeout and HTTP-status failures;
    # ValueError covers a body that is not valid JSON on older requests versions.
    # The failure is reported without issuing a second request.
    print("Something went wrong, please check the URL.")
    print("Network Answer:")
    print(err)


Request Successful!


### Visualize Answer

This information should match the variables set in the "Set City Data" cell (the coordinates only approximately).

In [65]:
# Print the network metadata contained in the API response
network_info = resp['network']
network_location = network_info['location']
print(f"Network ID: {network_info['id']}")

print(f"City: {network_location['city']}")
print(f"Country: {network_location['country']}")
print(f"Latitude: {network_location['latitude']}")
print(f"Longitude: {network_location['longitude']}")

# Print the first 5 stations with their available bikes and empty slots
stations = network_info['stations']
print("\nFirst five stations:")
for s in stations[:5]:
    print(f"{s['name']} -> Bikes: {s['free_bikes']}, Empty slots: {s['empty_slots']}")


Network ID: e-motion-trento
City: Trento
Contry: IT
Latitude: 46.06643205823519
Longitude: 11.122145390351879

First five stations:
10.02 Top Center -> Bikes: 2, Empty slots: 12
20.10 Noriglio -> Bikes: 4, Empty slots: 4
20.09 Sacco -> Bikes: 8, Empty slots: 3
10.18 Vannetti -> Bikes: 0, Empty slots: 0
11.01 Ospedale San Giovanni -> Bikes: 3, Empty slots: 9


# <span style='color:#ff5f27'> Load Historical Data</span>

### Read Files

In [66]:
# Path of the historical statistics Parquet file, located under the project root
file_path = f"{root_dir}/bike-historical-data/202501-e-motion-trento-stats.parquet"

# Load the file into a DataFrame using the pyarrow engine
historical_df = pd.read_parquet(file_path, engine='pyarrow')
# Display the first rows as the cell output
historical_df.head()

Unnamed: 0,tag,id,nuid,name,latitude,longitude,bikes,free,extra,timestamp
0,e-motion-trento,00d841f5a7a286ba0717de688b6994a2,1122,10.02 Top Center,46.090104,11.118584,3,11,"{""uid"":""1122"",""number"":1002,""status"":""online"",...",2025-01-01 13:33:42
1,e-motion-trento,00d841f5a7a286ba0717de688b6994a2,1122,10.02 Top Center,46.090104,11.118584,2,12,"{""uid"":""1122"",""number"":1002,""status"":""online"",...",2025-01-02 03:30:44
2,e-motion-trento,00d841f5a7a286ba0717de688b6994a2,1122,10.02 Top Center,46.090104,11.118584,1,13,"{""uid"":""1122"",""number"":1002,""status"":""online"",...",2025-01-02 05:42:42
3,e-motion-trento,00d841f5a7a286ba0717de688b6994a2,1122,10.02 Top Center,46.090104,11.118584,0,14,"{""uid"":""1122"",""number"":1002,""status"":""online"",...",2025-01-02 06:18:42
4,e-motion-trento,00d841f5a7a286ba0717de688b6994a2,1122,10.02 Top Center,46.090104,11.118584,1,13,"{""uid"":""1122"",""number"":1002,""status"":""online"",...",2025-01-02 08:03:42


### Print Info

In [70]:
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" to the output
historical_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10162 entries, 0 to 10161
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   tag        10162 non-null  object        
 1   id         10162 non-null  object        
 2   nuid       10162 non-null  object        
 3   name       10162 non-null  object        
 4   latitude   10162 non-null  float64       
 5   longitude  10162 non-null  float64       
 6   bikes      10162 non-null  int32         
 7   free       10162 non-null  int32         
 8   extra      10162 non-null  object        
 9   timestamp  10162 non-null  datetime64[us]
dtypes: datetime64[us](1), float64(2), int32(2), object(5)
memory usage: 714.6+ KB
None
