In [1]:
import requests
import pandas as pd
from datetime import datetime, timedelta

In [2]:
def get_historical_data():
    # 1. Define Dates (2 years ago to yesterday)
    end_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
    start_date = (datetime.now() - timedelta(days=730)).strftime('%Y-%m-%d')
    
    # 2. Open-Meteo Historical Air Quality URL
    aq_url = "https://air-quality-api.open-meteo.com/v1/air-quality"
    aq_params = {
        "latitude": 24.8607,
        "longitude": 67.0011,
        "hourly": "pm2_5,pm10",
        "start_date": start_date,
        "end_date": end_date,
        "timezone": "auto"
    }
    
    # 3. Open-Meteo Historical Weather URL
    weather_url = "https://archive-api.open-meteo.com/v1/archive"
    weather_params = {
        "latitude": 24.8607,
        "longitude": 67.0011,
        "hourly": "temperature_2m,relative_humidity_2m,wind_speed_10m",
        "start_date": start_date,
        "end_date": end_date,
        "timezone": "auto"
    }

    print(f"Fetching data from {start_date} to {end_date}...")
    
    aq_resp = requests.get(aq_url, params=aq_params).json()
    weather_resp = requests.get(weather_url, params=weather_params).json()
    
    df_aq = pd.DataFrame(aq_resp["hourly"])
    df_weather = pd.DataFrame(weather_resp["hourly"])
    
    # Merge and clean
    df = pd.merge(df_aq, df_weather, on="time")
    df['city'] = 'karachi'
    df['time'] = pd.to_datetime(df['time'])
    
    # IMPORTANT: Hopsworks likes timestamps to be in milliseconds for 'event_time'
    df['time'] = df['time'].apply(lambda x: int(x.timestamp() * 1000))
    
    return df

# EXECUTE FETCHING
karachi_df = get_historical_data()
print("Preview of backfill data:")
karachi_df.head()

Fetching data from 2024-01-30 to 2026-01-28...
Preview of backfill data:


Unnamed: 0,time,pm2_5,pm10,temperature_2m,relative_humidity_2m,wind_speed_10m,city
0,1706572800000,23.2,46.8,20.4,94,5.2,karachi
1,1706576400000,23.8,48.0,20.1,95,4.7,karachi
2,1706580000000,24.4,49.9,20.9,92,1.3,karachi
3,1706583600000,24.2,50.6,20.7,92,1.8,karachi
4,1706587200000,24.7,53.0,20.6,92,2.5,karachi


In [3]:
import hopsworks

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from dotenv import load_dotenv
import os

In [None]:

load_dotenv(override=True)

api_key = os.getenv("HOPSWORKS_API_KEY")
print(f"Key loaded: {api_key[:5]}...")

Key loaded: DEtLJ...


In [6]:
# 1. Login
load_dotenv(override=True)

api_key = os.getenv("HOPSWORKS_API_KEY")
project_name = os.getenv("HOPSWORKS_PROJECT_NAME")

if not api_key:
    raise ValueError("HOPSWORKS_API_KEY not loaded")

if not project_name:
    raise ValueError("HOPSWORKS_PROJECT_NAME not loaded")

project = hopsworks.login(
    api_key_value=api_key,
    project=project_name
)

fs = project.get_feature_store()

# 2. Create the Feature Group
aqi_fg = fs.get_or_create_feature_group(
    name="karachi_aqi_weather",
    version=1,
    primary_key=['city', 'time'],
    event_time='time',
    description="Hourly Air Quality and Weather data for Karachi (Open-Meteo)",
    online_enabled=True
)

# 3. Insert the Data
print("Starting upload to Hopsworks Feature Store...")
aqi_fg.insert(karachi_df)
print("Done! Check your Hopsworks dashboard under 'Feature Groups' to see your data.")

2026-01-29 10:17:47,294 INFO: Initializing external client
2026-01-29 10:17:47,296 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2026-01-29 10:17:50,964 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1357978
Starting upload to Hopsworks Feature Store...
Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1357978/fs/1344579/fg/1984871


Uploading Dataframe: 100.00% |██████████| Rows 17520/17520 | Elapsed Time: 00:11 | Remaining Time: 00:00


Launching job: karachi_aqi_weather_1_offline_fg_materialization


ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))