In [7]:
import requests
import pandas as pd
from datetime import datetime, timedelta

In [8]:
def get_historical_data():
    
    end_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
    start_date = (datetime.now() - timedelta(days=730)).strftime('%Y-%m-%d')
    
    
    aq_url = "https://air-quality-api.open-meteo.com/v1/air-quality"
    aq_params = {
        "latitude": 24.8607,
        "longitude": 67.0011,
        "hourly": "pm2_5,pm10",
        "start_date": start_date,
        "end_date": end_date,
        "timezone": "auto"
    }
    
    
    weather_url = "https://archive-api.open-meteo.com/v1/archive"
    weather_params = {
        "latitude": 24.8607,
        "longitude": 67.0011,
        "hourly": "temperature_2m,relative_humidity_2m,wind_speed_10m",
        "start_date": start_date,
        "end_date": end_date,
        "timezone": "auto"
    }

    print(f"Fetching data from {start_date} to {end_date}...")
    
    aq_resp = requests.get(aq_url, params=aq_params).json()
    weather_resp = requests.get(weather_url, params=weather_params).json()
    
    df_aq = pd.DataFrame(aq_resp["hourly"])
    df_weather = pd.DataFrame(weather_resp["hourly"])
    
    # Merge and clean
    df = pd.merge(df_aq, df_weather, on="time")
    df['city'] = 'karachi'
    df['time'] = pd.to_datetime(df['time'])
    
    
    df['time'] = df['time'].apply(lambda x: int(x.timestamp() * 1000))
    
    return df

karachi_df = get_historical_data()
print("Preview of backfill data:")
karachi_df.head()

Fetching data from 2024-02-14 to 2026-02-12...
Preview of backfill data:


Unnamed: 0,time,pm2_5,pm10,temperature_2m,relative_humidity_2m,wind_speed_10m,city
0,1707868800000,59.5,91.5,18.9,86,8.0,karachi
1,1707872400000,57.2,86.8,18.0,89,9.2,karachi
2,1707876000000,53.2,80.8,17.5,80,8.5,karachi
3,1707879600000,49.2,75.0,16.9,68,8.5,karachi
4,1707883200000,45.2,69.4,16.0,62,8.7,karachi


In [9]:
import hopsworks

In [10]:
from dotenv import load_dotenv
import os

In [11]:

load_dotenv(override=True)

api_key = os.getenv("HOPSWORKS_API_KEY")
print(f"Key loaded: {api_key[:5]}...")

Key loaded: ji9Y6...


In [12]:
# 1. Login
load_dotenv(override=True)

api_key = os.getenv("HOPSWORKS_API_KEY")
project_name = os.getenv("HOPSWORKS_PROJECT_NAME")

if not api_key:
    raise ValueError("HOPSWORKS_API_KEY not loaded")

if not project_name:
    raise ValueError("HOPSWORKS_PROJECT_NAME not loaded")

project = hopsworks.login(
    api_key_value=api_key,
    project=project_name
)

fs = project.get_feature_store()


aqi_fg = fs.get_or_create_feature_group(
    name="karachi_aqi_weather",
    version=1,
    primary_key=['city', 'time'],
    event_time='time',
    description="Hourly Air Quality and Weather data for Karachi (Open-Meteo)",
    online_enabled=True
)

# List only the columns that the Feature Group expects
# We leave out 'pm10' because the model doesn't use it and the FG doesn't have it
allowed_columns = [
    'city', 
    'time', 
    'temperature_2m', 
    'relative_humidity_2m', 
    'wind_speed_10m', 
    'pm2_5'
]

final_df = karachi_df[allowed_columns]

print("Starting upload to Hopsworks...")

aqi_fg.insert(final_df)
print("Done! Check your Hopsworks dashboard under 'Feature Groups' to see your data.")

2026-02-13 10:09:46,487 INFO: Closing external client and cleaning up certificates.
Connection closed.
2026-02-13 10:09:46,496 INFO: Initializing external client
2026-02-13 10:09:46,497 INFO: Base URL: https://c.app.hopsworks.ai:443
2026-02-13 10:09:48,822 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1357978
Starting upload to Hopsworks...


Uploading Dataframe: 100.00% |██████████| Rows 17520/17520 | Elapsed Time: 00:12 | Remaining Time: 00:00


Launching job: karachi_aqi_weather_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1357978/jobs/named/karachi_aqi_weather_1_offline_fg_materialization/executions
Done! Check your Hopsworks dashboard under 'Feature Groups' to see your data.
