In [None]:
# Install hopsworks if needed (works in notebooks without C++ tools).
# Uncomment and run once; %pip installs into the environment of the running kernel.
# %pip install hopsworks -q

In [None]:
import os
from pathlib import Path

import hopsworks
import numpy as np
import pandas as pd

# Visible confirmation that the imports in this cell completed without raising
print("✅ Libraries loaded")

In [None]:
# Configuration
# SECURITY: credentials must never be stored in the notebook itself. The API key
# is read from the HOPSWORKS_API_KEY environment variable; the key that used to
# be hard-coded in this cell should be treated as leaked and revoked in Hopsworks.
# If the variable is unset the value is None, in which case hopsworks.login()
# falls back to its own credential lookup / interactive prompt (see Hopsworks docs).
HOPSWORKS_API_KEY = os.environ.get("HOPSWORKS_API_KEY")
PROJECT_NAME = "api_predictor"
FEATURE_GROUP_NAME = "islamabad_aqi_features"

print(f"Project: {PROJECT_NAME}")
print(f"Feature Group: {FEATURE_GROUP_NAME}")

In [None]:
# Read the pre-computed feature table from its parquet file
features_path = Path("data/processed/islamabad_features.parquet")
df = pd.read_parquet(features_path)
print(f"Loaded: {df.shape}")
# Last expression of the cell: rich preview of the first rows
df.head()

In [None]:
# Prepare for Hopsworks
# Work on an independent copy and keep only complete rows
# (the lag features leave NaNs behind)
df_upload = df.copy().dropna()
print(f"After dropping NaN: {df_upload.shape}")

# Add required columns: a sequential row id and a datetime parsed from 'timestamp'
# NOTE(review): observation_id is derived from row position after dropna —
# confirm ids stay stable if the input data is regenerated
df_upload = df_upload.assign(
    observation_id=range(len(df_upload)),
    event_time=pd.to_datetime(df_upload['timestamp']),
)

# Cast every boolean column to int in a single astype call
bool_cols = df_upload.select_dtypes(include=['bool']).columns
df_upload = df_upload.astype({name: int for name in bool_cols})

# Remove the 'city' column when present (no error if it is absent)
df_upload = df_upload.drop(columns='city', errors='ignore')

print(f"Ready for upload: {df_upload.shape}")

In [None]:
# Log in to the Hopsworks project, then fetch its feature store handle
project = hopsworks.login(project=PROJECT_NAME, api_key_value=HOPSWORKS_API_KEY)
fs = project.get_feature_store()
print("✅ Connected to Feature Store")

In [None]:
# Fetch version 1 of the feature group, or create it if it does not exist yet.
# Arguments are collected in one spec so the definition can be read at a glance.
feature_group_spec = dict(
    name=FEATURE_GROUP_NAME,
    version=1,
    description="Islamabad AQI prediction features with weather, pollution, and time-series engineering",
    primary_key=['observation_id'],
    event_time='event_time',
    online_enabled=True,
)
fg = fs.get_or_create_feature_group(**feature_group_spec)
print(f"✅ Feature group created: {FEATURE_GROUP_NAME}")

In [None]:
# Upload the prepared frame into the feature group
print(f"Uploading {len(df_upload):,} records...")
# wait_for_job=True asks insert() to wait for the ingestion job before returning
insert_options = dict(wait_for_job=True)
fg.insert(df_upload, write_options=insert_options)
print("✅ Upload complete!")

In [None]:
# Feature view for training: every feature-group column, with the
# per-horizon target columns declared as labels
forecast_horizons_h = (1, 6, 12, 24, 48, 72)
target_cols = [f"target_{hours}h" for hours in forecast_horizons_h]

feature_view_spec = dict(
    name="islamabad_aqi_view",
    version=1,
    description="Feature view for AQI prediction training",
    query=fg.select_all(),
    labels=target_cols,
)
fv = fs.get_or_create_feature_view(**feature_view_spec)
print("✅ Feature view created!")

In [None]:
# Materialize an 80/20 train/test split from the feature view
# NOTE(review): the split is requested by fraction only — for lagged time-series
# features, confirm whether a time-based split is needed to avoid leakage
split_frames = fv.train_test_split(test_size=0.2)
X_train, X_test, y_train, y_test = split_frames
print(f"Training set: {X_train.shape}")
print(f"Test set: {X_test.shape}")

## ✅ Done!

Features are now in Hopsworks and ready for:
- Model training with `fv.train_test_split()`
- Online inference via Feature Store API
- Version control and lineage tracking