In [3]:
from Features.df_functions import *
from hsfs.feature import Feature
import datetime
import requests
import pandas as pd
import hopsworks
import json
import re
import os
import warnings
warnings.filterwarnings("ignore")
from dotenv import load_dotenv
load_dotenv()

HOPSWORKS_API_KEY = os.getenv("HOPSWORKS_API_KEY")

In [5]:
project = hopsworks.login()

2025-12-18 15:36:25,059 INFO: Initializing external client
2025-12-18 15:36:25,059 INFO: Base URL: https://c.app.hopsworks.ai:443






2025-12-18 15:36:27,226 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1327256


In [6]:
import great_expectations as ge
from great_expectations.core import ExpectationSuite, ExpectationConfiguration

birding_suite = ExpectationSuite(
    expectation_suite_name="birding_suite"
)

# RAIN: values >= 0
birding_suite.add_expectation(
    ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_between",
        kwargs={
            "column": "RAIN",
            "min_value": 0,
        },))

# WIND: values >= 0
birding_suite.add_expectation(
    ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_between",
        kwargs={
            "column": "WIND",
            "min_value": 0,
        },))
# TEMPERATURE: reasonable physical range
birding_suite.add_expectation(
    ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_between",
        kwargs={
            "column": "TEMPERATURE",
            "min_value": -50,
            "max_value": 50,
        },))

# WEATHERCODE: must be integer
birding_suite.add_expectation(
    ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_of_type",
        kwargs={
            "column": "WEATHERCODE",
            "type_": "int",
        },))


{"expectation_type": "expect_column_values_to_be_of_type", "kwargs": {"column": "WEATHERCODE", "type_": "int"}, "meta": {}}

In [None]:
fs = project.get_feature_store() 

birding_fg = fs.get_or_create_feature_group(
    name='Birding',
    description='Birding characteristics of each day',
    version=1,
    primary_key=['BIRD_TYPE', "REGION"],
    event_time="OBSERVATION_DATE",
    expectation_suite=birding_suite
)


In [None]:
birding_fg.insert(historical())

birding_fg.update_feature_description(
    "REGION",
    "Swedish administrative region (landskap) where the bird observation was recorded"
)

birding_fg.update_feature_description(
    "OBSERVATION_DATE",
    "Calendar date on which the bird observation was made"
)

birding_fg.update_feature_description(
    "WIND",
    "Average daily wind speed (km/h) for the region on the observation date"
)

birding_fg.update_feature_description(
    "RAIN",
    "Total daily precipitation (mm) for the region on the observation date"
)

birding_fg.update_feature_description(
    "WEATHERCODE",
    "Categorical weather condition code describing the dominant daily weather pattern"
)

birding_fg.update_feature_description(
    "TEMPERATURE",
    "Average daily air temperature (Â°C) for the region on the observation date"
)

birding_fg.update_feature_description(
    "OBSERVATION_COUNT",
    "Number of individual birds observed for the given species, region, and date"
)

birding_fg.update_feature_description(
    "TIME_OBSERVATIONS_STARTED",
    "Time of day when the bird observation effort began (HH:MM:SS)"
)

birding_fg.update_feature_description(
    "BIRD_TYPE",
    "Species identifier for the observed bird (e.g., whteag, goleag)"
)

birding_fg.update_feature_description(
    "YEAR",
    "Year index normalized relative to the start year of the dataset"
)

# One-hot encoded month indicators
birding_fg.update_feature_description(
    "MONTH_1", "Indicator variable equal to 1 if the observation occurred in January"
)
birding_fg.update_feature_description(
    "MONTH_2", "Indicator variable equal to 1 if the observation occurred in February"
)
birding_fg.update_feature_description(
    "MONTH_3", "Indicator variable equal to 1 if the observation occurred in March"
)
birding_fg.update_feature_description(
    "MONTH_4", "Indicator variable equal to 1 if the observation occurred in April"
)
birding_fg.update_feature_description(
    "MONTH_5", "Indicator variable equal to 1 if the observation occurred in May"
)
birding_fg.update_feature_description(
    "MONTH_6", "Indicator variable equal to 1 if the observation occurred in June"
)
birding_fg.update_feature_description(
    "MONTH_7", "Indicator variable equal to 1 if the observation occurred in July"
)
birding_fg.update_feature_description(
    "MONTH_8", "Indicator variable equal to 1 if the observation occurred in August"
)
birding_fg.update_feature_description(
    "MONTH_9", "Indicator variable equal to 1 if the observation occurred in September"
)
birding_fg.update_feature_description(
    "MONTH_10", "Indicator variable equal to 1 if the observation occurred in October"
)
birding_fg.update_feature_description(
    "MONTH_11", "Indicator variable equal to 1 if the observation occurred in November"
)
birding_fg.update_feature_description(
    "MONTH_12", "Indicator variable equal to 1 if the observation occurred in December"
)
