In [None]:
import hsfs
import pandas as pd

In [None]:
# Open an hsfs connection with its default arguments (no host, project or credentials are passed here).
connection = hsfs.connection()
# Feature store handle used by the later cells. No name is given, so this is
# presumably the project's default feature store — confirm.
fs = connection.get_feature_store()

In [None]:
# Location of the raw profiles CSV, kept in one named place so it is easy to spot and change.
PROFILES_CSV_PATH = "hdfs:///Projects/live_coding/RawData/profiles.csv"
df = pd.read_csv(PROFILES_CSV_PATH)

In [None]:
def compute_age(birthdate: pd.Series, reference_time='2022-01-01 00:00:24') -> pd.Series:
    """Return the age in years of each birth date as of ``reference_time``.

    Parameters
    ----------
    birthdate : pd.Series
        Birth dates, as datetimes or as values ``pd.to_datetime`` can parse.
    reference_time : str or datetime-like, optional
        Instant at which the age is measured. The default is the date this
        function previously hard-coded, so existing one-argument calls return
        exactly the same values.

    Returns
    -------
    pd.Series
        Fractional ages in years, on the same index as ``birthdate``.
        Missing birth dates yield NaN.

    Notes
    -----
    A year is taken to be exactly 365 days (leap days are ignored), so the
    result slightly overestimates calendar age.
    """
    elapsed = pd.to_datetime(reference_time) - pd.to_datetime(birthdate)
    # pd.Timedelta keeps the division inside pandas and removes the need for a
    # function-local ``datetime`` import.
    return elapsed / pd.Timedelta(days=365)

In [None]:
# NOTE(review): compute_age is called without a reference date, so age is measured as of its
# default (2022-01-01), while event_time below is 2024-03-06 and the feature description says
# "at the event time" — confirm which date is intended.
df = df.assign(
    age=compute_age(df['birthdate']),
    # Hardcode date time for demo so PIT Join works as expected
    event_time=pd.to_datetime('2024-03-06 00:00:24'),
)

In [None]:
# Preview only the first rows: the raw frame still carries card numbers and birth dates,
# so avoid rendering more of it than needed into the saved notebook output.
df.head()

In [None]:
# Keep only the columns written to the feature group. 'Country' is renamed to lower case so
# the frame matches the 'country' partition key and feature name used below (hsfs stores
# feature names in lower case). rename() ignores labels that are absent, so re-running this
# cell after the rename has already happened is still safe.
df = df.rename(columns={'Country': 'country'})[['cc_num', 'event_time', 'country', 'age']]

In [None]:
from great_expectations.core import ExpectationSuite, ExpectationConfiguration

# Validation rule for the profiles data: 'age' values are expected to lie between 0 and 120.
age_range_check = ExpectationConfiguration(
    expectation_type="expect_column_values_to_be_between",
    kwargs={"column": "age", "min_value": 0, "max_value": 120},
)

# Suite that is later passed to the feature group definition.
expectation_suite = ExpectationSuite(expectation_suite_name="profiles_suite")
expectation_suite.add_expectation(age_range_check)

In [None]:
# Quick look at the trimmed frame before ingestion; head() keeps the saved output small.
df.head()

In [None]:
# Settings for the "profiles" feature group, named up front so the call below reads cleanly.
profile_primary_key = ['cc_num']
profile_partition_key = ['country']
profile_statistics = {'histograms': True, 'correlations': True}

# Fetch version 1 of the feature group, or define it if it does not exist yet.
profiles = fs.get_or_create_feature_group(
    name="profiles",
    version=1,
    description="Credit card owner profile",
    primary_key=profile_primary_key,
    partition_key=profile_partition_key,
    event_time="event_time",
    online_enabled=True,
    statistics_config=profile_statistics,
    expectation_suite=expectation_suite,
)

In [None]:
# Write the prepared profiles DataFrame into the `profiles` feature group.
# NOTE(review): the group was created with an expectation suite, so the 'age' range check
# presumably runs as part of this insert — confirm against the hsfs documentation.
profiles.insert(df)

In [None]:
# Human-readable descriptions attached to the feature group's features.
# NOTE(review): 'age' is described as "at the event time" — confirm this matches the date
# the age is actually computed against.
feature_descriptions = [
    {"name": feature_name, "description": feature_text}
    for feature_name, feature_text in (
        ("cc_num", "Number of the credit card performing the transaction"),
        ("country", "Country of residence of the card holder"),
        ("age", "Age of the card holder at the event time"),
    )
]

for entry in feature_descriptions:
    profiles.update_feature_description(entry["name"], entry["description"])