In [1]:
from aligned import FileSource
import polars as pl
import os

In [2]:
# The file sources use relative paths, so the notebook has to run from the root
# folder, one level above the directory the kernel started in.
# Remember that start-up directory the first time this cell runs: re-running
# the cell then lands in the same folder instead of climbing one more level
# on every execution.
NOTEBOOK_DIR = globals().get("NOTEBOOK_DIR", os.getcwd())
os.chdir(os.path.join(NOTEBOOK_DIR, os.pardir))

## Load Our Feature Definitions

In [3]:
store = await FileSource.json_at("feature-store.json").feature_store()

## Select the samples to train on, i.e. our entity ids

In [4]:
# Columns read from the loan table to identify each training sample.
entity_columns = ["loan_id", "dob_ssn", "zipcode", "event_timestamp"]

loan_table = pl.scan_parquet("data/loan_table.parquet")

# Materialise the lazy query and convert it to a plain dict of Python lists
# (column name -> values).
entities = (
    loan_table
    .select([pl.col(name) for name in entity_columns])
    .collect()
    .to_dict(as_series=False)
)

## Train the model

In [5]:
from loan_example.model import CreditScoringModel

model = CreditScoringModel(store.model("credit_scoring"))
await model.train(entities)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  training_data.data[self.categorical_features] = self.encoder.transform(


## Store Our Model to Disk

In [6]:
# Write the trained model and its encoder back into the loan_example folder
# (paths are relative to the current working directory).
model_path = "loan_example/model.bin"
encoder_path = "loan_example/encoder.bin"
model.dump_model(model_path, encoder_path)

## Predict Using the Batch Source

In [7]:
from datetime import datetime

prediction_entity = {
    "zipcode": [76104],
    "dob_ssn": ["19630621_4278"],
    "loan_id": [None],
    "event_timestamp": [datetime.now()],
    "person_age": [133],
    "person_income": [59000],
    "person_home_ownership": ["RENT"],
    "person_emp_length": [123.0],
    "loan_intent": ["PERSONAL"],
    "loan_amount": [35000],
    "loan_int_rate": [16.02],
}
await model.predict(prediction_entity)

array([1.])

## Fill our Online Source with Features

### Create a new feature store with the online source that we prefer

In [8]:
from aligned import RedisConfig

# Derive a second feature store from the existing one, backed by the Redis
# configuration returned by RedisConfig.localhost().
redis_source = RedisConfig.localhost()
online_store = store.with_source(redis_source)

### Select the entity ids to materialise the source with

In [9]:
def load_unique_entities(path, entity_column):
    """Read one row per entity from a parquet file as a plain dict of lists.

    Args:
        path: Parquet file to scan, relative to the current working directory.
        entity_column: Column holding the entity id; rows are de-duplicated
            on this column.

    Returns:
        A dict with the keys `entity_column` and "event_timestamp", each
        mapping to a Python list (`as_series=False`). This matches the format
        used for the training entities, rather than polars Series objects.
    """
    return (
        pl.scan_parquet(path)
        .select([pl.col(entity_column), pl.col("event_timestamp")])
        # keep="last" retains the last row polars sees for each entity.
        # NOTE(review): that is only the most recent event if the file is
        # ordered by event_timestamp -- confirm.
        .unique(subset=entity_column, keep="last")
        .collect()
        .to_dict(as_series=False)
    )


zipcodes = load_unique_entities("data/zipcode_table.parquet", "zipcode")
credit_history = load_unique_entities("data/credit_history.parquet", "dob_ssn")

### Load, compute and fill our `zipcode_features` and `credit_history` features

In [10]:
await online_store.feature_view("zipcode_features").batch_write(
    store.feature_view("zipcode_features").features_for(zipcodes)
)

In [11]:
await online_store.feature_view("credit_history").batch_write(
    store.feature_view("credit_history").features_for(credit_history)
)

### Let's load some features from our Online Source

In [12]:
await online_store.model("credit_scoring").features_for(prediction_entity).to_pandas()

Unnamed: 0,state,bankruptcies,missed_payments_2y,hard_pulls,credit_card_due,person_age,event_timestamp,total_wages,student_loan_due,loan_int_rate,...,person_emp_length,vehicle_loan_due,missed_payments_6m,is_primary_location,loan_id,zipcode,mortgage_due,loan_amount,loan_intent_ordinal,person_home_ownership_ordinal
0,TX,0,0,1,3343,133,2023-05-23 22:37:04.089483,142325465,44375,16.02,...,123.0,11506,0,True,,76104,378847,35000,0,0
