# Introdução

Para rodar este teste, a Feature Store deve ser configurada primeiro. Para isso, execute o comando `feast apply` no diretório criado para a Feature Store, que contém o arquivo `.py` com as definições de FeatureViews e FeatureServices e o arquivo `.yaml` com a configuração da Feature Store.

Passos executados para configurar a Feature Store:

1. Instalação do Feast: `pip install feast`
2. Criação da pasta da Feature Store (`adult_income_project`) com os arquivos `.yaml` e `.py`
3. Execução de `feast apply` no diretório da Feature Store
4. Carga dos dados na online store: `feast materialize 2024-01-01T00:00:00 2024-12-31T00:00:00`

# Configuração Inicial

In [1]:
# Importando bibliotecas
import os
from datetime import datetime
import pandas as pd
from feast import FeatureStore


# Resolve the directory layout. The current working directory is treated as
# the code folder, and the project root is taken to be its parent.
path_codigos = os.getcwd()
path_projeto = os.path.dirname(path_codigos)

# Data folders (bronze and silver layers) under the project root
path_data = os.path.join(path_projeto, "02_data")
path_bronze, path_silver = (
    os.path.join(path_data, layer_dir) for layer_dir in ("01_bronze", "02_silver")
)

# Feast repository folder; the FeatureStore handle is built from it
feature_repo_path = os.path.join(path_projeto, "adult_income_project")
store = FeatureStore(repo_path=feature_repo_path)



# Explorando a Feature Store

## Entities

In [2]:
# Print each entity returned by the feature store, one per iteration
for entity in store.list_entities():
    print(entity)

{
  "spec": {
    "name": "adult",
    "joinKey": "adult_id"
  },
  "meta": {
    "createdTimestamp": "2024-10-03T10:18:30.851776Z",
    "lastUpdatedTimestamp": "2024-10-03T10:18:30.851776Z"
  }
}


## Feature Views

In [4]:
# Print every batch feature view returned by the feature store
for feature_view in store.list_batch_feature_views():
    print(feature_view)

{
  "spec": {
    "name": "adult_hist_social_data",
    "entities": [
      "adult"
    ],
    "features": [
      {
        "name": "age",
        "valueType": "INT64"
      },
      {
        "name": "race",
        "valueType": "STRING"
      },
      {
        "name": "education",
        "valueType": "STRING"
      },
      {
        "name": "education-num",
        "valueType": "INT64"
      },
      {
        "name": "marital-status",
        "valueType": "STRING"
      },
      {
        "name": "sex",
        "valueType": "STRING"
      },
      {
        "name": "native-country",
        "valueType": "STRING"
      }
    ],
    "ttl": "0s",
    "batchSource": {
      "type": "BATCH_FILE",
      "timestampField": "event_timestamp",
      "fileOptions": {
        "uri": "/Users/pedrosilva/Documents/01_Projetos/03_Feast_Test/02_data/02_silver/adult_dataset_hist.parquet"
      },
      "dataSourceClassType": "feast.infra.offline_stores.file_source.FileSource",
      "name": "adul

## Feature Services

In [5]:
# Print every feature service returned by the feature store
for feature_service in store.list_feature_services():
    print(feature_service)

{
  "spec": {
    "name": "adult_income_inf_v2",
    "features": [
      {
        "featureViewName": "adult_inf_social_data",
        "featureColumns": [
          {
            "name": "age",
            "valueType": "INT64"
          },
          {
            "name": "race",
            "valueType": "STRING"
          },
          {
            "name": "education",
            "valueType": "STRING"
          },
          {
            "name": "education-num",
            "valueType": "INT64"
          },
          {
            "name": "marital-status",
            "valueType": "STRING"
          },
          {
            "name": "sex",
            "valueType": "STRING"
          },
          {
            "name": "native-country",
            "valueType": "STRING"
          }
        ]
      },
      {
        "featureViewName": "adult_inf_income_data",
        "featureColumns": [
          {
            "name": "workclass",
            "valueType": "STRING"
          },
        

# Feature Retrieval (Offline Store)

## Instanciando feature services

In [7]:
# Feature services, version 1: training and inference handles, fetched by name
adult_income_fs_train_v1, adult_income_fs_inf_v1 = (
    store.get_feature_service(service_name)
    for service_name in ("adult_income_train_v1", "adult_income_inf_v1")
)

# Feature services, version 2: training and inference handles, fetched by name
adult_income_fs_train_v2, adult_income_fs_inf_v2 = (
    store.get_feature_service(service_name)
    for service_name in ("adult_income_train_v2", "adult_income_inf_v2")
)

## Capturando historical features para treino

In [16]:
# Entities of interest: three adult_ids, all sharing the same event timestamp
lookup_ts = datetime(2022, 10, 1)
entity_df = pd.DataFrame(
    {
        "adult_id": [0, 1, 2],
        "event_timestamp": [lookup_ts] * 3,
    }
)
entity_df

Unnamed: 0,adult_id,event_timestamp
0,0,2022-10-01
1,1,2022-10-01
2,2,2022-10-01


In [17]:
# Train v1: historical features for entity_df, selected by the v1 training
# feature service; to_df() converts the retrieval result for display
training_df = store.get_historical_features(
    entity_df=entity_df,
    features=adult_income_fs_train_v1,
)
training_df.to_df()



Unnamed: 0,adult_id,event_timestamp,race,education,workclass,occupation,capital-gain,capital-loss,income
0,0,2022-10-01 00:00:00+00:00,White,Bachelors,State-gov,Adm-clerical,2174,0,<=50K
1,1,2022-10-01 00:00:00+00:00,White,Bachelors,Self-emp-not-inc,Exec-managerial,0,0,<=50K
2,2,2022-10-01 00:00:00+00:00,White,HS-grad,Private,Handlers-cleaners,0,0,<=50K


In [18]:
# Train v2: historical features for entity_df, selected by the v2 training
# feature service; to_df() converts the retrieval result for display
training_df = store.get_historical_features(
    entity_df=entity_df,
    features=adult_income_fs_train_v2,
)
training_df.to_df()



Unnamed: 0,adult_id,event_timestamp,age,race,education,education-num,marital-status,sex,native-country,workclass,occupation,capital-gain,capital-loss,income
0,0,2022-10-01 00:00:00+00:00,39,White,Bachelors,13,Never-married,Male,United-States,State-gov,Adm-clerical,2174,0,<=50K
1,1,2022-10-01 00:00:00+00:00,50,White,Bachelors,13,Married-civ-spouse,Male,United-States,Self-emp-not-inc,Exec-managerial,0,0,<=50K
2,2,2022-10-01 00:00:00+00:00,38,White,HS-grad,9,Divorced,Male,United-States,Private,Handlers-cleaners,0,0,<=50K


## Capturando historical features para batch processing

In [19]:
# Entities of interest: three adult_ids, all sharing the same event timestamp
lookup_ts = datetime(2024, 10, 1)
entity_df = pd.DataFrame(
    {
        "adult_id": [48839, 48840, 48841],
        "event_timestamp": [lookup_ts] * 3,
    }
)
entity_df

Unnamed: 0,adult_id,event_timestamp
0,48839,2024-10-01
1,48840,2024-10-01
2,48841,2024-10-01


In [20]:
# Inference v2: historical features for entity_df, selected by the v2
# inference feature service; to_df() converts the retrieval result for display
inf_df = store.get_historical_features(
    entity_df=entity_df,
    features=adult_income_fs_inf_v2,
)
inf_df.to_df()



Unnamed: 0,adult_id,event_timestamp,age,race,education,education-num,marital-status,sex,native-country,workclass,occupation,capital-gain,capital-loss
0,48839,2024-10-01 00:00:00+00:00,38,White,Bachelors,13,Married-civ-spouse,Male,United-States,Private,Prof-specialty,0,0
1,48840,2024-10-01 00:00:00+00:00,44,Asian-Pac-Islander,Bachelors,13,Divorced,Male,United-States,Private,Adm-clerical,5455,0
2,48841,2024-10-01 00:00:00+00:00,35,White,Bachelors,13,Married-civ-spouse,Male,United-States,Self-emp-inc,Exec-managerial,0,0


# Feature Retrieval (Online Store)

## Explorando o online_store.db

In [12]:
import sqlite3

# Location of the online_store.db file inside the feature repository
db_path = os.path.join(feature_repo_path, "data", "online_store.db")

# Open the SQLite database; the connection and cursor are kept open because
# the next cell reuses the same cursor
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# Query sqlite_master for the name of every table in the database
tables = cursor.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()

print("Tabelas no online_store.db:")
for (table_name,) in tables:  # each row is a 1-tuple holding the table name
    print(table_name)

Tabelas no online_store.db:
adult_income_project_adult_hist_income_data
adult_income_project_adult_hist_social_data
adult_income_project_adult_inf_income_data
adult_income_project_adult_inf_social_data


In [14]:
# Dump every row of the adult_income_project_adult_inf_income_data table.
# This is the last visible use of the raw SQLite handles opened earlier, so
# the cursor and connection are released here, even if the query fails.
# NOTE: re-running this cell on its own requires re-running the cell that
# creates `conn` and `cursor` first.
try:
    cursor.execute("SELECT * FROM adult_income_project_adult_inf_income_data;")
    rows = cursor.fetchall()
finally:
    cursor.close()
    conn.close()

print("\nDados da tabela adult_income_project_adult_inf_income_data:")
for row in rows:
    print(row)


Dados da tabela adult_income_project_adult_inf_income_data:
(b'\x02\x00\x00\x00adult_id\x04\x00\x00\x00\x04\x00\x00\x00\x81\xbb\x00\x00', 'workclass', b'\x12\tLocal-gov', None, '2024-10-01 00:00:00', None)
(b'\x02\x00\x00\x00adult_id\x04\x00\x00\x00\x04\x00\x00\x00\x81\xbb\x00\x00', 'occupation', b'\x12\x0cAdm-clerical', None, '2024-10-01 00:00:00', None)
(b'\x02\x00\x00\x00adult_id\x04\x00\x00\x00\x04\x00\x00\x00\x81\xbb\x00\x00', 'capital-gain', b' \x00', None, '2024-10-01 00:00:00', None)
(b'\x02\x00\x00\x00adult_id\x04\x00\x00\x00\x04\x00\x00\x00\x81\xbb\x00\x00', 'capital-loss', b' \x00', None, '2024-10-01 00:00:00', None)
(b'\x02\x00\x00\x00adult_id\x04\x00\x00\x00\x04\x00\x00\x00\xaa\xbd\x00\x00', 'workclass', b'\x12\x07Private', None, '2024-10-01 00:00:00', None)
(b'\x02\x00\x00\x00adult_id\x04\x00\x00\x00\x04\x00\x00\x00\xaa\xbd\x00\x00', 'occupation', b'\x12\x05Sales', None, '2024-10-01 00:00:00', None)
(b'\x02\x00\x00\x00adult_id\x04\x00\x00\x00\x04\x00\x00\x00\xaa\xbd\x00\

## Online feature retrieval

In [15]:
# Fetch online features for a single adult_id through the v2 inference
# feature service, then convert the response to a plain dict
online_response = store.get_online_features(
    features=adult_income_fs_inf_v2,
    entity_rows=[{"adult_id": 48300}],
)
features = online_response.to_dict()

features

  rows = cur.fetchall()


{'adult_id': [48300],
 'sex': ['Male'],
 'race': ['White'],
 'education-num': [13],
 'marital-status': ['Never-married'],
 'education': ['Bachelors'],
 'age': [34],
 'native-country': ['United-States'],
 'capital-gain': [0],
 'occupation': ['Sales'],
 'capital-loss': [0],
 'workclass': ['Private']}