# FS Feast

### Configuration

Создать 2 переменные среды 

```python
os.environ["FEAST_CORE_URL"] = "path:6565"
os.environ["FEAST_SERVING_URL"] = "path:6566"
```

In [2]:
import os

import pandas as pd
from feast import Client, Feature, Entity, ValueType, FeatureTable
from feast.data_source import FileSource, KafkaSource
from feast.data_format import ParquetFormat, AvroFormat

In [3]:
# Create the client for the feature store (FS).
# NOTE(review): presumably the connection settings come from the FEAST_CORE_URL /
# FEAST_SERVING_URL environment variables described in the Configuration
# section — confirm.
client = Client()

### Устанавливаем параметры фичей

In [None]:
# The entity plays the role of the key: it is declared under the name "user_id"
# with an INT64 value type.
driver_id = Entity(
    name="user_id",
    description="Общий агреграт по пользователю",
    value_type=ValueType.INT64,
)

In [None]:
# Plain per-user features / aggregates: two FLOAT rates and one INT32 counter.
acc_rate = Feature(name="acc_rate", dtype=ValueType.FLOAT)
conv_rate = Feature(name="conv_rate", dtype=ValueType.FLOAT)
avg_daily_trips = Feature(name="avg_daily_trips", dtype=ValueType.INT32)

```python
# Illustrative template of a feature table definition.
# The name is "user_statistics" because that is the name the later
# get_feature_table() lookup and the "user_statistics:<feature>" references use.
FeatureTable(
    name="user_statistics",
    entities=["user_id"],
    features=[
        acc_rate,
        conv_rate,
        avg_daily_trips,
    ],
    # ... any further arguments go here
)
```

In [None]:
# Location of the offline feature data: the FEAST_LOCATION environment variable
# (or the default warehouse URI when it is unset) plus the "users_data" subfolder.
data_location = os.path.join(
    os.environ.get("FEAST_LOCATION", "file:///data/warehouse/nameartem/"),
    "users_data",
)

In [6]:
# Path of the per-user statistics data under the shared data location.
# NOTE(review): user_source is not passed to the FeatureTable below; presumably
# it was meant to back a batch source (e.g. a FileSource) — confirm before
# ingesting data.
user_source = os.path.join(data_location, "user_statistics")

# Build the feature table keyed by the "user_id" entity.
# The table is named "user_statistics" because that is the name the later
# get_feature_table() lookup and the "user_statistics:<feature>" references use;
# the previous free-text name could never be matched by those calls.
users_statistics = FeatureTable(
    name="user_statistics",
    entities=["user_id"],
    features=[
        acc_rate,
        conv_rate,
        avg_daily_trips,
    ],
)

### Registering entities and feature tables in Feast Core

In [8]:
# Register the entity as well as the feature table: the table refers to the
# entity by its name ("user_id"), so the entity is applied first.
client.apply(driver_id)
client.apply(users_statistics)

In [None]:
# Look the table up by name and render its definition as YAML.
registered_table = client.get_feature_table("user_statistics")
registered_table.to_yaml()

```yaml
spec:
  name: user_statistics
  entities:
  - user_id
  features:
  - name: conv_rate
    valueType: FLOAT
  - name: avg_daily_trips
    valueType: INT32
  - name: acc_rate
    valueType: FLOAT
    fileOptions:
      fileFormat:
        parquetFormat: {}
      fileUrl: file:///data/warehouse/nameartem/
meta:
```

### Замена данных

In [13]:
# Empty DataFrame used as the data to ingest in the next cell.
# NOTE(review): presumably a placeholder for the real user statistics — confirm.
user_stat_m = pd.DataFrame()

In [None]:
# Ingest the DataFrame into the users_statistics feature table.
client.ingest(users_statistics, user_stat_m)

### Загрузить данные

In [19]:
# Request historical values of the three "user_statistics" features.
# NOTE(review): no entity rows / entity source are passed here; presumably the
# client API expects one as well — confirm against the installed Feast version.
job = client.get_historical_features(
    feature_refs=[
        "user_statistics:avg_daily_trips",
        "user_statistics:conv_rate",
        "user_statistics:acc_rate"
    ]
)

In [None]:
# Build a DataFrame from the dictionary returned by the retrieval job.
# NOTE(review): assumes the job object exposes to_dict() — confirm.
pd.DataFrame(job.to_dict())

---------------------------------------

# FS HopsWorks

In [None]:
from hops import featurestore
from hops import serving
from hops import model
from hops import hdfs
# NOTE: "import io.hops.util.Hops" cannot be executed from Python: "io" resolves
# to the standard-library module, which is not a package, so the statement raises
# ModuleNotFoundError. It looks like the JVM (Scala/Java) hops-util import and is
# kept here only as a comment.
# import io.hops.util.Hops

In [None]:
# Write features: insert a DataFrame (empty here) into the named feature group.
featurestore.insert_into_featuregroup(pd.DataFrame(), "название набора")

In [None]:
# Read features: fetch the listed features and keep the result in features_df.
features_df = featurestore.get_features(["set:feature_1", "set:feature_2", "set:feature_N"])

In [None]:
# Read a whole feature group, requesting the result as a pandas DataFrame.
featurestore.get_featuregroup("название набора", dataframe_type="pandas")

### Создание API

In [None]:
script_path = "путь до модели или где читаем фичи"

# Export the model, then remove any serving that already exists under the same
# name so that it can be created afresh.
model.export(script_path, "DataSample")
if serving.exists("DataSample"):
    serving.delete("DataSample")

# Create (or update) the serving from the script.
serving.create_or_update(
    script_path,
    "DataSample",
    serving_type="sklearn",
    model_version=28,
)

# Start the serving and send it a request. The request is addressed by serving
# name, so it must name the serving started above ("DataSample"), not a host.
serving.start("DataSample")
response = serving.make_inference_request("DataSample", "данные")

### Работа с HDFS

In [None]:
# Quick reference of the hops.hdfs helpers, one call per operation.
# List a directory.
hdfs.ls("data/warehouse")
# NOTE(review): cp() is called without arguments — presumably a placeholder;
# confirm the source/destination paths it needs.
hdfs.cp()
# Create a directory, then remove it recursively.
hdfs.mkdir("data/warehouse/test_dir")
hdfs.rmr("data/warehouse/test_dir")
# NOTE(review): move() is called without arguments — presumably a placeholder;
# confirm the source/destination paths it needs.
hdfs.move()
# NOTE(review): 700 is a decimal literal; if an octal permission mask is
# expected it would be written 0o700 — confirm against the hops documentation.
hdfs.chmod("data/warehouse", 700)
# Check whether a path exists.
hdfs.exists("data/warehouse/")
# Copy between the local file system and HDFS, overwriting existing targets.
hdfs.copy_to_hdfs("data/warehouse/test_dir", "/", overwrite=True)
hdfs.copy_to_local("data/warehouse/test_dir", overwrite=True)