In [1]:
import os
os.chdir("../")
from datetime import timedelta
from aladdin import PostgreSQLConfig, RedshiftSQLConfig, RedisConfig, FeatureView, CombinedFeatureView, FeatureViewMetadata, CombinedFeatureViewMetadata, Entity, String, Bool, UUID, Int32, Int64, Float, Double, CreatedAtTimestamp, Ratio, Contains, Equals, EventTimestamp, DateComponent, TimeDifferance, DifferanceBetween, FileSource, FeatureStore
from aladdin.model import ModelService
import numpy as np

In [2]:
# Cell 1 already moved the working directory one level up with
# os.chdir("../"); this moves into the `example` directory from there.
# Both calls are relative, so re-running either cell in the same kernel
# changes directory again (this cell is not idempotent).
os.chdir("example")
# Feature store instance shared by every later cell.
# NOTE(review): "experimantal" is presumably the library's own spelling of
# the constructor name — confirm against the installed version before renaming.
store = FeatureStore.experimantal()

In [3]:
class Match(FeatureView):
    """Football match features read from the footballcsv 2020-21 `eng.1.csv` file.

    The raw CSV columns are renamed through `mapping_keys`. The half-time
    score string is split on "-" and turned into one integer score per team,
    from which the half-time difference and sum are derived.
    """

    metadata = FeatureViewMetadata(
        name="match",
        description="Features about football matches",
        tags={},
        batch_source=FileSource(
            path="https://raw.githubusercontent.com/footballcsv/cache.footballdata/master/2020-21/eng.1.csv",
            # CSV header -> feature name used in this view.
            mapping_keys={
                "HT": "half_time_score",
                "FT": "full_time_score",
                "Team 2": "team_2",
                "Team 1": "team_1",
                "Date": "date",
            }
        )
    )

    # Both teams are declared as entities; lookups elsewhere in this notebook
    # pass `team_1` and `team_2` values together.
    team_1 = Entity(String())
    team_2 = Entity(String())
    date = EventTimestamp(max_join_with=timedelta(days=365))

    full_time_score = String().description("the scores at full time")
    half_time_score = String().description("the scores at half time")

    is_liverpool = (team_1 == "Liverpool").description("If the home team is Liverpool")

    # "3-2" style strings become a two-element array (team_1 first, team_2 second).
    score_array = half_time_score.split("-")

    # The lambdas below are plain (non-async) callables, so they are registered
    # with `transformed_sync` — the same way `OtherMatches` registers its
    # identical lambdas. In this notebook `transformed` is used for the
    # `async def` transformer instead.
    # Missing entries are replaced by 0 before the integer cast.
    half_time_team_1_score = score_array.transformed_sync(lambda df: df["score_array"].str[0].replace({np.nan: 0}).astype(int))
    half_time_team_2_score = score_array.transformed_sync(lambda df: df["score_array"].str[1].replace({np.nan: 0}).astype(int))

    # Feature names keep the existing "differance" spelling: they are the
    # column names callers request and see in the output.
    half_time_differance = half_time_team_1_score - half_time_team_2_score
    half_time_sum = half_time_team_1_score + half_time_team_2_score


store.add_feature_view(Match())

df = await store.all_for("match").to_df()

In [4]:
# Display the frame loaded from the "match" view in the previous cell.
df

Unnamed: 0,date,team_1,full_time_score,half_time_score,team_2,is_liverpool,score_array,half_time_team_2_score,half_time_team_1_score,half_time_differance,half_time_sum
0,2020-09-12 00:00:00+00:00,Fulham,0-3,0-1,Arsenal,False,"[0, 1]",1,0,-1,1
1,2020-09-12 00:00:00+00:00,Crystal Palace,1-0,1-0,Southampton,False,"[1, 0]",0,1,1,1
2,2020-09-12 00:00:00+00:00,Liverpool,4-3,3-2,Leeds,True,"[3, 2]",2,3,1,5
3,2020-09-12 00:00:00+00:00,West Ham,0-2,0-0,Newcastle,False,"[0, 0]",0,0,0,0
4,2020-09-13 00:00:00+00:00,West Brom,0-3,0-0,Leicester,False,"[0, 0]",0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
180,2021-01-19 00:00:00+00:00,Leicester,2-0,2-0,Chelsea,False,"[2, 0]",0,2,2,2
181,2021-01-20 00:00:00+00:00,Man City,2-0,0-0,Aston Villa,False,"[0, 0]",0,0,0,0
182,2021-01-20 00:00:00+00:00,Fulham,1-2,1-1,Man United,False,"[1, 1]",1,1,0,2
183,2021-01-21 00:00:00+00:00,Liverpool,0-1,0-0,Burnley,True,"[0, 0]",0,0,0,0


In [7]:
await store.features_for({
    "team_1": ["Man City", "Man City"],
    "team_2": ["Arsenal", "Liverpool"],
},
features=[
    "match:date",
    "match:full_time_score",
    "match:score_array",
    "match:half_time_team_1_score",
]
).to_df()

Unnamed: 0,score_array,half_time_team_1_score,team_1,date,full_time_score,team_2
0,"[1, 0]",1,Man City,2020-10-17 00:00:00+00:00,1-0,Arsenal
1,"[1, 1]",1,Man City,2020-11-08 00:00:00+00:00,1-1,Liverpool


In [8]:
# Load the "match" view again, this time passing limit=20.
await store.all_for("match", limit=20).to_df()

Unnamed: 0,date,team_1,full_time_score,half_time_score,team_2,is_liverpool,score_array,half_time_team_2_score,half_time_team_1_score,half_time_differance,half_time_sum
0,2020-09-12 00:00:00+00:00,Fulham,0-3,0-1,Arsenal,False,"[0, 1]",1,0,-1,1
1,2020-09-12 00:00:00+00:00,Crystal Palace,1-0,1-0,Southampton,False,"[1, 0]",0,1,1,1
2,2020-09-12 00:00:00+00:00,Liverpool,4-3,3-2,Leeds,True,"[3, 2]",2,3,1,5
3,2020-09-12 00:00:00+00:00,West Ham,0-2,0-0,Newcastle,False,"[0, 0]",0,0,0,0
4,2020-09-13 00:00:00+00:00,West Brom,0-3,0-0,Leicester,False,"[0, 0]",0,0,0,0
5,2020-09-13 00:00:00+00:00,Tottenham,0-1,0-0,Everton,False,"[0, 0]",0,0,0,0
6,2020-09-14 00:00:00+00:00,Brighton,1-3,0-1,Chelsea,False,"[0, 1]",1,0,-1,1
7,2020-09-14 00:00:00+00:00,Sheffield United,0-2,0-2,Wolves,False,"[0, 2]",2,0,-2,2
8,2020-09-19 00:00:00+00:00,Everton,5-2,2-1,West Brom,False,"[2, 1]",1,2,1,3
9,2020-09-19 00:00:00+00:00,Leeds,4-3,2-1,Fulham,False,"[2, 1]",1,2,1,3


In [7]:
async def some_feature_transformer(df):
    """Return the mean loaded `score` minus each row's `team_1_score`.

    Builds a data enricher from `PostgreSQLConfig.localhost()` for a fixed SQL
    query, awaits its `load()`, and subtracts the `team_1_score` column of
    `df` from the mean of the loaded `score` column.
    """
    # NOTE(review): `football.eng.5` is an unusual table reference for
    # PostgreSQL — confirm it resolves against the target database.
    enriched = await PostgreSQLConfig.localhost().data_enricher(
        "SELECT * FROM football.eng.5"
    ).load()
    score_mean = enriched["score"].mean()
    return score_mean - df["team_1_score"]


class OtherMatches(FeatureView):
    """Football match features read from the footballcsv 2019-20 `eng.1.csv` file.

    Splits the full-time score string on "-" into one integer score per team,
    derives their difference, and adds `some_feature`, which is computed by
    the async `some_feature_transformer`.
    """
    metadata = FeatureViewMetadata(
        name="other_matches",
        description="Features about football matches",
        tags={},
        batch_source=FileSource(
            path="https://raw.githubusercontent.com/footballcsv/england/master/2010s/2019-20/eng.1.csv",
            # CSV header -> feature name used in this view.
            mapping_keys={
                "FT": "full_time_score",
                "Team 2": "team_2",
                "Team 1": "team_1",
                "Date": "date",
                "Round": "round"
            }
        )
    )

    # Both teams are declared as entities; the lookup later in this cell
    # passes `team_1` and `team_2` values together.
    team_1 = Entity(String())
    team_2 = Entity(String())
    date = EventTimestamp(max_join_with=timedelta(days=365))
    
    # Named after the "Round" column; shadows the builtin `round` only inside
    # this class body.
    round = Int32()

    full_time_score = String().description("the scores at full time")

    # "1-0" style strings become a two-element array (team_1 first, team_2 second).
    score_array = full_time_score.split("-")

    # Plain (non-async) lambdas are registered with `transformed_sync`.
    # Missing entries are replaced by 0 before the integer cast.
    team_1_score = score_array.transformed_sync(lambda df: df["score_array"].str[0].replace({np.nan: 0}).astype(int))
    team_2_score = score_array.transformed_sync(lambda df: df["score_array"].str[1].replace({np.nan: 0}).astype(int))

    # Feature name keeps the existing "differance" spelling; it is requested
    # by that name below ("other_matches:differance").
    differance = team_1_score - team_2_score


    # `some_feature_transformer` is an `async def`, so it goes through
    # `transformed` rather than `transformed_sync`.
    some_feature = team_1_score.transformed(some_feature_transformer)



store.add_feature_view(OtherMatches())

df = await store.features_for({
    "team_1": ["Crystal Palace FC"],
    "team_2": ["Everton FC"]
}, features=[
    # "match:half_time_team_1_score",
    # "match:is_liverpool",

    "other_matches:score_array",
    "other_matches:differance",
    "other_matches:team_1_score",
    "other_matches:team_2_score",
]).to_df()

In [8]:
# Display the "other_matches" lookup result assigned to `df` in the previous cell.
df

Unnamed: 0,score_array,team_2,differance,team_1_score,team_2_score,team_1
0,"[0, 0]",Everton FC,0,0,0,Crystal Palace FC


In [9]:
# Feature requests feeding the model: everything from `Match`, plus the two
# per-team full-time scores from `OtherMatches`.
model_features = [
    Match.select_all(),
    OtherMatches.select(lambda view: [view.team_1_score, view.team_2_score]),
]
test_model = ModelService(name="test_model", features=model_features)
# Register under the name used by `store.model("test_model")` later on.
store.add_model_service(test_model)

In [10]:
await store.model("test_model").features_for({
    "team_1": ["Man City", "Leeds"],
    "team_2": ["Liverpool", "Arsenal"],
}).to_df()

Unnamed: 0,score_array,score_array.1,team_1_score,date,half_time_sum,half_time_team_1_score,is_liverpool,team_1,team_1.1,half_time_team_2_score,team_2,team_2.1,team_2_score,half_time_differance,half_time_score,full_time_score,full_time_score.1
0,"[1, 0]",,0,2020-10-17 00:00:00+00:00,1,1,False,Man City,,0,Arsenal,,0,1,1-0,1-0,
1,"[1, 1]",,0,2020-11-08 00:00:00+00:00,2,1,False,Man City,,1,Liverpool,,0,0,1-1,1-1,


In [11]:
# Query the "match" view through `previous(days=500)`.
# NOTE(review): presumably this restricts rows to a 500-day window ending at
# the time of the call — confirm against the library documentation.
await store.feature_view("match").previous(days=500).to_df()

local_file


Unnamed: 0,date,team_1,full_time_score,half_time_score,team_2,score_array,is_liverpool,half_time_team_1_score,half_time_team_2_score,half_time_sum,half_time_differance
175,2021-01-17 00:00:00+00:00,Sheffield United,1-3,0-2,Tottenham,"[0, 2]",False,0,2,2,-2
176,2021-01-17 00:00:00+00:00,Liverpool,0-0,0-0,Man United,"[0, 0]",True,0,0,0,0
177,2021-01-17 00:00:00+00:00,Man City,4-0,1-0,Crystal Palace,"[1, 0]",False,1,0,1,1
178,2021-01-18 00:00:00+00:00,Arsenal,3-0,0-0,Newcastle,"[0, 0]",False,0,0,0,0
179,2021-01-19 00:00:00+00:00,West Ham,2-1,1-0,West Brom,"[1, 0]",False,1,0,1,1
180,2021-01-19 00:00:00+00:00,Leicester,2-0,2-0,Chelsea,"[2, 0]",False,2,0,2,2
181,2021-01-20 00:00:00+00:00,Man City,2-0,0-0,Aston Villa,"[0, 0]",False,0,0,0,0
182,2021-01-20 00:00:00+00:00,Fulham,1-2,1-1,Man United,"[1, 1]",False,1,1,2,0
183,2021-01-21 00:00:00+00:00,Liverpool,0-1,0-0,Burnley,"[0, 0]",True,0,0,0,0
184,2021-01-23 00:00:00+00:00,Aston Villa,2-0,2-0,Newcastle,"[2, 0]",False,2,0,2,2


In [13]:
# Load the "match" view through the feature-view handle, passing limit=20.
await store.feature_view("match").all(limit=20).to_df()

Unnamed: 0,date,team_1,full_time_score,half_time_score,team_2,score_array,is_liverpool,half_time_team_1_score,half_time_team_2_score,half_time_sum,half_time_differance
0,2020-09-12 00:00:00+00:00,Fulham,0-3,0-1,Arsenal,"[0, 1]",False,0,1,1,-1
1,2020-09-12 00:00:00+00:00,Crystal Palace,1-0,1-0,Southampton,"[1, 0]",False,1,0,1,1
2,2020-09-12 00:00:00+00:00,Liverpool,4-3,3-2,Leeds,"[3, 2]",True,3,2,5,1
3,2020-09-12 00:00:00+00:00,West Ham,0-2,0-0,Newcastle,"[0, 0]",False,0,0,0,0
4,2020-09-13 00:00:00+00:00,West Brom,0-3,0-0,Leicester,"[0, 0]",False,0,0,0,0
5,2020-09-13 00:00:00+00:00,Tottenham,0-1,0-0,Everton,"[0, 0]",False,0,0,0,0
6,2020-09-14 00:00:00+00:00,Brighton,1-3,0-1,Chelsea,"[0, 1]",False,0,1,1,-1
7,2020-09-14 00:00:00+00:00,Sheffield United,0-2,0-2,Wolves,"[0, 2]",False,0,2,2,-2
8,2020-09-19 00:00:00+00:00,Everton,5-2,2-1,West Brom,"[2, 1]",False,2,1,3,1
9,2020-09-19 00:00:00+00:00,Leeds,4-3,2-1,Fulham,"[2, 1]",False,2,1,3,1
