In [1]:
import os
os.chdir("../")
from datetime import timedelta
from aladdin import PostgreSQLConfig, RedshiftSQLConfig, RedisConfig, FeatureView, CombinedFeatureView, FeatureViewMetadata, CombinedFeatureViewMetadata, Entity, String, Bool, UUID, Int32, Int64, Float, Double, CreatedAtTimestamp, Ratio, Contains, Equals, EventTimestamp, DateComponent, TimeDifferance, DifferanceBetween, FileSource, FeatureStore
import numpy as np

In [2]:
from aladdin.repo_definition import RepoDefinition
from aladdin.online_source import BatchOnlineSource
# The first cell moved the working directory up one level; step into "example" from there.
# Both chdir calls are relative, so re-running this cell on its own shifts the cwd again.
os.chdir("example")
# Build the feature store from a RepoDefinition given only empty collections plus a
# BatchOnlineSource; feature views are registered later through store.add_feature_view(...).
store = FeatureStore.from_definition(RepoDefinition(set(), set(), {}, BatchOnlineSource()))

In [3]:
class Match(FeatureView):
    """Feature view "match": football match features read from a remote CSV file.

    The view declares two string entities (``team_1`` and ``team_2``), an event
    timestamp (``date``), the raw score strings, and several features derived
    from the half-time score.
    """

    metadata = FeatureViewMetadata(
        name="match",
        description="Features about football matches",
        tags={},
        batch_source=FileSource(
            path="https://raw.githubusercontent.com/footballcsv/cache.footballdata/master/2020-21/eng.1.csv",
            # Source column name -> feature name used in this view.
            mapping_keys={
                "HT": "half_time_score",
                "FT": "full_time_score",
                "Team 2": "team_2",
                "Team 1": "team_1",
                "Date": "date",
            }
        )
    )

    # Entity columns: these are the keys passed to store.features_for(...).
    team_1 = Entity(String())
    team_2 = Entity(String())
    # NOTE(review): max_join_with presumably bounds how old a row may be when joined — confirm in aladdin.
    date = EventTimestamp(max_join_with=timedelta(days=365))

    # Raw score strings such as "3-2".
    full_time_score = String().description("the scores at full time")
    half_time_score = String().description("the scores at half time")

    # Boolean feature: True when team_1 equals "Liverpool".
    is_liverpool = (team_1 == "Liverpool").description("If the home team is Liverpool")

    # Split "1-0" on "-" into a list of score strings.
    score_array = half_time_score.split("-")

    # Each lambda reads the derived column by its feature name ("score_array"), takes one
    # element of the split list, replaces NaN with 0 and casts the result to int.
    half_time_team_1_score = score_array.transformed_sync(lambda df: df["score_array"].str[0].replace({np.nan: 0}).astype(int))
    half_time_team_2_score = score_array.transformed_sync(lambda df: df["score_array"].str[1].replace({np.nan: 0}).astype(int))

    # Arithmetic on the two derived scores. The spelling "differance" is the feature name
    # that appears as an output column, so renaming it changes the view's schema.
    half_time_differance = half_time_team_1_score - half_time_team_2_score
    half_time_sum = half_time_team_1_score + half_time_team_2_score


# Register the view so it can be queried by its metadata name, "match".
store.add_feature_view(Match())

# Load the whole "match" view into a DataFrame. Top-level await relies on the notebook
# kernel's async support. Note that the name `df` is reassigned by a later cell.
df = await store.all_for("match").to_df()

0      0-1
1      1-0
2      3-2
3      0-0
4      0-0
      ... 
180    2-0
181    0-0
182    1-1
183    0-0
184    2-0
Name: half_time_score, Length: 185, dtype: object


In [4]:
# Display the "match" frame loaded in the previous cell (source columns plus derived features).
df

Unnamed: 0,date,team_2,full_time_score,half_time_team_1_score,score_array,half_time_differance,is_liverpool,team_1,half_time_score,half_time_sum,half_time_team_2_score
0,2020-09-12 00:00:00+00:00,Arsenal,0-3,0,"[0, 1]",-1,False,Fulham,0-1,1,1
1,2020-09-12 00:00:00+00:00,Southampton,1-0,1,"[1, 0]",1,False,Crystal Palace,1-0,1,0
2,2020-09-12 00:00:00+00:00,Leeds,4-3,3,"[3, 2]",1,True,Liverpool,3-2,5,2
3,2020-09-12 00:00:00+00:00,Newcastle,0-2,0,"[0, 0]",0,False,West Ham,0-0,0,0
4,2020-09-13 00:00:00+00:00,Leicester,0-3,0,"[0, 0]",0,False,West Brom,0-0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
180,2021-01-19 00:00:00+00:00,Chelsea,2-0,2,"[2, 0]",2,False,Leicester,2-0,2,0
181,2021-01-20 00:00:00+00:00,Aston Villa,2-0,0,"[0, 0]",0,False,Man City,0-0,0,0
182,2021-01-20 00:00:00+00:00,Man United,1-2,1,"[1, 1]",0,False,Fulham,1-1,2,1
183,2021-01-21 00:00:00+00:00,Burnley,0-1,0,"[0, 0]",0,True,Liverpool,0-0,0,0


In [5]:
await store.features_for({
    "team_1": ["Man City", "Leeds"],
    "team_2": ["Liverpool", "Arsenal"],
},
features=[
    "match:date",
    "match:full_time_score",
    "match:score_array",
    "match:half_time_team_1_score",
]
).to_df()

FeatureReferance(name='score_array', feature_view='match', dtype=FeatureType(name='string'), is_derivied=True)
defaultdict(<class 'dict'>, {'local_file/https://raw.githubusercontent.com/footballcsv/cache.footballdata/master/2020-21/eng.1.csv': {'team_1': ['Man City', 'Leeds'], 'team_2': ['Liverpool', 'Arsenal']}})
             Team 1       Team 2
0            Fulham      Arsenal
1    Crystal Palace  Southampton
2         Liverpool        Leeds
3          West Ham    Newcastle
4         West Brom    Leicester
..              ...          ...
180       Leicester      Chelsea
181        Man City  Aston Villa
182          Fulham   Man United
183       Liverpool      Burnley
184     Aston Villa    Newcastle

[185 rows x 2 columns]
3
2
              date full_time_score    team_1 half_time_score     team_2
0  Sat Oct 17 2020             1-0  Man City             1-0    Arsenal
1   Sun Nov 8 2020             1-1  Man City             1-1  Liverpool
Index(['date', 'full_time_score', 'team_1', 

Unnamed: 0,date,full_time_score,team_1,half_time_team_1_score,score_array,team_2
0,2020-10-17 00:00:00+00:00,1-0,Man City,1,"[1, 0]",Arsenal
1,2020-11-08 00:00:00+00:00,1-1,Man City,1,"[1, 1]",Liverpool


In [6]:
# Same full-view load as before, but passing limit=20 (the printed series below has 20 rows).
await store.all_for("match", limit=20).to_df()

0     0-1
1     1-0
2     3-2
3     0-0
4     0-0
5     0-0
6     0-1
7     0-2
8     2-1
9     2-1
10    0-1
11    1-1
12    1-1
13    0-2
14    0-0
15    1-1
16    0-0
17    0-2
18    1-1
19    1-2
Name: half_time_score, dtype: object


Unnamed: 0,date,team_2,full_time_score,half_time_team_1_score,score_array,half_time_differance,is_liverpool,team_1,half_time_score,half_time_sum,half_time_team_2_score
0,2020-09-12 00:00:00+00:00,Arsenal,0-3,0,"[0, 1]",-1,False,Fulham,0-1,1,1
1,2020-09-12 00:00:00+00:00,Southampton,1-0,1,"[1, 0]",1,False,Crystal Palace,1-0,1,0
2,2020-09-12 00:00:00+00:00,Leeds,4-3,3,"[3, 2]",1,True,Liverpool,3-2,5,2
3,2020-09-12 00:00:00+00:00,Newcastle,0-2,0,"[0, 0]",0,False,West Ham,0-0,0,0
4,2020-09-13 00:00:00+00:00,Leicester,0-3,0,"[0, 0]",0,False,West Brom,0-0,0,0
5,2020-09-13 00:00:00+00:00,Everton,0-1,0,"[0, 0]",0,False,Tottenham,0-0,0,0
6,2020-09-14 00:00:00+00:00,Chelsea,1-3,0,"[0, 1]",-1,False,Brighton,0-1,1,1
7,2020-09-14 00:00:00+00:00,Wolves,0-2,0,"[0, 2]",-2,False,Sheffield United,0-2,2,2
8,2020-09-19 00:00:00+00:00,West Brom,5-2,2,"[2, 1]",1,False,Everton,2-1,3,1
9,2020-09-19 00:00:00+00:00,Fulham,4-3,2,"[2, 1]",1,False,Leeds,2-1,3,1


In [16]:
async def some_feature_transformer(df):
    """Return the mean of an externally loaded "score" column minus ``df["team_1_score"]``.

    The external rows come from a data enricher created on
    ``PostgreSQLConfig.localhost()`` and are loaded on every call.

    Args:
        df: Frame that contains a "team_1_score" column.

    Returns:
        The result of ``mean(score) - df["team_1_score"]``.
    """
    # NOTE(review): "football.eng.5" has an unquoted identifier part starting with a digit,
    # which PostgreSQL will most likely reject — confirm the real table name.
    enricher = PostgreSQLConfig.localhost().data_enricher("SELECT * FROM football.eng.5")
    enriched = await enricher.load()
    mean_score = enriched["score"].mean()
    return mean_score - df["team_1_score"]


class OtherMatches(FeatureView):
    """Feature view "other_matches": full-time score features read from a second remote CSV.

    It declares the same entities and timestamp as the "match" view, adds a
    ``round`` feature, and derives per-team scores from the full-time score.
    """
    metadata = FeatureViewMetadata(
        name="other_matches",
        description="Features about football matches",
        tags={},
        batch_source=FileSource(
            path="https://raw.githubusercontent.com/footballcsv/england/master/2010s/2019-20/eng.1.csv",
            # Source column name -> feature name used in this view.
            mapping_keys={
                "FT": "full_time_score",
                "Team 2": "team_2",
                "Team 1": "team_1",
                "Date": "date",
                "Round": "round"
            }
        )
    )

    # Entity columns: these are the keys passed to store.features_for(...).
    team_1 = Entity(String())
    team_2 = Entity(String())
    # NOTE(review): max_join_with presumably bounds how old a row may be when joined — confirm in aladdin.
    date = EventTimestamp(max_join_with=timedelta(days=365))
    
    # Shadows the builtin round() only inside this class body.
    round = Int32()

    full_time_score = String().description("the scores at full time")

    # Split the full-time score string on "-" into a list of score strings.
    score_array = full_time_score.split("-")

    # Each lambda reads the derived column by its feature name ("score_array"), takes one
    # element of the split list, replaces NaN with 0 and casts the result to int.
    team_1_score = score_array.transformed_sync(lambda df: df["score_array"].str[0].replace({np.nan: 0}).astype(int))
    team_2_score = score_array.transformed_sync(lambda df: df["score_array"].str[1].replace({np.nan: 0}).astype(int))

    # The spelling "differance" is the feature name used when requesting "other_matches:differance".
    differance = team_1_score - team_2_score


    # Uses `transformed` with the coroutine function some_feature_transformer.
    # NOTE(review): some_feature is not among the features requested in the query cell of this
    # notebook, so the PostgreSQL-backed transformer may never have run here — confirm.
    some_feature = team_1_score.transformed(some_feature_transformer)



store.add_feature_view(OtherMatches())

df = await store.features_for({
    "team_1": ["Crystal Palace FC"],
    "team_2": ["Everton FC"]
}, features=[
    # "match:half_time_team_1_score",
    # "match:is_liverpool",

    "other_matches:score_array",
    "other_matches:differance",
    "other_matches:team_1_score",
    "other_matches:team_2_score",
]).to_df()

FeatureReferance(name='team_1_score', feature_view='other_matches', dtype=FeatureType(name='string'), is_derivied=True)
FeatureReferance(name='score_array', feature_view='other_matches', dtype=FeatureType(name='string'), is_derivied=True)
FeatureReferance(name='team_2_score', feature_view='other_matches', dtype=FeatureType(name='string'), is_derivied=True)
FeatureReferance(name='score_array', feature_view='other_matches', dtype=FeatureType(name='string'), is_derivied=True)
FeatureReferance(name='score_array', feature_view='other_matches', dtype=FeatureType(name='string'), is_derivied=True)
FeatureReferance(name='score_array', feature_view='other_matches', dtype=FeatureType(name='string'), is_derivied=True)
defaultdict(<class 'dict'>, {'local_file/https://raw.githubusercontent.com/footballcsv/england/master/2010s/2019-20/eng.1.csv': {'team_1': ['Crystal Palace FC'], 'team_2': ['Everton FC']}})
                  Team 1                Team 2
0           Liverpool FC       Norwich City FC


In [15]:
# Show the other_matches lookup result.
# NOTE(review): this cell's execution count (15) is lower than that of the defining cell (16),
# and the rendered output has empty team_1 / team_2 / score_array values. Re-run the notebook
# top to bottom to confirm the output reflects the current definitions.
df

Unnamed: 0,team_2,team_1,team_1_score,differance,score_array,team_2_score
0,,,0,0,,0
1,,,0,0,,0
