In [4]:

from flask import Flask, make_response, request
from flask_caching import Cache
from pyspark.sql import SparkSession, DataFrame, DataFrameReader
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, FloatType
from pyspark.sql.functions import col, last, sum, avg, count

from util.databridge import Databridge
from transform.penalty_cards_agg import penalty_cards_agg
import schema.data_structs as schema
import data.paths as data_routes


In [None]:
# Project data holder for this notebook. NOTE(review): 'local[8]' looks like a
# Spark master URL - confirm how util.databridge interprets `data_location`.
data = Databridge(data_location='local[8]', name='API')
reader = data.get_reader()

# Each table is registered under the same identifier that names its CSV path in
# `data.paths` and its schema in `schema.data_structs`, so one list drives all three.
table_names = (
    'trmkt_appearences',
    'trmkt_clubs',
    'trmkt_competitions',
    'trmkt_games',
    'trmkt_leagues',
    'trmkt_players',
)

data.add_dataframes([
    (
        reader.csv(
            getattr(data_routes, table),
            header=True,
            schema=getattr(schema, table),
        ),
        table,
    )
    for table in table_names
])

# Quick sanity check: print the schema of every registered frame.
for df, key in data.get_dataframes():
    df.printSchema()


In [None]:
import json 

# Appearances combined with player attributes.
# NOTE(review): presumably an equi-join on player_id - confirm in Databridge.join_stored.
player_app_df = data.join_stored('trmkt_appearences', 'trmkt_players', 'player_id')

# Per-player card totals; the aggregated columns are referenced below by their
# generated names, e.g. 'sum(yellow_cards)'.
sums = [sum(card_column) for card_column in ("yellow_cards", "red_cards")]

# Keep only the columns needed for the aggregation, restricted to defenders.
defender_rows = (
    player_app_df
    .select(
        'player_id',
        'yellow_cards',
        'red_cards',
        'minutes_played',
        'position',
        'sub_position',
    )
    .where('position == "Defender"')
)

# One row per player, most yellow cards first.
defender_stats = (
    defender_rows
    .groupBy('player_id')
    .agg(
        last('position'),
        last('sub_position'),
        *sums,
        avg('minutes_played'),
        count('player_id').alias('n_games'),
    )
    .orderBy('sum(yellow_cards)', ascending=False)
)

# Despite its name, `str_list` is ONE JSON document (a str): an object keyed by
# player_id, as produced by pandas' to_json(orient="index").
str_list = defender_stats.toPandas().set_index("player_id").to_json(orient="index")

# Preview only the beginning of the (potentially large) JSON text.
str_list[:200]



In [None]:
# `str_list` comes from the aggregation cell. When it is the result of
# DataFrame.to_json it is already one complete JSON document (a str); calling
# ','.join() on a str would insert a comma between every character and make
# json.loads fail. Only a genuine list of JSON fragments needs to be wrapped
# into a JSON array.
if isinstance(str_list, str):
    json_str = str_list
else:
    json_str = '[' + ','.join(str_list) + ']'

json_obj = json.loads(json_str)



In [None]:
# Scratch example: iterate over (label, number) pairs with tuple unpacking.
test = [
    ("asd", 123),
    ("ad", 23),
    ("sd", 12),
    ("as", 13),
]

# The loop variable is called `label` rather than `id`: a top-level loop
# variable stays bound after the loop, so `id` would keep shadowing the
# builtin id() for every later cell in the kernel.
for label, num in test:
    print(label, num)

In [None]:
# Scratch example: turn a dict into a list of (key, value) pairs.
mape = {"id": 12, "as": 213, "asdd": 123}

# items() yields the pairs in insertion order; the bare last expression is what
# the cell displays.
list(mape.items())
