In [3]:

from flask import Flask, make_response, request
from flask_caching import Cache
from pyspark.sql import SparkSession, DataFrame, DataFrameReader
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, FloatType
from pyspark.sql.functions import col, last, sum, avg, count

from util.databridge import Databridge
from util.queries import penalty_cards
import schema.data_structs as schema
import data.paths as data_routes


In [4]:
data = Databridge(data_location='local[8]', name='API')
reader = data.get_reader()

data.add_dataframes([
    (
        reader.csv(
            data_routes.trmkt_appearences,
            header=True,
            schema=schema.trmkt_appearences
        ),
        'trmkt_appearences'
    ),
    (
        reader.csv(
            data_routes.trmkt_clubs,
            header=True,
            schema=schema.trmkt_clubs
        ),
        'trmkt_clubs'
    ),
    (
        reader.csv(
            data_routes.trmkt_competitions,
            header=True,
            schema=schema.trmkt_competitions
        ),
        'trmkt_competitions'
    ),   
    (
        reader.csv(
            data_routes.trmkt_games,
            header=True,
            schema=schema.trmkt_games
        ),
        'trmkt_games'
    ),
    (
        reader.csv(
            data_routes.trmkt_leagues,
            header=True,
            schema=schema.trmkt_leagues
        ),
        'trmkt_leagues'
    ),
    (
        reader.csv(
            data_routes.trmkt_players,
            header=True,
            schema=schema.trmkt_players
        ),
        'trmkt_players'
    ),
])

for df, key in data.get_dataframes():
    df.printSchema()


root
 |-- player_id: integer (nullable = true)
 |-- game_id: integer (nullable = true)
 |-- appearence_id: integer (nullable = true)
 |-- competition_id: integer (nullable = true)
 |-- player_club_id: integer (nullable = true)
 |-- goals: integer (nullable = true)
 |-- assists: integer (nullable = true)
 |-- minutes_played: integer (nullable = true)
 |-- yellow_cards: integer (nullable = true)
 |-- red_cards: integer (nullable = true)

root
 |-- club_id: integer (nullable = true)
 |-- name: string (nullable = true)
 |-- pretty_name: string (nullable = true)
 |-- dcomestic_competition_id: string (nullable = true)
 |-- total_market_value: string (nullable = true)
 |-- squad_size: integer (nullable = true)
 |-- average_age: string (nullable = true)
 |-- foreigners_number: integer (nullable = true)
 |-- foreigners_percentage: float (nullable = true)
 |-- national_team_players: integer (nullable = true)
 |-- stadium_name: string (nullable = true)
 |-- stadium_seats: integer (nullable = true

In [16]:
import json 

player_app_df = data.join_stored('trmkt_appearences', 'trmkt_players', 'player_id')

sums = [
    sum("yellow_cards"), 
    sum('red_cards'),
]

str_list = player_app_df.select(
    'player_id',
    'yellow_cards',
    'red_cards',
    'minutes_played',
    'position',
    'sub_position'
).where('position == "Defender"') \
.groupBy('player_id') \
.agg(
    last('position'),
    last('sub_position'), 
    *sums,
    avg('minutes_played'),
    count('player_id').alias('n_games')
).orderBy('sum(yellow_cards)', ascending=False).toJSON().collect()

print(str_list)


['{"player_id":183647,"last(position)":"Defender","last(sub_position)":"Centre-Back","sum(yellow_cards)":97,"sum(red_cards)":1,"avg(minutes_played)":86.93846153846154,"n_games":260}', '{"player_id":25557,"last(position)":"Defender","last(sub_position)":"Centre-Back","sum(yellow_cards)":96,"sum(red_cards)":2,"avg(minutes_played)":87.12592592592593,"n_games":270}', '{"player_id":76746,"last(position)":"Defender","last(sub_position)":"Right-Back","sum(yellow_cards)":92,"sum(red_cards)":2,"avg(minutes_played)":85.69162995594714,"n_games":227}', '{"player_id":138927,"last(position)":"Defender","last(sub_position)":"Right-Back","sum(yellow_cards)":89,"sum(red_cards)":1,"avg(minutes_played)":82.11320754716981,"n_games":265}', '{"player_id":18944,"last(position)":"Defender","last(sub_position)":"Centre-Back","sum(yellow_cards)":86,"sum(red_cards)":2,"avg(minutes_played)":86.43478260869566,"n_games":322}', '{"player_id":93128,"last(position)":"Defender","last(sub_position)":"Centre-Back","sum(y

In [20]:
json_str = '['+','.join(str_list.toJSON().collect())+']'

AttributeError: 'list' object has no attribute 'toJSON'

In [3]:
test = [
    ("asd", 123),
    ("ad", 23),
    ("sd", 12),
    ("as", 13),
]

for id, num in test:
    print(id, num)

asd 123
ad 23
sd 12
as 13


In [4]:
mape = {
    "id"    : 12,
    "as"    : 213,
    "asdd"  : 123    
}

[(thing, mape[thing]) for thing in mape]


[('id', 12), ('as', 213), ('asdd', 123)]

In [19]:
test = ['SHIT AND ', 'SHIT ', 'MAKE POO']

thing = ","
thing.join(str_list)

thing

','

In [9]:
array = [400,-4,99,-20,8,20,-19,99,-7,55,-20,75,10,5,-9,491,999,25,-44,79,-76,41,-355,17,-9,95,-8,-11,4,617,-45,-7,95,-169,-12,6,433,33,813,37,-373,-11,95,500,-6,1983,-9,95,-82,52,-1,15,-2303,-10,45,-16,98,-67,29,-385,92,-16,98,7,16,98,-16,98,16,98,-8,5,-8,5]

negative = 0

positive = 0

for num in array:
    if num < 0:
        negative += num
    else:
        positive += num

print(negative)
print(positive)

-4126
8001
