In [87]:
import json
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, Normalizer
from collections import OrderedDict

# Load the standings file and order the teams by the `tp` column, highest
# first; `pos` is then the 1-based rank of each row in that ordering.
table = pd.read_json('data/table.json')
overall_table = (
    table
    .sort_values(by=['tp'], ascending=False)
    .reset_index(drop=True)
    .assign(pos=lambda ranked: ranked.index + 1)
)
list(overall_table.dtypes)

[dtype('O'),
 dtype('int64'),
 dtype('int64'),
 dtype('int64'),
 dtype('int64'),
 dtype('int64'),
 dtype('int64'),
 dtype('int64'),
 dtype('int64'),
 dtype('int64'),
 dtype('int64'),
 dtype('int64'),
 dtype('O')]

In [91]:
def get_column_types(df, col_types):
    """Infer a modeling type for every column of ``df``.

    Each column is classified by a fixed sequence of heuristics (first match
    wins): id-like name -> 'ignore', datetime dtype -> 'datetime', float
    dtype -> 'numeric', string column averaging two or more spaces per value
    -> 'text', at most 10 distinct values -> 'categorical', integer dtype ->
    'numeric', more than 90% distinct values -> 'ignore', anything else ->
    'categorical'.

    Args:
        df: pandas DataFrame whose columns are classified.
        col_types: mapping of column label -> type. Columns listed here keep
            the given type and bypass the heuristics entirely.

    Returns:
        A dict mapping column label to its type, in ``df`` column order.
        Columns typed 'ignore' (inferred or supplied) are left out.

    Side effects:
        Prints the complete specification, including ignored columns.
    """
    num_rows = df.shape[0]
    field_types = OrderedDict()

    for field in df.columns:
        # Caller-supplied types always win over the heuristics.
        if field in col_types:
            field_types[field] = col_types[field]
            continue

        series = df[field]
        field_type = series.dtype
        num_unique_values = series.nunique()

        # String data can live in an object column or in a pandas string
        # extension column; both expose the `.str` accessor.
        is_string_like = field_type == 'object' or isinstance(field_type, pd.StringDtype)
        avg_spaces = series.str.count(' ').mean() if is_string_like else -1

        # Automatically ignore `id`-related fields. Column labels are not
        # guaranteed to be strings, so normalise before lower-casing.
        if str(field).lower() in ['id', 'uuid', 'guid', 'pk', 'name']:
            field_types[field] = 'ignore'

        # Datetime is a straightforward data type (any resolution / timezone).
        elif pd.api.types.is_datetime64_any_dtype(field_type):
            field_types[field] = 'datetime'

        # Assume a float is always numeric, whatever its width.
        elif pd.api.types.is_float_dtype(field_type):
            field_types[field] = 'numeric'

        # If it's a string column where the contents has
        # many spaces on average, it's text
        elif is_string_like and avg_spaces >= 2.0:
            field_types[field] = 'text'

        # If the field has very few distinct values, it's categorical
        elif num_unique_values <= 10:
            field_types[field] = 'categorical'

        # If the field has many distinct integers, assume numeric.
        elif pd.api.types.is_integer_dtype(field_type):
            field_types[field] = 'numeric'

        # If the field has many distinct nonintegers, it's not helpful.
        elif num_unique_values > 0.9 * num_rows:
            field_types[field] = 'ignore'

        # The rest (e.g. bool) is categorical
        else:
            field_types[field] = 'categorical'

    # Print to console for user-level debugging
    print("Modeling with field specifications:")
    print("\n".join(["{}: {}".format(k, v) for k, v in field_types.items()]))

    # Ignored columns are reported above but not returned.
    return {k: v for k, v in field_types.items() if v != 'ignore'}

In [94]:
overall_table

Unnamed: 0,team,pos,tm,tw,twr,two,tl,tlr,tlo,sg,cg,tp,division
0,Бостон,1,48,38,33,3,10,6,1,183,101,80,East
1,Каролина,2,47,30,22,5,17,9,6,154,126,68,East
2,Торонто,3,49,30,25,5,19,11,7,166,129,68,East
3,Нью-Джерси,4,48,31,24,6,17,13,3,168,129,66,East
4,Даллас,5,50,28,25,2,22,13,7,171,130,65,West
5,Виннипег,6,50,31,24,7,19,18,1,162,131,63,West
6,Тампа-Бэй,7,47,31,25,5,16,15,0,170,139,63,East
7,Вегас,8,49,29,21,5,20,17,2,158,142,61,West
8,Сиэтл,9,47,28,23,5,19,14,3,172,145,61,West
9,Рейнджерс,10,48,26,21,3,22,14,6,153,128,60,East


In [92]:
get_column_types(overall_table, {})

Modeling with field specifications:
team: ignore
pos: numeric
tm: categorical
tw: numeric
twr: numeric
two: categorical
tl: numeric
tlr: numeric
tlo: categorical
sg: numeric
cg: numeric
tp: numeric
division: categorical


{'pos': 'numeric',
 'tm': 'categorical',
 'tw': 'numeric',
 'twr': 'numeric',
 'two': 'categorical',
 'tl': 'numeric',
 'tlr': 'numeric',
 'tlo': 'categorical',
 'sg': 'numeric',
 'cg': 'numeric',
 'tp': 'numeric',
 'division': 'categorical'}

In [57]:
# Lookup from team name to the team's `pos` value in `overall_table`.
team_id = overall_table.set_index('team')['pos'].to_dict()
team_id

{'Бостон': 1,
 'Каролина': 2,
 'Торонто': 3,
 'Нью-Джерси': 4,
 'Даллас': 5,
 'Виннипег': 6,
 'Тампа-Бэй': 7,
 'Вегас': 8,
 'Сиэтл': 9,
 'Рейнджерс': 10,
 'Лос-Анджелес': 11,
 'Вашингтон': 12,
 'Эдмонтон': 13,
 'Питтсбург': 14,
 'Миннесота': 15,
 'Колорадо': 16,
 'Калгари': 17,
 'Баффало': 18,
 'Нэшвилл': 19,
 'Флорида': 20,
 'Айлендерс': 21,
 'Детройт': 22,
 'Филадельфия': 23,
 'Сент-Луис': 24,
 'Оттава': 25,
 'Монреаль': 26,
 'Ванкувер': 27,
 'Сан-Хосе': 28,
 'Аризона': 29,
 'Анахайм': 30,
 'Чикаго': 31,
 'Коламбус': 32}

In [95]:
overall_table

Unnamed: 0,team,pos,tm,tw,twr,two,tl,tlr,tlo,sg,cg,tp,division
0,Бостон,1,48,38,33,3,10,6,1,183,101,80,East
1,Каролина,2,47,30,22,5,17,9,6,154,126,68,East
2,Торонто,3,49,30,25,5,19,11,7,166,129,68,East
3,Нью-Джерси,4,48,31,24,6,17,13,3,168,129,66,East
4,Даллас,5,50,28,25,2,22,13,7,171,130,65,West
5,Виннипег,6,50,31,24,7,19,18,1,162,131,63,West
6,Тампа-Бэй,7,47,31,25,5,16,15,0,170,139,63,East
7,Вегас,8,49,29,21,5,20,17,2,158,142,61,West
8,Сиэтл,9,47,28,23,5,19,14,3,172,145,61,West
9,Рейнджерс,10,48,26,21,3,22,14,6,153,128,60,East


In [161]:
def _prepare_venue_table(path, suffix):
    """Add suffixed per-game rate columns to one venue table and save it back.

    Reads the JSON table at ``path``, derives ``win_rate``, ``lose_rate``,
    ``sg_per_game``, ``cg_per_game`` and ``tie_ft_rate`` (each with ``suffix``
    appended), suffixes ``sg``/``cg``/``tp``, drops the raw count columns and
    overwrites ``path`` with the result. A file that already contains the
    derived columns is returned unchanged and not rewritten.
    """
    venue = pd.read_json(path)
    if 'win_rate' + suffix in venue.columns:
        # Already rewritten by an earlier call: the raw columns used below
        # are gone, so recomputing would raise KeyError.
        return venue

    games = venue['tm']
    venue['win_rate' + suffix] = venue['tw'] / games
    venue['lose_rate' + suffix] = venue['tl'] / games
    venue['sg_per_game' + suffix] = venue['sg'] / games
    venue['cg_per_game' + suffix] = venue['cg'] / games
    venue['tie_ft_rate' + suffix] = (
        venue['tw'] - venue['twr'] + venue['tl'] - venue['tlr']
    ) / games
    venue = venue.rename(columns={stat: stat + suffix for stat in ('sg', 'cg', 'tp')})
    venue = venue.drop(['pos', 'tm', 'tw', 'twr', 'two', 'tl', 'tlr', 'tlo', 'division'], axis=1)
    venue.to_json(path, orient="records")
    return venue


def overall_table(data_dir="data"):
    """Build the reduced overall standings and refresh the venue tables.

    Reads ``table.json``, ``table_home.json`` and ``table_guest.json`` from
    ``data_dir``, replaces the raw count columns with derived rate columns
    and overwrites each file in place with its reduced table.

    Because the inputs are overwritten, a file may already be in its reduced
    form when this runs again; such a file is reused as-is instead of failing
    with KeyError on the dropped columns.

    NOTE: an earlier cell binds the same name ``overall_table`` to a
    DataFrame; defining this function rebinds that name.

    Args:
        data_dir: directory holding the three JSON files (default ``"data"``).

    Returns:
        DataFrame with ``team``, ``pos``, ``sg``, ``cg``, ``tp`` and the
        derived columns ``win_rate``, ``lose_rate``, ``win_rate_ft``,
        ``lose_rate_ft``, ``tie_ft_rate``, ``sg_per_game``, ``cg_per_game``.
    """
    table_path = data_dir + '/table.json'
    table = pd.read_json(table_path)

    if 'win_rate' in table.columns:
        # File already holds the reduced table written by a previous call.
        standings = table
    else:
        standings = table.sort_values(by=['tp'], ascending=False).reset_index(drop=True)
        standings['pos'] = standings.index + 1
        games = standings['tm']
        standings['win_rate'] = standings['tw'] / games
        standings['lose_rate'] = standings['tl'] / games
        standings['win_rate_ft'] = standings['twr'] / standings['tw']
        standings['lose_rate_ft'] = standings['tlr'] / standings['tl']
        standings['tie_ft_rate'] = (
            standings['tw'] - standings['twr'] + standings['tl'] - standings['tlr']
        ) / games
        standings['sg_per_game'] = standings['sg'] / games
        standings['cg_per_game'] = standings['cg'] / games
        standings = standings.drop(['tm', 'tw', 'twr', 'two', 'tl', 'tlr', 'tlo', 'division'], axis=1)
        standings.to_json(table_path, orient="records")

    # The venue tables are only rewritten on disk; they are not merged into
    # the returned frame.
    _prepare_venue_table(data_dir + '/table_home.json', '_home')
    _prepare_venue_table(data_dir + '/table_guest.json', '_guest')

    return standings

In [158]:
table = pd.read_json('data/table.json')
table

Unnamed: 0,team,pos,sg,cg,tp,win_rate,lose_rate,win_rate_ft,lose_rate_ft,tie_ft_rate,sg_per_game,cg_per_game
0,Бостон,1,183,101,80,0.791667,0.208333,0.868421,0.6,0.1875,3.8125,2.104167
1,Каролина,2,154,126,68,0.638298,0.361702,0.733333,0.529412,0.340426,3.276596,2.680851
2,Торонто,3,166,129,68,0.612245,0.387755,0.833333,0.578947,0.265306,3.387755,2.632653
3,Нью-Джерси,4,168,129,66,0.645833,0.354167,0.774194,0.764706,0.229167,3.5,2.6875
4,Даллас,5,171,130,65,0.56,0.44,0.892857,0.590909,0.24,3.42,2.6
5,Виннипег,6,162,131,63,0.62,0.38,0.774194,0.947368,0.16,3.24,2.62
6,Тампа-Бэй,7,170,139,63,0.659574,0.340426,0.806452,0.9375,0.148936,3.617021,2.957447
7,Вегас,8,158,142,61,0.591837,0.408163,0.724138,0.85,0.22449,3.22449,2.897959
8,Сиэтл,9,172,145,61,0.595745,0.404255,0.821429,0.736842,0.212766,3.659574,3.085106
9,Рейнджерс,10,153,128,60,0.541667,0.458333,0.807692,0.636364,0.270833,3.1875,2.666667


In [None]:
table = table.drop(['tm','tw','twr','two','tl','tlr','tlo', 'division'], axis=1)

In [162]:
df = overall_table()
df

Unnamed: 0,team,pos,sg,cg,tp,win_rate,lose_rate,win_rate_ft,lose_rate_ft,tie_ft_rate,sg_per_game,cg_per_game
0,Бостон,1,183,101,80,0.791667,0.208333,0.868421,0.6,0.1875,3.8125,2.104167
1,Каролина,2,154,126,68,0.638298,0.361702,0.733333,0.529412,0.340426,3.276596,2.680851
2,Торонто,3,166,129,68,0.612245,0.387755,0.833333,0.578947,0.265306,3.387755,2.632653
3,Нью-Джерси,4,168,129,66,0.645833,0.354167,0.774194,0.764706,0.229167,3.5,2.6875
4,Даллас,5,171,130,65,0.56,0.44,0.892857,0.590909,0.24,3.42,2.6
5,Виннипег,6,162,131,63,0.62,0.38,0.774194,0.947368,0.16,3.24,2.62
6,Тампа-Бэй,7,170,139,63,0.659574,0.340426,0.806452,0.9375,0.148936,3.617021,2.957447
7,Вегас,8,158,142,61,0.591837,0.408163,0.724138,0.85,0.22449,3.22449,2.897959
8,Сиэтл,9,172,145,61,0.595745,0.404255,0.821429,0.736842,0.212766,3.659574,3.085106
9,Рейнджерс,10,153,128,60,0.541667,0.458333,0.807692,0.636364,0.270833,3.1875,2.666667


In [143]:
table = table.join(df.set_index('team'), on='team')

ValueError: columns overlap but no suffix specified: Index(['sg_home', 'cg_home', 'tp_home', 'win_rate_home', 'lose_rate_home'], dtype='object')

In [144]:
table

Unnamed: 0,team,position,sg,cg,tp,sg_home,cg_home,tp_home,win_rate_home,lose_rate_home
0,Даллас,1,171,130,65,87,67,31,0.565217,0.434783
1,Виннипег,2,162,131,63,81,55,34,0.708333,0.291667
2,Сиэтл,3,172,145,61,73,71,27,0.5,0.5
3,Вегас,4,158,142,61,76,75,28,0.518519,0.481481
4,Лос-Анджелес,5,163,170,60,82,84,30,0.56,0.44
5,Эдмонтон,6,180,159,58,92,86,27,0.461538,0.538462
6,Миннесота,7,148,136,56,76,69,29,0.608696,0.391304
7,Колорадо,8,147,131,55,77,67,27,0.5,0.5
8,Калгари,9,152,150,55,82,77,30,0.56,0.44
9,Нэшвилл,10,137,141,54,71,66,31,0.583333,0.416667


In [119]:
table_home.head()

Unnamed: 0,team,pos,tm,tw,twr,two,tl,tlr,tlo,sg,cg,tp,division
0,Виннипег,1,24,17,14,3,7,7,0,81,55,34,West
1,Даллас,2,23,13,11,2,10,5,3,87,67,31,West
2,Нэшвилл,3,24,14,11,2,10,7,2,71,66,31,West
3,Калгари,4,25,14,12,2,11,9,1,82,77,30,West
4,Лос-Анджелес,5,25,14,11,2,11,9,2,82,84,30,West


ValueError: columns overlap but no suffix specified: Index(['pos', 'sg', 'cg', 'tp'], dtype='object')

In [104]:
df = overall_table()
df

Unnamed: 0,team,pos,sg,cg,tp,win_rate,lose_rate,win_rate_ft,lose_rate_ft,tie_ft_rate
0,Бостон,1,183,101,80,0.791667,0.208333,0.868421,0.6,0.1875
1,Каролина,2,154,126,68,0.638298,0.361702,0.733333,0.529412,0.340426
2,Торонто,3,166,129,68,0.612245,0.387755,0.833333,0.578947,0.265306
3,Нью-Джерси,4,168,129,66,0.645833,0.354167,0.774194,0.764706,0.229167
4,Даллас,5,171,130,65,0.56,0.44,0.892857,0.590909,0.24
5,Виннипег,6,162,131,63,0.62,0.38,0.774194,0.947368,0.16
6,Тампа-Бэй,7,170,139,63,0.659574,0.340426,0.806452,0.9375,0.148936
7,Вегас,8,158,142,61,0.591837,0.408163,0.724138,0.85,0.22449
8,Сиэтл,9,172,145,61,0.595745,0.404255,0.821429,0.736842,0.212766
9,Рейнджерс,10,153,128,60,0.541667,0.458333,0.807692,0.636364,0.270833


In [200]:
table_guest = pd.read_json('data/table_guest.json')
table_guest.head()

Unnamed: 0,team,sg_guest,cg_guest,tp_guest,win_rate_guest,lose_rate_guest,sg_per_game_guest,cg_per_game_guest,tie_ft_rate_guest
0,Сиэтл,99,74,34,0.695652,0.304348,4.304348,3.217391,0.130435
1,Даллас,84,63,34,0.555556,0.444444,3.111111,2.333333,0.185185
2,Вегас,82,67,33,0.681818,0.318182,3.727273,3.045455,0.318182
3,Эдмонтон,88,73,31,0.652174,0.347826,3.826087,3.173913,0.043478
4,Лос-Анджелес,81,86,30,0.52,0.48,3.24,3.44,0.36


In [207]:
table_home = pd.read_json('data/table_home.json')
table_home.head()

Unnamed: 0,team,pos,sg,cg,tp,win_rate,lose_rate,win_rate_ft,lose_rate_ft,tie_ft_rate,sg_per_game,cg_per_game
0,Бостон,1,183,101,80,0.791667,0.208333,0.868421,0.6,0.1875,3.8125,2.104167
1,Каролина,2,154,126,68,0.638298,0.361702,0.733333,0.529412,0.340426,3.276596,2.680851
2,Торонто,3,166,129,68,0.612245,0.387755,0.833333,0.578947,0.265306,3.387755,2.632653
3,Нью-Джерси,4,168,129,66,0.645833,0.354167,0.774194,0.764706,0.229167,3.5,2.6875
4,Даллас,5,171,130,65,0.56,0.44,0.892857,0.590909,0.24,3.42,2.6


In [203]:
future_games = pd.read_json('data/next_tour_games.json')
future_games

Unnamed: 0,home_team,guest_team,date
0,Айлендерс,Детройт,2023-01-28 03:00:00
1,Каролина,Сан-Хосе,2023-01-28 03:00:00
2,Рейнджерс,Вегас,2023-01-28 03:00:00
3,Торонто,Оттава,2023-01-28 03:00:00
4,Флорида,Лос-Анджелес,2023-01-28 03:00:00
5,Даллас,Нью-Джерси,2023-01-28 04:30:00
6,Ванкувер,Коламбус,2023-01-28 06:00:00
7,Сиэтл,Калгари,2023-01-28 06:00:00
8,Колорадо,Сент-Луис,2023-01-28 23:00:00


In [197]:
cols

{'pos': 'h_pos',
 'sg': 'h_sg',
 'cg': 'h_cg',
 'tp': 'h_tp',
 'win_rate': 'h_win_rate',
 'lose_rate': 'h_lose_rate',
 'win_rate_ft': 'h_win_rate_ft',
 'lose_rate_ft': 'h_lose_rate_ft',
 'tie_ft_rate': 'h_tie_ft_rate',
 'sg_per_game': 'h_sg_per_game',
 'cg_per_game': 'h_cg_per_game'}

In [204]:
# Attach the per-team stats from `df` for the home side, tag every joined
# column (everything after the first three) with an "h_" prefix, then attach
# the same stats again for the guest side, still under their plain names.
future_games = future_games.join(df.set_index('team'), on='home_team')
cols = {column: "h_" + column for column in future_games.columns[3:]}
future_games = (
    future_games
    .rename(columns=cols)
    .join(df.set_index('team'), on='guest_team')
)
future_games

Unnamed: 0,home_team,guest_team,date,h_pos,h_sg,h_cg,h_tp,h_win_rate,h_lose_rate,h_win_rate_ft,...,sg,cg,tp,win_rate,lose_rate,win_rate_ft,lose_rate_ft,tie_ft_rate,sg_per_game,cg_per_game
0,Айлендерс,Детройт,2023-01-28 03:00:00,21,144,143,51,0.46,0.54,0.869565,...,145,158,50,0.446809,0.553191,0.761905,0.692308,0.276596,3.085106,3.361702
1,Каролина,Сан-Хосе,2023-01-28 03:00:00,2,154,126,68,0.638298,0.361702,0.733333,...,147,187,38,0.285714,0.714286,0.785714,0.714286,0.265306,3.0,3.816327
2,Рейнджерс,Вегас,2023-01-28 03:00:00,10,153,128,60,0.541667,0.458333,0.807692,...,158,142,61,0.591837,0.408163,0.724138,0.85,0.22449,3.22449,2.897959
3,Торонто,Оттава,2023-01-28 03:00:00,3,166,129,68,0.612245,0.387755,0.833333,...,135,153,45,0.446809,0.553191,0.761905,0.884615,0.170213,2.87234,3.255319
4,Флорида,Лос-Анджелес,2023-01-28 03:00:00,20,171,176,52,0.46,0.54,0.956522,...,163,170,60,0.54,0.46,0.703704,0.73913,0.28,3.26,3.4
5,Даллас,Нью-Джерси,2023-01-28 04:30:00,5,171,130,65,0.56,0.44,0.892857,...,168,129,66,0.645833,0.354167,0.774194,0.764706,0.229167,3.5,2.6875
6,Ванкувер,Коламбус,2023-01-28 06:00:00,27,160,191,41,0.395833,0.604167,0.631579,...,125,186,33,0.3125,0.6875,0.666667,0.909091,0.166667,2.604167,3.875
7,Сиэтл,Калгари,2023-01-28 06:00:00,9,172,145,61,0.595745,0.404255,0.821429,...,152,150,55,0.469388,0.530612,0.826087,0.653846,0.265306,3.102041,3.061224
8,Колорадо,Сент-Луис,2023-01-28 23:00:00,16,147,131,55,0.553191,0.446809,0.653846,...,152,177,49,0.469388,0.530612,0.695652,0.884615,0.204082,3.102041,3.612245


In [205]:
# Turn every "h_..." target name in the rename map into "g_..." and apply it,
# so the still-unprefixed guest columns get the "g_" prefix.
cols = {column: 'g' + renamed[1:] for column, renamed in cols.items()}
future_games = future_games.rename(columns=cols)
future_games

Unnamed: 0,home_team,guest_team,date,h_pos,h_sg,h_cg,h_tp,h_win_rate,h_lose_rate,h_win_rate_ft,...,g_sg,g_cg,g_tp,g_win_rate,g_lose_rate,g_win_rate_ft,g_lose_rate_ft,g_tie_ft_rate,g_sg_per_game,g_cg_per_game
0,Айлендерс,Детройт,2023-01-28 03:00:00,21,144,143,51,0.46,0.54,0.869565,...,145,158,50,0.446809,0.553191,0.761905,0.692308,0.276596,3.085106,3.361702
1,Каролина,Сан-Хосе,2023-01-28 03:00:00,2,154,126,68,0.638298,0.361702,0.733333,...,147,187,38,0.285714,0.714286,0.785714,0.714286,0.265306,3.0,3.816327
2,Рейнджерс,Вегас,2023-01-28 03:00:00,10,153,128,60,0.541667,0.458333,0.807692,...,158,142,61,0.591837,0.408163,0.724138,0.85,0.22449,3.22449,2.897959
3,Торонто,Оттава,2023-01-28 03:00:00,3,166,129,68,0.612245,0.387755,0.833333,...,135,153,45,0.446809,0.553191,0.761905,0.884615,0.170213,2.87234,3.255319
4,Флорида,Лос-Анджелес,2023-01-28 03:00:00,20,171,176,52,0.46,0.54,0.956522,...,163,170,60,0.54,0.46,0.703704,0.73913,0.28,3.26,3.4
5,Даллас,Нью-Джерси,2023-01-28 04:30:00,5,171,130,65,0.56,0.44,0.892857,...,168,129,66,0.645833,0.354167,0.774194,0.764706,0.229167,3.5,2.6875
6,Ванкувер,Коламбус,2023-01-28 06:00:00,27,160,191,41,0.395833,0.604167,0.631579,...,125,186,33,0.3125,0.6875,0.666667,0.909091,0.166667,2.604167,3.875
7,Сиэтл,Калгари,2023-01-28 06:00:00,9,172,145,61,0.595745,0.404255,0.821429,...,152,150,55,0.469388,0.530612,0.826087,0.653846,0.265306,3.102041,3.061224
8,Колорадо,Сент-Луис,2023-01-28 23:00:00,16,147,131,55,0.553191,0.446809,0.653846,...,152,177,49,0.469388,0.530612,0.695652,0.884615,0.204082,3.102041,3.612245


In [206]:
# Add the home-venue stats of the home team and the guest-venue stats of the
# guest team to every fixture.
future_games = (
    future_games
    .join(table_home.set_index('team'), on='home_team')
    .join(table_guest.set_index('team'), on='guest_team')
)
future_games

Unnamed: 0,home_team,guest_team,date,h_pos,h_sg,h_cg,h_tp,h_win_rate,h_lose_rate,h_win_rate_ft,...,cg_per_game_home,tie_ft_rate_home,sg_guest,cg_guest,tp_guest,win_rate_guest,lose_rate_guest,sg_per_game_guest,cg_per_game_guest,tie_ft_rate_guest
0,Айлендерс,Детройт,2023-01-28 03:00:00,21,144,143,51,0.46,0.54,0.869565,...,2.5,0.166667,70,82,23,0.409091,0.590909,3.181818,3.727273,0.363636
1,Каролина,Сан-Хосе,2023-01-28 03:00:00,2,154,126,68,0.638298,0.361702,0.733333,...,2.714286,0.190476,74,88,21,0.36,0.64,2.96,3.52,0.2
2,Рейнджерс,Вегас,2023-01-28 03:00:00,10,153,128,60,0.541667,0.458333,0.807692,...,3.0,0.32,82,67,33,0.681818,0.318182,3.727273,3.045455,0.318182
3,Торонто,Оттава,2023-01-28 03:00:00,3,166,129,68,0.612245,0.387755,0.833333,...,2.384615,0.307692,57,76,18,0.363636,0.636364,2.590909,3.454545,0.227273
4,Флорида,Лос-Анджелес,2023-01-28 03:00:00,20,171,176,52,0.46,0.54,0.956522,...,3.095238,0.142857,81,86,30,0.52,0.48,3.24,3.44,0.36
5,Даллас,Нью-Джерси,2023-01-28 04:30:00,5,171,130,65,0.56,0.44,0.892857,...,2.913043,0.304348,93,59,38,0.782609,0.217391,4.043478,2.565217,0.217391
6,Ванкувер,Коламбус,2023-01-28 06:00:00,27,160,191,41,0.395833,0.604167,0.631579,...,3.913043,0.173913,43,78,10,0.190476,0.809524,2.047619,3.714286,0.142857
7,Сиэтл,Калгари,2023-01-28 06:00:00,9,172,145,61,0.595745,0.404255,0.821429,...,2.958333,0.291667,70,73,25,0.375,0.625,2.916667,3.041667,0.375
8,Колорадо,Сент-Луис,2023-01-28 23:00:00,16,147,131,55,0.553191,0.446809,0.653846,...,2.791667,0.208333,81,92,27,0.52,0.48,3.24,3.68,0.2


In [165]:
df

Unnamed: 0,team,pos,sg,cg,tp,win_rate,lose_rate,win_rate_ft,lose_rate_ft,tie_ft_rate,sg_per_game,cg_per_game
0,Бостон,1,183,101,80,0.791667,0.208333,0.868421,0.6,0.1875,3.8125,2.104167
1,Каролина,2,154,126,68,0.638298,0.361702,0.733333,0.529412,0.340426,3.276596,2.680851
2,Торонто,3,166,129,68,0.612245,0.387755,0.833333,0.578947,0.265306,3.387755,2.632653
3,Нью-Джерси,4,168,129,66,0.645833,0.354167,0.774194,0.764706,0.229167,3.5,2.6875
4,Даллас,5,171,130,65,0.56,0.44,0.892857,0.590909,0.24,3.42,2.6
5,Виннипег,6,162,131,63,0.62,0.38,0.774194,0.947368,0.16,3.24,2.62
6,Тампа-Бэй,7,170,139,63,0.659574,0.340426,0.806452,0.9375,0.148936,3.617021,2.957447
7,Вегас,8,158,142,61,0.591837,0.408163,0.724138,0.85,0.22449,3.22449,2.897959
8,Сиэтл,9,172,145,61,0.595745,0.404255,0.821429,0.736842,0.212766,3.659574,3.085106
9,Рейнджерс,10,153,128,60,0.541667,0.458333,0.807692,0.636364,0.270833,3.1875,2.666667


In [63]:
# Swap team names for their ids from `team_id`; a name missing from the
# mapping raises KeyError.
for side in ('home_team', 'guest_team'):
    future_games[side] = future_games[side].apply(lambda name: team_id[name])
future_games

Unnamed: 0,home_team,guest_team,date
0,21,22,2023-01-28 03:00:00
1,2,28,2023-01-28 03:00:00
2,10,8,2023-01-28 03:00:00
3,3,25,2023-01-28 03:00:00
4,20,11,2023-01-28 03:00:00
5,5,4,2023-01-28 04:30:00
6,27,32,2023-01-28 06:00:00
7,9,17,2023-01-28 06:00:00
8,16,24,2023-01-28 23:00:00


In [15]:
first = {'~AA': 'Ysp5M8Dr', 'AD': '1674777600', 'ADE': '1674777600', 'AB': '3', 'CR': '3', 'AC': '10', 'CX': 'Монреаль', 'RW': '0', 'AX': '1', 'AO': '1674787437', 'BX': '-1', 'WL': '', 'WQ': '', 'WM': 'МОН', 'AE': 'Монреаль', 'FH': 'Монреаль', 'JA': '6mRmaUfq', 'AZ': '0', 'GRA': '0', 'AG': '3', 'AT': '3', 'BG': '0', 'BA': '1', 'BC': '2', 'BE': '0', 'OA': 'rmLcPRlC-UNwYOE6j.png', 'WN': 'ДЕТ', 'AF': 'Детройт', 'FK': 'Детройт', 'JB': 'KjViblAk', 'AS': '2', 'GRB': '0', 'AH': '4', 'AU': '3', 'BH': '1', 'BB': '1', 'BD': '2', 'BF': '0', 'OB': 'vipsZYT0-8zhLUqwC.png', 'AW': '1'}
second = {'~AA': '8YaDSZQt', 'AD': '1674777600', 'ADE': '1674777600', 'AB': '3', 'CR': '3', 'AC': '3', 'CX': 'Тампа-Бэй', 'RW': '0', 'AX': '1', 'AO': '1674787887', 'BX': '-1', 'WL': '', 'WQ': '', 'WM': 'ТАМ', 'AE': 'Тампа-Бэй', 'FH': 'Тампа-Бэй', 'JA': 'ju3Hj6vi', 'AS': '1', 'AZ': '1', 'GRA': '0', 'AG': '3', 'BA': '1', 'BC': '0', 'BE': '2', 'OA': 'dE1G4mne-UPCTXNVN.png', 'WN': 'БОС', 'AF': 'Бостон', 'FK': 'Бостон', 'JB': 'UV1LkQgc', 'GRB': '0', 'AH': '2', 'BB': '0', 'BD': '1', 'BF': '1', 'OB': 'UeHV59Pq-Kf1zyLao.png', 'AW': '1'}
third = {'~AA': 'nPbHRgtm', 'AD': '1674777600', 'ADE': '1674777600', 'AB': '3', 'CR': '3', 'AC': '11', 'CX': 'Вашингтон', 'RW': '0', 'AX': '1', 'AO': '1674787857', 'BX': '-1', 'WL': '', 'WQ': '', 'WN': 'ПИТ', 'AF': 'Питтсбург', 'FK': 'Питтсбург', 'JB': 'Gn0Tm4O9', 'AZ': '0', 'GRB': '0', 'AH': '2', 'AU': '2', 'BH': '0', 'BJ': '1', 'BB': '0', 'BD': '1', 'BF': '1', 'OB': 'nJ0Qys8k-nRjPT3hI.png', 'WM': 'ВАШ', 'AE': 'Вашингтон', 'FH': 'Вашингтон', 'JA': 'xd1Plp93', 'AS': '1', 'GRA': '0', 'AG': '3', 'AT': '2', 'BG': '0', 'BI': '2', 'BA': '1', 'BC': '0', 'BE': '1', 'OA': 'dIknNK96-lpfS7F4s.png', 'AW': '1'}

In [8]:
with open("first.json", "w") as outfile:
    json.dump(first, outfile)

In [58]:
with open("team_id.json", "w") as outfile:
    json.dump(team_id, outfile)

In [16]:
with open("third.json", "w") as outfile:
    json.dump(third, outfile)

In [20]:
set(first.keys()) - set(second.keys())

{'AT', 'AU', 'BG', 'BH'}

In [13]:
set(second.keys())

{'AB',
 'AC',
 'AD',
 'ADE',
 'AE',
 'AF',
 'AG',
 'AH',
 'AO',
 'AS',
 'AW',
 'AX',
 'AZ',
 'BA',
 'BB',
 'BC',
 'BD',
 'BE',
 'BF',
 'BX',
 'CR',
 'CX',
 'FH',
 'FK',
 'GRA',
 'GRB',
 'JA',
 'JB',
 'OA',
 'OB',
 'RW',
 'WL',
 'WM',
 'WN',
 'WQ',
 '~AA'}

In [21]:
results = pd.read_json("data/results_1.json")
results

Unnamed: 0,home_team,guest_team,home_score,guest_score,tie_in_ft,aot,shootout,date
0,Вашингтон,Питтсбург,3,2,1,0,1,2023-01-27 03:00:00
1,Монреаль,Детройт,3,4,1,1,0,2023-01-27 03:00:00
2,Тампа-Бэй,Бостон,3,2,0,0,0,2023-01-27 03:00:00
3,Виннипег,Баффало,2,3,0,0,0,2023-01-27 04:00:00
4,Миннесота,Филадельфия,3,2,1,1,0,2023-01-27 04:00:00
5,Нэшвилл,Нью-Джерси,6,4,0,0,0,2023-01-27 04:00:00
6,Аризона,Сент-Луис,5,0,0,0,0,2023-01-27 05:00:00
7,Калгари,Чикаго,1,5,0,0,0,2023-01-27 05:00:00
8,Колорадо,Анахайм,3,5,0,0,0,2023-01-27 05:00:00


In [209]:
pd.read_json('data/next_tour_games.json')

Unnamed: 0,home_team,guest_team,date,h_pos,h_sg,h_cg,h_tp,h_win_rate,h_lose_rate,h_win_rate_ft,...,cg_per_game_home,tie_ft_rate_home,sg_guest,cg_guest,tp_guest,win_rate_guest,lose_rate_guest,sg_per_game_guest,cg_per_game_guest,tie_ft_rate_guest
0,Айлендерс,Детройт,2023-01-28 03:00:00,21,144,143,51,0.46,0.54,0.869565,...,2.5,0.166667,70,82,23,0.409091,0.590909,3.181818,3.727273,0.363636
1,Каролина,Сан-Хосе,2023-01-28 03:00:00,2,154,126,68,0.638298,0.361702,0.733333,...,2.714286,0.190476,74,88,21,0.36,0.64,2.96,3.52,0.2
2,Рейнджерс,Вегас,2023-01-28 03:00:00,10,153,128,60,0.541667,0.458333,0.807692,...,3.0,0.32,82,67,33,0.681818,0.318182,3.727273,3.045455,0.318182
3,Торонто,Оттава,2023-01-28 03:00:00,3,166,129,68,0.612245,0.387755,0.833333,...,2.384615,0.307692,57,76,18,0.363636,0.636364,2.590909,3.454545,0.227273
4,Флорида,Лос-Анджелес,2023-01-28 03:00:00,20,171,176,52,0.46,0.54,0.956522,...,3.095238,0.142857,81,86,30,0.52,0.48,3.24,3.44,0.36
5,Даллас,Нью-Джерси,2023-01-28 04:30:00,5,171,130,65,0.56,0.44,0.892857,...,2.913043,0.304348,93,59,38,0.782609,0.217391,4.043478,2.565217,0.217391
6,Ванкувер,Коламбус,2023-01-28 06:00:00,27,160,191,41,0.395833,0.604167,0.631579,...,3.913043,0.173913,43,78,10,0.190476,0.809524,2.047619,3.714286,0.142857
7,Сиэтл,Калгари,2023-01-28 06:00:00,9,172,145,61,0.595745,0.404255,0.821429,...,2.958333,0.291667,70,73,25,0.375,0.625,2.916667,3.041667,0.375
8,Колорадо,Сент-Луис,2023-01-28 23:00:00,16,147,131,55,0.553191,0.446809,0.653846,...,2.791667,0.208333,81,92,27,0.52,0.48,3.24,3.68,0.2


In [3]:
from collections import Counter
s = "anagram"
d = Counter(s)
d

Counter({'a': 3, 'n': 1, 'g': 1, 'r': 1, 'm': 1})

In [6]:
nums = [1, 2, 3, 4]
# Value -> index lookup; a repeated value keeps the index of its last occurrence.
data = dict(zip(nums, range(len(nums))))
data

{1: 0, 2: 1, 3: 2, 4: 3}

In [7]:
list(enumerate(nums))

[(0, 1), (1, 2), (2, 3), (3, 4)]

In [12]:
strs = ["eat", "tea", "tan", "ate", "nat", "bat"]


def get_sorted(word):
    """Return the characters of ``word`` joined in case-insensitive sorted order."""
    ordered = sorted(word, key=str.lower)
    return ''.join(ordered)


# Pair every word with its sorted-letter key.
cnts = []
for word in strs:
    cnts.append((get_sorted(word), word))

In [17]:
# Pair each word with its sorted-letter key, then bucket the words by key in
# first-seen order.
cnts = [(''.join(sorted(word, key=str.lower)), word) for word in strs]
ans = {}
for key, word in cnts:
    ans.setdefault(key, []).append(word)

In [20]:
list(ans.values())

[['eat', 'tea', 'ate'], ['tan', 'nat'], ['bat']]

In [22]:
import collections

# Group words by a 26-slot letter-count signature; indexing is relative to
# "a", so characters outside a-z are not handled by this scheme.
ans = collections.defaultdict(list)
offset = ord("a")

for s in strs:
    count = [0] * 26
    for c in s:
        count[ord(c) - offset] += 1
    ans[tuple(count)].append(s)
ans.values()

dict_values([['eat', 'tea', 'ate'], ['tan', 'nat'], ['bat']])

In [25]:
5 // 3

1

In [36]:
board = [["5","3",".",".","7",".",".",".","."],["6",".",".","1","9","5",".",".","."],[".","9","8",".",".",".",".","6","."],["8",".",".",".","6",".",".",".","3"],["4",".",".","8",".","3",".",".","1"],["7",".",".",".","2",".",".",".","6"],[".","6",".",".",".",".","2","8","."],[".",".",".","4","1","9",".",".","5"],[".",".",".",".","8",".",".","7","9"]]

In [37]:
# Record every cell as (row, column, box) under the symbol it holds. `res`
# keeps the '.' entries as well so it can still be inspected in full.
res = {}
ans = True
for i in range(9):
    for j in range(9):
        number_of_box = 3 * (i // 3) + j // 3
        res.setdefault(board[i][j], []).append((i, j, number_of_box))

# A symbol is in conflict when two of its cells share a row, a column or a
# box. Comparing whole (row, column, box) tuples can never detect that,
# because every cell has a unique (row, column) pair, so each coordinate is
# checked on its own.
for value, cells in res.items():
    if value == '.':
        continue  # '.' marks an unfilled cell and may repeat freely
    for axis in range(3):  # 0 = row, 1 = column, 2 = box
        positions = [cell[axis] for cell in cells]
        if len(set(positions)) != len(positions):
            ans = False

In [38]:
res

{'5': [(0, 0, 0), (1, 5, 1), (7, 8, 8)],
 '3': [(0, 1, 0), (3, 8, 5), (4, 5, 4)],
 '.': [(0, 2, 0),
  (0, 3, 1),
  (0, 5, 1),
  (0, 6, 2),
  (0, 7, 2),
  (0, 8, 2),
  (1, 1, 0),
  (1, 2, 0),
  (1, 6, 2),
  (1, 7, 2),
  (1, 8, 2),
  (2, 0, 0),
  (2, 3, 1),
  (2, 4, 1),
  (2, 5, 1),
  (2, 6, 2),
  (2, 8, 2),
  (3, 1, 3),
  (3, 2, 3),
  (3, 3, 4),
  (3, 5, 4),
  (3, 6, 5),
  (3, 7, 5),
  (4, 1, 3),
  (4, 2, 3),
  (4, 4, 4),
  (4, 6, 5),
  (4, 7, 5),
  (5, 1, 3),
  (5, 2, 3),
  (5, 3, 4),
  (5, 5, 4),
  (5, 6, 5),
  (5, 7, 5),
  (6, 0, 6),
  (6, 2, 6),
  (6, 3, 7),
  (6, 4, 7),
  (6, 5, 7),
  (6, 8, 8),
  (7, 0, 6),
  (7, 1, 6),
  (7, 2, 6),
  (7, 6, 8),
  (7, 7, 8),
  (8, 0, 6),
  (8, 1, 6),
  (8, 2, 6),
  (8, 3, 7),
  (8, 5, 7),
  (8, 6, 8)],
 '7': [(0, 4, 1), (5, 0, 3), (8, 7, 8)],
 '6': [(1, 0, 0), (2, 7, 2), (3, 4, 4), (5, 8, 5), (6, 1, 6)],
 '1': [(1, 3, 1), (4, 8, 5), (7, 4, 7)],
 '9': [(1, 4, 1), (2, 1, 0), (7, 5, 7), (8, 8, 8)],
 '8': [(2, 2, 0), (3, 0, 3), (4, 3, 4), (6, 7, 8), (