In [95]:
import os
import time
import gc
import json
import requests

import pandas as pd
import numpy as np
import io
from pandas.io.json import json_normalize

%matplotlib inline
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

pd.options.display.float_format = "{:.2f}".format  # render floats with two decimals in DataFrame output
pd.set_option('display.html.use_mathjax', False) # avoids italic fonts in HTML tables by turning MathJax off (presumably triggered by '$' in contest names - confirm)

In [47]:
def get_data_rg(start_date, end_date, timeout=30):
    '''Fetch contests from the RotoGrinders ResultsDB API for a date range.

    Parameters
    ----------
    start_date, end_date : str
        Range bounds, FORMAT: MM/DD/YYYY eg. '10/24/2019'.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    The decoded JSON body of the response (the rest of the notebook
    indexes it as a list of contest records).

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    '''
    response = requests.get(
        'https://resultsdb-api.rotogrinders.com/api/contests',
        # Let requests build and URL-encode the query string instead of
        # concatenating raw user input into the URL.
        params={'start': start_date, 'end': end_date, 'lean': 'true'},
        # Without a timeout a stalled connection would block the kernel forever.
        timeout=timeout,
    )
    # Fail loudly on an error status rather than trying to parse an error page.
    response.raise_for_status()
    return response.json()

# NOTE: disabled code kept as a bare string literal - map_sport is never
# defined or called. It is the reverse of the id -> label mapping in rg_df
# (sport name -> numeric id).
# NOTE(review): if this is ever re-enabled, its last line should
# `raise ValueError(...)` instead of returning the exception object.
'''def map_sport(sportname):
    sportname = sportname.lower()
    if sportname == 'nfl':
        return 1
    if sportname == 'mlb':
        return 2
    if sportname == 'nba':
        return 3
    if sportname == 'nhl':
        return 4
    if sportname == 'golf' or sportname == 'apt':
        return 6
    return ValueError('Sport type is not tracked')
'''

def rg_df(json_data):
    '''Turn raw ResultsDB contest records into a cleaned DataFrame.

    Supported sport types are:
    NFL, NBA, MLB, NHL, Golf

    Parameters
    ----------
    json_data : list of dict
        Contest records; only the keys listed in ``columns`` are kept.

    Returns
    -------
    pandas.DataFrame
        Columns: name (lower-cased), sport (label), gameCount, dayofweek,
        start (datetime), prizePool, entryFee, maxEntriesPerUser (int),
        maxEntries (int), entryCount. Any row with a missing value -
        including an unmapped sport id or a missing start - is dropped.
        Index labels of the surviving rows are preserved.
    '''
    columns = ['name',
               'sport',
               'gameCount',
               'start',
               'prizePool',
               'entryFee',
               'maxEntriesPerUser',
               'maxEntries',
               'entryCount']
    sport_labels = {1: 'NFL',
                    2: 'MLB',
                    3: 'NBA',
                    4: 'NHL',
                    6: 'GOLF'}

    df = pd.DataFrame(json_data, columns=columns)
    # Ids outside the mapping become NaN and are removed by dropna() below.
    df['sport'] = df['sport'].map(sport_labels)
    # Drop the last character of each timestamp (presumably a trailing 'Z'
    # UTC marker - confirm). The .str accessor passes missing values through,
    # whereas slicing inside apply() raised TypeError on them before dropna()
    # had a chance to discard the row.
    df['start'] = pd.to_datetime(df['start'].str[:-1])
    df.insert(3, 'dayofweek', df['start'].dt.day_name())
    # NAs were only seen in NHL contests: maxEntries and maxEntriesPerUser are
    # missing; those contests are not on the site although the logs say
    # played=true.
    df = df.dropna().astype({'maxEntries': 'int', 'maxEntriesPerUser': 'int'})
    df['name'] = df['name'].str.lower()
    return df

In [30]:
# Download the raw contest records for 10/14/2019 - 11/07/2019 (MM/DD/YYYY).
json_data = get_data_rg('10/14/2019', '11/07/2019')

In [89]:
# Build the cleaned contests DataFrame that the per-sport cells below slice.
df = rg_df(json_data)

In [93]:
# One frame per sport, each selected with a boolean mask on the sport label.
nfl, nhl, golf, mlb, nba = (
    df.loc[df['sport'] == label]
    for label in ('NFL', 'NHL', 'GOLF', 'MLB', 'NBA')
)

In [96]:
# Preview the first five NFL contests (head() defaults to n=5).
nfl.head()

Unnamed: 0,name,sport,gameCount,dayofweek,start,prizePool,entryFee,maxEntriesPerUser,maxEntries,entryCount
21,nfl showdown $142k luxury box [$50k to 1st] (d...,NFL,1,Tuesday,2019-10-15 00:15:00,142000.0,1500.0,3,100,101
22,$10m fantasy football world championship quali...,NFL,1,Tuesday,2019-10-15 00:15:00,64755.55,5375.0,1,14,15
23,nfl showdown $125k first down [20 entry max] (...,NFL,1,Tuesday,2019-10-15 00:15:00,125000.0,1.0,20,148632,148633
24,nfl showdown $60k huddle [single entry] (det v...,NFL,1,Tuesday,2019-10-15 00:15:00,60000.0,5.0,1,14268,14269
25,nfl showdown $250k deep threat [$50k to 1st] (...,NFL,1,Tuesday,2019-10-15 00:15:00,250000.0,180.0,46,1543,1544


In [34]:
# Summary statistics for the NFL contests.
nfl.describe()

Unnamed: 0,prizePool,entryFee,maxEntriesPerUser,maxEntries,entryCount
count,82.0,82.0,82.0,82.0,82.0
mean,63221.41,239.47,19.51,13685.78,13686.78
std,234061.65,671.99,38.48,37547.38,37547.38
min,5000.0,0.1,1.0,10.0,11.0
25%,6000.0,5.0,1.0,111.0,112.0
50%,12000.0,25.0,3.0,689.0,690.0
75%,25000.0,100.0,20.0,4562.25,4563.25
max,1500000.0,5375.0,150.0,176470.0,176471.0


In [35]:
# Total prize pool over all NFL contests in the fetched range.
nfl['prizePool'].sum()

5184155.55

In [36]:
# rg_df lower-cases the contest names, so the match must ignore case; a
# case-sensitive search for 'Double Up' would always count zero.
# NOTE(review): the printed label says "per week" but the fetched range
# (10/14/2019 - 11/07/2019) is longer than one week - confirm the intent.
doubleups = nfl['name'].str.contains('Double Up', case=False).sum()
print('Double ups количество в неделю: {} \nПроцент от общего числа турниров: {:.0f}%'.format(doubleups, doubleups/nfl.name.count()*100))

Double ups количество в неделю: 25 
Процент от общего числа турниров: 30%


In [97]:
# Preview the first five NHL contests (head() defaults to n=5).
nhl.head()

Unnamed: 0,name,sport,gameCount,dayofweek,start,prizePool,entryFee,maxEntriesPerUser,maxEntries,entryCount
2,nhl $8k forecheck [20 entry max],NHL,7,Monday,2019-10-14 17:00:00,8000.0,4.0,20,2378,2379
3,nhl $5.2k power forward [2 entry max],NHL,7,Monday,2019-10-14 17:00:00,5200.0,88.0,2,67,68
4,nhl $15.6k sin bin [single entry],NHL,7,Monday,2019-10-14 17:00:00,15600.0,1500.0,1,11,12
5,nhl $40k blue line [$10k to 1st],NHL,7,Monday,2019-10-14 17:00:00,40000.0,222.0,6,200,201
6,nhl $6.85k crease master,NHL,7,Monday,2019-10-14 17:00:00,6850.0,691.0,1,11,12


In [207]:
# Summary statistics for the NHL contests.
nhl.describe()

Unnamed: 0,prizePool,entryFee,maxEntriesPerUser,maxEntries,entryCount
count,19.0,19.0,19.0,19.0,19.0
mean,19127.37,199.66,24.32,2197.47,2192.89
std,29067.91,411.19,45.69,3430.9,3432.72
min,5000.0,0.5,1.0,10.0,11.0
25%,5000.0,5.0,1.0,169.0,170.0
50%,8000.0,20.0,3.0,583.0,584.0
75%,14810.0,104.5,20.0,1703.5,1704.5
max,100000.0,1500.0,150.0,11890.0,11891.0


In [46]:
# Total prize pool over all NHL contests in the fetched range.
nhl['prizePool'].sum()

140650.0

In [217]:
# rg_df lower-cases the contest names, so the match must ignore case; a
# case-sensitive search for 'Double Up' would always count zero.
# NOTE(review): the printed label says "per week" but the fetched range
# (10/14/2019 - 11/07/2019) is longer than one week - confirm the intent.
doubleups = nhl['name'].str.contains('Double Up', case=False).sum()
print('Double ups количество в неделю: {} \nПроцент от общего числа турниров: {:.0f}%'.format(doubleups, doubleups/nhl.name.count()*100))

Double ups количество в неделю: 1 
Процент от общего числа турниров: 5%


In [64]:
# Show the entry count and the flattened prize table of the raw record at
# position 17.
# NOTE(review): 17 is a hard-coded position in the API response; it points at
# the same contest only while the response ordering stays the same.
print (json_data[17]['entryCount'], '\n', json_normalize(json_data[17]['prizes']))

11891 
     cash  maxFinish  minFinish tierPayoutDescriptions.Cash  value
0   1000          1          1                   $1,000.00   1000
1    350          2          2                     $350.00    350
2    200          3          3                     $200.00    200
3    100          4          4                     $100.00    100
4     50          5          5                      $50.00     50
5     30          7          6                      $30.00     30
6     20         10          8                      $20.00     20
7     15         13         11                      $15.00     15
8     10         18         14                      $10.00     10
9      8         28         19                       $8.00      8
10     6         48         29                       $6.00      6
11     5         73         49                       $5.00      5
12     4        103         74                       $4.00      4
13     3        183        104                       $3.00      3
14

In [62]:
# Flattened prize table of the raw record at position 19.
# NOTE(review): hard-coded position in the API response - verify it still
# refers to the intended contest after re-fetching.
json_normalize(json_data[19]['prizes'])

Unnamed: 0,cash,maxFinish,minFinish,tierPayoutDescriptions.Cash,value
0,25000,1,1,"$25,000.00",25000
1,10000,2,2,"$10,000.00",10000
2,5000,3,3,"$5,000.00",5000
3,3500,4,4,"$3,500.00",3500
4,2500,5,5,"$2,500.00",2500
5,2000,7,6,"$2,000.00",2000
6,1500,10,8,"$1,500.00",1500
7,1000,13,11,"$1,000.00",1000
8,700,20,14,$700.00,700
9,600,29,21,$600.00,600
