In [1]:
import pandas as pd
import json
from pandas import json_normalize
from tqdm import tqdm_notebook as tqdm
import numpy as np
import requests
import ast


pd.set_option('display.precision',6)
pd.set_option('display.max_columns', None)
pd.options.mode.chained_assignment = None

# response = requests.get('https://bet2face.com/api/getEventsSummaryData/page/', verify=False)
response = requests.get('https://bet2face.com/api/getFightersSummaryData/page/', verify=False)
data = response.json()
data
# total_pages = data['total_pages']
# total_pages



{'total_pages': 73,
 'current_page': 1,
 'fighters': [{'id': 1,
   'name': 'Tanner Boser',
   'rank': 16,
   'weightCategory': {'id': 9, 'name': 'Тяжелый вес'},
   'dateOfBirth': '1991-08-02',
   'weight': 115.67,
   'height': 187.96,
   'armSpan': 190.5,
   'legSwing': None,
   'wins': 19,
   'looses': 7,
   'draws': 1,
   'significantStrikesLandPerMinute': '4.54',
   'significantStrikesMissedPerMinute': '2.70',
   'sigStrikesAccuracy': 55,
   'significantStrikesDefendPercent': 63,
   'submissionAttemptsPer15minutes': None,
   'takedownsPer15min': None,
   'takedownsAccuracy': None,
   'takedownsDefendPercent': 100,
   'knockdownsPerFight': '0.00',
   'avgFightTime': None,
   'city': 'Bonnyville',
   'country': 'Canada',
   'timezone': 'America/Edmonton',
   'methods': {'winMethods': {'DEC': '1', 'KO': '2'},
    'looseMethods': {'DEC': '2'}},
   'disciplines': [],
   'photos': [{'url': 'http://bet2face.com/uploads/images/fighters/tannerboser-5d3570e3c8d49.png',
     'isFullFace': True

In [2]:
# download fighters data
def download_fighters():
    df = pd.DataFrame()
    total_pages = requests.get('https://bet2face.com/api/getFightersSummaryData/page/', verify=False).json()['total_pages']
    print('Total pages:', total_pages)
    
    for i in tqdm(range(1, total_pages+1)):
        response2 = requests.get('https://bet2face.com/api/getFightersSummaryData/page/{}'.format(i), verify=False)
        data = response2.json()
        if data['fighters'] != None:
            chunk = json_normalize(data['fighters'])
            df = pd.concat([df, chunk], sort=True)

    df = df.reset_index(drop=True)
    df.to_csv('0.fighters_raw.csv')
    print('Downloading finished')


def process_fighters():
    df = pd.read_csv('data/0.fighters_raw.csv', index_col=0)
    print('original shape', df.shape)
    
    df['dateOfBirth'] = pd.to_datetime(df['dateOfBirth'])
    df['birthYear'] = df['dateOfBirth'].dt.year

    df.drop([i for i in df.columns if i.startswith('methods')], axis=1, inplace=True)

    df['height'] = df['height'].fillna(df['height'].median())
    df['dateOfBirth'] = pd.to_datetime(df['dateOfBirth'])

    df['birthYear'] = df['dateOfBirth'].dt.year
    df['birthYear'] = df.birthYear.fillna(df.birthYear.median())
    df['avgFightTime'].replace(":", '.', regex=True, inplace=True)
    df['avgFightTime'] = df['avgFightTime'].fillna(df['avgFightTime'].median())
    df['knockdownsPerFight'].fillna(0, inplace=True)
    df['weight'].fillna(df['weight'].median(), inplace=True)
    df['armSpan'].fillna(df['armSpan'].median(), inplace=True)
    df['legSwing'].fillna(df['legSwing'].median(), inplace=True)

    df["win%"] = 100 * (df["wins"] / (df["wins"] + df["looses"] + df["draws"]))

#     df = df.drop(['twitter', 'website', 'instagram', 'careerDisclosedEarnings', 'dateOfBirth'], axis=1)
    
    df['takedownsPer15min'].fillna(0, inplace=True)
    df['takedownsDefendPercent'].fillna(0, inplace=True)
    df['takedownsAccuracy'].fillna(0, inplace=True)
    df['submissionAttemptsPer15minutes'].fillna(0, inplace=True)

    df = df.loc[~((df['significantStrikesDefendPercent'] == 0) &
               (df['significantStrikesLandPerMinute'] == 0) &
               (df['significantStrikesMissedPerMinute'] == 0) &
               (df['submissionAttemptsPer15minutes'] == 0) &
               (df['takedownsAccuracy'] == 0) &
               (df['takedownsDefendPercent'] == 0) &
               (df['takedownsPer15min'] == 0) 
              )]
    
    
    df.to_csv('data/1.fighters_processed.csv')
    print('final shape:', df.shape)

download_fighters()
# process_fighters()



Total pages: 73


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm(range(1, total_pages+1)):


  0%|          | 0/73 [00:00<?, ?it/s]







Downloading finished


In [None]:
def download_events():
    # # Download data from bet2face
    df = pd.DataFrame()
    total_pages = requests.get('https://bet2face.com/api/getEventsSummaryData/page/', verify=False).json()['total_pages']
    print('Total pages:', total_pages)
    
    for i in tqdm(range(1, total_pages+1)):
        response = requests.get('https://bet2face.com/api/getEventsSummaryData/page/{}'.format(i), verify=False)
        response_json = response.json()
        events = response_json['events']
#         if i != 101:
        for event in events:
            chunk = json_normalize(event)
            fighterId_1,  fighterId_2 = chunk['fighters'][0][0]['fighterId'], chunk['fighters'][0][1]['fighterId']
            chunk['fighterId_1'] = fighterId_1
            chunk['fighterId_2'] = fighterId_2
            df = pd.concat([df, chunk], sort=True)
    df.to_csv('0.events_raw.csv')

def process_events():
    df = pd.read_csv('data/0.events_raw.csv', index_col=0)
    df = df.reset_index()
    pd.to_datetime(df['eventDate.date']).dt.year.value_counts()

    # Добавить кэффы в df
    for i in df.index[:]:
        try:
            avgodds = df.loc[i, 'avgOdds']

            if avgodds == '[]':
                continue

            # Преобразую данные о коэффах из строки
            ids1, odd1 = int(avgodds.split()[1].split(',')[0]), float(avgodds.split()[3].split('}')[0])
            ids2, odd2 = int(avgodds.split()[5].split(',')[0]), float(avgodds.split()[7].split('}')[0])

            # Проверить совпадает ли fighterId_1 с ids1 (который в avgOdds)
            if df.loc[i, 'fighterId_1'] == ids1:
                df.at[i, 'odd1'] = odd1
                df.at[i, 'odd2'] = odd2

            if df.loc[i, 'fighterId_1'] == ids2:
                df.at[i, 'odd1'] = odd2
                df.at[i, 'odd2'] = odd1

        except IndexError:
            print(avgodds)

    print('percent of fighter1 winners:', np.mean(df.winnerId == df.fighterId_1))

    # randomly swap fighter1 and fighter2 for half of the dataset to create "negative cases"
    swap_indices = np.random.choice(len(df), size = len(df) // 2, replace = False)

    Fighter1_ColInd = np.where(df.columns == 'fighterId_1')[0][0]
    Fighter2_ColInd = np.where(df.columns == 'fighterId_2')[0][0]
    df.iloc[swap_indices, [Fighter1_ColInd, Fighter2_ColInd]] = df.iloc[swap_indices, [Fighter2_ColInd, Fighter1_ColInd]].values # swap fighters
    
    odd1_ColInd = np.where(df.columns == 'odd1')[0][0]
    odd2_ColInd = np.where(df.columns == 'odd2')[0][0]
    df.iloc[swap_indices, [odd1_ColInd, odd2_ColInd]] = df.iloc[swap_indices, [odd2_ColInd, odd1_ColInd]].values # swap odds

    # winner equal 1 if winner is fighter 1
    df['winner'] = df['winnerId'] == df['fighterId_1']
    df['winner'] = df['winner'].astype(np.int8)
    df.shape
    print('percent of fighter1 winners after swappint:', np.mean(df.winnerId == df.fighterId_1))
    df.to_csv('data/1.events_processed.csv')
      
download_events()
# process_events()



Total pages: 121


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm(range(1, total_pages+1)):


  0%|          | 0/121 [00:00<?, ?it/s]





In [3]:
df = pd.read_csv('data/0.events_raw.csv')
df

Unnamed: 0.1,Unnamed: 0,avgOdds,city,completed,country,duration,eventDate.date,eventDate.timezone,eventDate.timezone_type,fighterId_1,fighterId_2,fighters,id,link,name,rounds,timezone,weightCategory.id,weightCategory.name,winMethods,winnerId
0,0,[],Denver,True,USA,104.0,1993-11-12 00:00:00.000000,Europe/Berlin,3,1646,1923,"[{'fighterId': 1646, 'fightStats': {'hitsTotal...",5201,http://www.ufcstats.com/fight-details/64139d1d...,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0
1,0,[],Denver,True,USA,52.0,1993-11-12 00:00:00.000000,Europe/Berlin,3,1777,1883,"[{'fighterId': 1777, 'fightStats': {'hitsTotal...",5202,http://www.ufcstats.com/fight-details/00b07967...,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1777.0
2,0,[],Denver,True,USA,59.0,1993-11-12 00:00:00.000000,Europe/Berlin,3,1908,1923,"[{'fighterId': 1908, 'fightStats': {'hitsTotal...",5203,http://www.ufcstats.com/fight-details/ac7ca2ec...,UFC 1,1.0,America/Denver,9,Тяжелый вес,['KO'],1923.0
3,0,[],Denver,True,USA,57.0,1993-11-12 00:00:00.000000,Europe/Berlin,3,1631,1646,"[{'fighterId': 1631, 'fightStats': {'hitsTotal...",5204,http://www.ufcstats.com/fight-details/ffd16691...,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1646.0
4,0,[],Denver,True,USA,138.0,1993-11-12 00:00:00.000000,Europe/Berlin,3,1646,1924,"[{'fighterId': 1646, 'fightStats': {'hitsTotal...",5205,http://www.ufcstats.com/fight-details/cecdc0da...,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7095,0,[],Las Vegas,False,USA,,2021-02-27 00:00:00.000000,Europe/Berlin,3,309,3416,"[{'fighterId': 309, 'fightStats': [], 'roundSt...",40432,http://www.ufcstats.com/fight-details/5d11136d...,UFC Fight Night,,America/Los_Angeles,8,Полутяжелый вес,[],
7096,0,"[{'fighterId': 53, 'value': 2}, {'fighterId': ...",Las Vegas,False,USA,,2021-02-27 01:45:00.000000,Europe/Berlin,3,53,275,"[{'fighterId': 53, 'fightStats': [], 'roundSta...",40492,http://www.ufcstats.com/fight-details/dd3dc5fe...,UFC Fight Night,,America/Los_Angeles,6,Полусредний вес,[],
7097,0,"[{'fighterId': 145, 'value': 3.95}, {'fighterI...",Las Vegas,False,USA,,2021-02-27 03:45:00.000000,Europe/Berlin,3,145,499,"[{'fighterId': 145, 'fightStats': [], 'roundSt...",40507,http://www.ufcstats.com/fight-details/8ad9b2f0...,UFC Fight Night,,America/Los_Angeles,10,Женский минимальный вес,[],
7098,0,"[{'fighterId': 246, 'value': 2.75}, {'fighterI...",Las Vegas,True,USA,208.0,2021-03-06 00:00:00.000000,Europe/Berlin,3,246,2073,"[{'fighterId': 246, 'fightStats': {'hitsTotal'...",40452,http://www.ufcstats.com/fight-details/7ad09f88...,UFC 259,3.0,America/Los_Angeles,6,Полусредний вес,['SUB'],2073.0
