# Scrape API Fight IDs

## Imports

In [11]:
import pandas as pd
import requests
import json
import re
import time
from bs4 import BeautifulSoup
import os
import errno
import time
import datetime

In [2]:
def scrape_all_event_fight_ids(events_df):
    create_fight_stat_ids_csv()
    for index, event in events_df.iterrows():
#         if index%5==0:
#             print(f'{index}: {event.url}')
        
        url = event.url
        event_id = event.event_id
        list_of_fight_stat_ids = scrape_fight_stat_ids_from_one_event(event_id)
        try:
            if len(list_of_fight_stat_ids) > 0:
                write_fight_stat_ids_to_csv(url, event_id, list_of_fight_stat_ids)
        except:
            print(f"There was a write error for {event.url}")
        time.sleep(3)

In [3]:
def scrape_fight_stat_ids_from_one_event(event_id):
    full_url = f'http://liveapi.fightmetric.com/V1/{event_id}/Fnt.json'
    
    try:
        response = requests.get(full_url)
        structured = json.loads(response.text)
        list_of_fight_stat_ids = [x['FightID'] for x in structured['FMLiveFeed']['Fights']]
    except:
        list_of_fight_stat_ids = []
    return list_of_fight_stat_ids

In [4]:
def create_fight_stat_ids_csv():
    # Create the header
    try:
        os.remove('event_fight_stat_ids.csv')
    except:
        pass
    
    with open('event_fight_stat_ids.csv', 'a+', newline='') as f:
        f.write(f'url,event_id,fight_stat_id')

In [5]:
def write_fight_stat_ids_to_csv(url, event_id, list_of_fight_stat_ids):        
    for fight_stat_id in list_of_fight_stat_ids:
        with open('event_fight_stat_ids.csv', 'a+', newline='') as f:
            f.write(f'\n{url},{event_id},{fight_stat_id}')

## Pull in Event IDs and Scrape

In [6]:
!ls ../Scraped_Pre_Data

event_fight_stat_ids.csv event_ids.csv


In [19]:
events = pd.read_csv('../Scraped_Pre_Data/event_ids.csv')
events = events[events.event_id != 0]
events.head()

Unnamed: 0,url,event_id
0,/event/fight-night-asia-june-2018,872
3,/event/UFC-225,866
4,/event/fight-night-utica-2018,868
5,/event/fight-night-liverpool-2018,870
6,/event/fight-night-santiago-2018,869


In [8]:
scrape_all_event_fight_ids(events)

# Update this to also pull the full JSON for each V1 url

In [21]:
def scrape_all_v1_urls(events_df):
    for index, event in events_df.iterrows():   
        event_url = event.url
        event_id = event.event_id    
        if index%5==0:
            print(f'{index} of {len(events_df)}: {event_id}') 
        if event_id != 0:
            scrape_one_v1_urls(event_id)
        else: 
            print(f"skipping {index}-{event_url}")
        time.sleep(1)

In [15]:
def scrape_one_v1_urls(event_id):
    v1_url = f'http://liveapi.fightmetric.com/V1/{event_id}/Fnt.json'
    
    response = requests.get(v1_url)
    
    if response.status_code == 200:    
        data = response.json()
        
        file_path = f"../Scraped_V1_Jsons/v1_event_{event_id}.json"
        directory = os.path.dirname(file_path)

        with open(file_path, 'w') as f:
            json.dump(data, f)
    else:
        log_v1_response_error(event_id)

In [8]:
def log_v1_response_error(event_id):
    print("there was a response error")
    with open('error_log_v1_scrape.csv', 'a+', newline='') as f:
        f.write(f'\n**********************************************')
        f.write(f'\nProblem Type:  Response Error')
        f.write(f'\nDateTime:  {datetime.datetime.now().strftime("%Y-%m-%d %H:%M")}')
        f.write(f'\nEvent_id: {event_id}')

In [14]:
!ls ../Scraped_V1_Jsons

event_866.json


In [16]:
scrape_one_v1_urls(866)

In [22]:
scrape_all_v1_urls(events)

0 of 349: 872
5 of 349: 870
10 of 349: 862
15 of 349: 857
20 of 349: 851
25 of 349: 844
30 of 349: 831
35 of 349: 829
40 of 349: 838
45 of 349: 822
50 of 349: 821
55 of 349: 815
60 of 349: 809
65 of 349: 803
70 of 349: 798
75 of 349: 792
80 of 349: 784
85 of 349: 782
90 of 349: 780
95 of 349: 773
100 of 349: 768
105 of 349: 759
110 of 349: 758
115 of 349: 745
120 of 349: 738
125 of 349: 732
130 of 349: 717
135 of 349: 711
140 of 349: 704
145 of 349: 702
150 of 349: 697
155 of 349: 694
160 of 349: 680
165 of 349: 679
170 of 349: 671
175 of 349: 668
180 of 349: 660
185 of 349: 653
190 of 349: 646
195 of 349: 645
200 of 349: 640
205 of 349: 635
210 of 349: 629
there was a response error
there was a response error
215 of 349: 625
there was a response error
there was a response error
there was a response error
there was a response error
there was a response error
220 of 349: 619
there was a response error
there was a response error
there was a response error
there was a response error
there