In [2]:
from bs4 import BeautifulSoup
import requests

### Task 1: Get all links to the each event

In [3]:
base_url = 'http://www.ufcstats.com./statistics/events/completed'
all_urls = [base_url + f"?page={i}" for i in range(2,23)]
all_urls.insert(0,base_url)

In [4]:
def get_links_event(urls):
    """ Return each link to the UFC events"""
    links = []
    for url in urls:
        r = requests.get(url)
        soup = BeautifulSoup(r.content)
        info_rows = soup.find_all('tr', class_='b-statistics__table-row')[2:]
    
        for row in info_rows:
            link = row.find('a')['href']
            links.append(link)
    return links

event_urls = get_links_event(all_urls)
event_urls[:10]

['http://www.ufcstats.com/event-details/d57e6a8971b6d2bd',
 'http://www.ufcstats.com/event-details/27541033b97c076d',
 'http://www.ufcstats.com/event-details/4a35913bd9aa4161',
 'http://www.ufcstats.com/event-details/9114c8ded5ccd71d',
 'http://www.ufcstats.com/event-details/6597b611f1c32555',
 'http://www.ufcstats.com/event-details/c36e1f4fa755ffb4',
 'http://www.ufcstats.com/event-details/8c90c1563972e44d',
 'http://www.ufcstats.com/event-details/6e2b1d631832921d',
 'http://www.ufcstats.com/event-details/81b57acd6975ac06',
 'http://www.ufcstats.com/event-details/bfe95ec546692b13']

### Task 2: Get information from each event

In [5]:
def get_links_fight(urls):
    """Return each link to the fight detail"""
    links = []
    for url in urls:
        r = requests.get(url)
        soup = BeautifulSoup(r.content)
        table = soup.find('table', class_='b-fight-details__table')
        rows = table.find_all('tr')
        for row in rows[1:]:
            links.append(row['data-link'])
    return links

fight_urls = get_links_fight(event_urls)

In [66]:
link = fight_urls[1]
def get_info_box(soup_obj):
    """Return a dictinary about the information box (method, round,time,referee,and detail)"""
    my_dict = {}
    ## get info box
    fight_title = soup_obj.find('div', class_='b-fight-details__fight-head').get_text(strip=True)
    my_dict['Fight Type'] = fight_title
    box = soup_obj.find('p', class_='b-fight-details__text')
    method = box.find_all(class_='b-fight-details__text-item_first')
    method = method[0].get_text('\n', strip=True).split('\n')
    my_dict[method[0]] = method[1]
    content = box.find_all(class_='b-fight-details__text-item')
    for item in content:
        pair = item.get_text('\n', strip=True).split('\n')
        my_dict[pair[0]] = pair[1]
    detail = box.find_next_sibling("p").get_text(strip=True).split(':')
    my_dict[detail[0]] = detail[1]
    
    return my_dict

def get_fight_detail(soup_obj):
    """Return a dictionary about fight details (fighter's names, significant strike"""
    my_dict = {}
    table = soup_obj.find('tbody', class_='b-fight-details__table-body')
    info = table.select('.b-fight-details__table-col')
    index = [0,2,3,4]
    info = [info[i].get_text('\n',strip=True) for i in index]
    
    # Red and blue figters
    fighters = info[0].split('\n')
    my_dict['r_figher'] = fighters[0]
    my_dict['b_figher'] = fighters[1]

    # significant strike
    sig_str = info[1].split('\n')
    my_dict['r_figher_sig_str'] = sig_str[0]
    my_dict['b_figher_sig_str'] = sig_str[1]

    # significant strik percentage
    sig_str_pct = info[2].split('\n')
    my_dict['r_figher_sig_str %'] = sig_str_pct[0]
    my_dict['b_figher_sig_str %'] = sig_str_pct[1]

    # total strike
    total_str = info[3].split('\n')
    my_dict['r_figher_total_str'] = total_str[0]
    my_dict['b_figher_total_str'] = total_str[1]

    return my_dict

def merge(dict1,dict2):
    """Merge two dictionaries"""
    res = {**dict1, **dict2}
    return res

def get_fight_info(url):
    """Return dictionary with information about the fight"""
    r = requests.get(url)
    soup = BeautifulSoup(r.content)
    
    # get info box
    dict_list = get_info_box(soup)
    # get the winner
    winner = soup.find('i', class_='b-fight-details__person-status_style_green')
    winner = winner.find_next('div', class_='b-fight-details__person-text')
    winner = winner.find('a').get_text(strip=True)
    dict_list['Winner'] = winner

    # get fight info
    fight_info = get_fight_detail(soup)
    new_dict = merge(dict_list, fight_info)
    return new_dict


get_fight_info(link)

{'Fight Type': 'Featherweight Bout',
 'Method:': 'KO/TKO',
 'Round:': '1',
 'Time:': '1:03',
 'Time format:': '3 Rnd (5-5-5)',
 'Referee:': 'Jason Herzog',
 'Details': 'Kick to Body At Distance',
 'Winner': 'Giga Chikadze',
 'r_figher': 'Giga Chikadze',
 'b_figher': 'Cub Swanson',
 'r_figher_sig_str': '12 of 18',
 'b_figher_sig_str': '4 of 6',
 'r_figher_sig_str %': '66%',
 'b_figher_sig_str %': '66%',
 'r_figher_total_str': '12 of 18',
 'b_figher_total_str': '4 of 6'}

In [70]:
fight_list = []
for link in fight_urls:
    info = get_fight_info(link)
    print(info)
    fight_list.append(info)


{'Fight Type': 'Light Heavyweight Bout', 'Method:': 'KO/TKO', 'Round:': '2', 'Time:': '4:29', 'Time format:': '5 Rnd (5-5-5-5-5)', 'Referee:': 'Herb Dean', 'Details': 'Elbow to Head At Distance\n      Spinning Back Elbow', 'Winner': 'Jiri Prochazka', 'r_figher': 'Dominick Reyes', 'b_figher': 'Jiri Prochazka', 'r_figher_sig_str': '63 of 108', 'b_figher_sig_str': '77 of 136', 'r_figher_sig_str %': '58%', 'b_figher_sig_str %': '56%', 'r_figher_total_str': '68 of 114', 'b_figher_total_str': '78 of 137'}
{'Fight Type': 'Featherweight Bout', 'Method:': 'KO/TKO', 'Round:': '1', 'Time:': '1:03', 'Time format:': '3 Rnd (5-5-5)', 'Referee:': 'Jason Herzog', 'Details': 'Kick to Body At Distance', 'Winner': 'Giga Chikadze', 'r_figher': 'Giga Chikadze', 'b_figher': 'Cub Swanson', 'r_figher_sig_str': '12 of 18', 'b_figher_sig_str': '4 of 6', 'r_figher_sig_str %': '66%', 'b_figher_sig_str %': '66%', 'r_figher_total_str': '12 of 18', 'b_figher_total_str': '4 of 6'}


AttributeError: 'NoneType' object has no attribute 'find_next'