In [643]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
from urllib import request
from urllib.request import urlopen
from string import ascii_lowercase as alc
from datetime import datetime

In [644]:
# Listing page for UFC events; relative event links found on it are resolved
# against this URL with urljoin in the scraping loop.
base_url = 'https://www.ufc.com/events'

In [645]:
# Download and parse the events listing page. Parsing happens inside the
# ``with`` block so the HTTP response is closed as soon as it has been read.
with urlopen(base_url) as html:
    soup = BeautifulSoup(html, 'lxml')

# Accumulators filled by the scraping loop.
event_titles = []  # [title, date] pairs, one per event
event_images = []  # not populated by any active code in the cells shown here
event_texts = []   # one formatted matchup string per fight

In [646]:
# Every element carrying the 'l-listing__item' class; the scraping loop reads
# one event headline and date from each of them.
events = soup.find_all(class_='l-listing__item')

In [647]:
## Grab the red- and blue-corner fighter image URLs for an event page

def _portrait_src(container):
    """Return the ``src`` of the fighter portrait inside ``container``, or None.

    The upper-body event image is preferred; the teaser image is the fallback.
    None is returned when neither image (or no ``src`` attribute) is present.
    """
    for image_class in ('image-style-event-fight-card-upper-body-of-standing-athlete',
                        'image-style-teaser'):
        img = container.find('img', class_=image_class)
        if img is not None:
            src = img.get('src')
            if src:
                return src
    return None


def get_fighters_images(fighter_link):
    """Collect fighter portrait URLs from an event page.

    Parameters
    ----------
    fighter_link : str
        Absolute URL of the event page to download.

    Returns
    -------
    tuple[list, list]
        ``(event_fighters_red, event_fighters_blue)``. The red list has one
        entry per 'c-listing-fight__content-row' element and the blue list one
        entry per 'c-listing-fight__corner--blue' element. An entry is the
        image URL, or None when no portrait was found, so positions stay
        aligned with the fights instead of raising on a missing image.
    """
    # Parse inside the ``with`` block so the HTTP response is always closed.
    with urlopen(fighter_link) as response:
        soup = BeautifulSoup(response, 'lxml')

    event_fighters_red = []
    for row in soup.find_all(class_='c-listing-fight__content-row'):
        # Searching the whole row can pick up the blue fighter's portrait when
        # the red one is missing, so narrow the search to the red corner first.
        # NOTE(review): assumes the red container mirrors the blue one's class
        # name ('c-listing-fight__corner--red') - confirm against the page
        # markup. If it is absent we fall back to searching the whole row.
        red_corner = row.find(class_='c-listing-fight__corner--red')
        event_fighters_red.append(
            _portrait_src(red_corner if red_corner is not None else row)
        )

    event_fighters_blue = [
        _portrait_src(corner)
        for corner in soup.find_all(class_='c-listing-fight__corner--blue')
    ]

    return event_fighters_red, event_fighters_blue

In [648]:
def event_category(event):
    """Classify a fight element by the card section that contains it.

    Parameters
    ----------
    event : element exposing ``find_parent(class_=...)``
        The element whose ancestors are inspected.

    Returns
    -------
    str or None
        'main_card', 'prelims' or 'early_prelims' for the first ancestor class
        that matches (checked in that order), or None when none matches.
    """
    # (ancestor CSS class, label) pairs, in lookup order.
    sections = (
        ("main-card", 'main_card'),
        ('fight-card-prelims', 'prelims'),
        ('fight-card-prelims-early', 'early_prelims'),
    )
    for css_class, label in sections:
        if event.find_parent(class_=css_class):
            return label
    return None

In [649]:
def _corner_name(corner_row):
    """Return ``(name, source_tag)`` for one corner-name element.

    The name is "<given> <family>" when the given-name element exists (just the
    given name if the family-name element is missing); otherwise the text of
    the first ``<a>`` in the row. ``(None, None)`` means no name was found.
    """
    given_tag = corner_row.find(class_='c-listing-fight__corner-given-name')
    if given_tag is not None:
        given_name = given_tag.get_text(strip=True)
        family_tag = corner_row.find(class_='c-listing-fight__corner-family-name')
        if family_tag is None:
            # Single-name fighter: avoid dereferencing the missing family tag.
            return given_name, given_tag
        return f"{given_name} {family_tag.get_text(strip=True)}", given_tag

    link_tag = corner_row.find('a')
    if link_tag is not None:
        return link_tag.get_text(strip=True), link_tag
    return None, None


def get_fighter_texts(fighter_link):
    """Collect fighter names, weight classes and card sections from an event page.

    Parameters
    ----------
    fighter_link : str
        Absolute URL of the event page to download.

    Returns
    -------
    tuple[list, list, list, list]
        ``(event_fighters_red, event_fighters_blue, event_weightclass,
        event_card)``. Names are strings. ``event_card`` holds the result of
        ``event_category`` for each blue-corner fighter whose name was found.
        Entries that cannot be resolved are reported with ``print`` and
        skipped, so the lists are not guaranteed to have equal lengths.
    """
    # Parse inside the ``with`` block so the HTTP response is always closed.
    with urlopen(fighter_link) as response:
        soup = BeautifulSoup(response, 'lxml')

    event_fighters_red = []
    event_fighters_blue = []
    event_weightclass = []
    event_card = []

    # Red corner names.
    for row in soup.find_all(class_='c-listing-fight__corner-name c-listing-fight__corner-name--red'):
        red_fighter, _ = _corner_name(row)
        if red_fighter is None:
            print("Given name or family name not found for a red corner fighter")
        else:
            event_fighters_red.append(red_fighter)

    # Blue corner names; the card section is derived from the same element
    # that supplied the name.
    for row in soup.find_all(class_='c-listing-fight__corner-name c-listing-fight__corner-name--blue'):
        blue_fighter, source_tag = _corner_name(row)
        if blue_fighter is None:
            print('cannot find name for blue fighter')
        else:
            event_card.append(event_category(source_tag))
            event_fighters_blue.append(blue_fighter)

    # Weight class of each fight.
    for row in soup.find_all(class_='c-listing-fight__class c-listing-fight__class--desktop'):
        text_tag = row.find(class_='c-listing-fight__class-text')
        # A missing text element is treated like an empty one, not a crash.
        weightclass_get = text_tag.get_text(strip=True) if text_tag is not None else ''
        if weightclass_get:
            event_weightclass.append(weightclass_get)
        else:
            print("Weight class not found for the fight")

    return event_fighters_red, event_fighters_blue, event_weightclass, event_card


In [650]:
# Start from empty accumulators so re-running this cell does not append
# duplicate entries to the lists created earlier in the notebook.
event_titles.clear()
event_texts.clear()

for event in events:
    headline_tag = event.find(class_='c-card-event--result__headline')
    date_tag = event.find(class_='c-card-event--result__date tz-change-data')
    if headline_tag is None or date_tag is None:
        # Listing item without a headline or date: nothing to record.
        continue

    title = headline_tag.get_text(strip=True)
    date = date_tag.get_text(strip=True)
    event_titles.append([title, date])

    # The headline <h3> wraps the link to the event's fight-card page.
    fighter_name_cell = event.find('h3', class_='c-card-event--result__headline')
    link_tag = fighter_name_cell.find('a') if fighter_name_cell else None
    fighter_link = link_tag.get('href') if link_tag else None

    if fighter_link:
        full_fighter_link = urljoin(base_url, fighter_link)

        fighters_red, fighters_blue = get_fighters_images(full_fighter_link)
        print(len(fighters_red), len(fighters_blue))
        fighter_texts_red, fighter_texts_blue, fighter_weight, fighter_card = get_fighter_texts(full_fighter_link)

        # zip stops at the shortest list, so a fight that is missing any one
        # of the six pieces is dropped instead of raising IndexError.
        for card, weight, red_image, red_name, blue_name, blue_image in zip(
            fighter_card, fighter_weight, fighters_red,
            fighter_texts_red, fighter_texts_blue, fighters_blue,
        ):
            matchup = f"{card}, {weight} , {red_image}, {red_name} VS {blue_name}, {blue_image}"
            event_texts.append(matchup)
        

13 13
13 13
13 13
13 13
12 12
14 14
4 4
12 12
12 12
14 14
13 13
12 12
11 11
12 12
11 11
12 12


In [651]:
# Display the collected matchup strings (one per fight, built by the scraping loop).
list(event_texts)

['main_card, Flyweight Bout , https://dmxg5wxfqgb4u.cloudfront.net/styles/event_fight_card_upper_body_of_standing_athlete/s3/2022-07/MORENO_BRANDON_L_06-12.png?itok=StNwkwbi, Brandon Moreno VS Brandon Royval, https://dmxg5wxfqgb4u.cloudfront.net/styles/event_fight_card_upper_body_of_standing_athlete/s3/2023-12/ROYVAL_BRANDON_R_12-16.png?itok=eNE7_sK6',
 'main_card, Featherweight Bout , https://dmxg5wxfqgb4u.cloudfront.net/styles/event_fight_card_upper_body_of_standing_athlete/s3/2022-07/RODRIGUEZ_YAIR_L_07-16.png?itok=EogxXVTq, Yair Rodriguez VS Brian Ortega, https://dmxg5wxfqgb4u.cloudfront.net/styles/event_fight_card_upper_body_of_standing_athlete/s3/2022-07/ORTEGA_BRIAN_R_07-16.png?itok=azAwJT3T',
 'main_card, Lightweight Bout , https://dmxg5wxfqgb4u.cloudfront.net/styles/event_fight_card_upper_body_of_standing_athlete/s3/2022-09/ZELLHUBER_DANIEL_L_09-17.png?itok=Xol_9UmI, Daniel Zellhuber VS Francisco Prado, https://dmxg5wxfqgb4u.cloudfront.net/styles/event_fight_card_upper_body_of

In [653]:
# Display the collected [title, date] pairs, one per scraped event.
list(event_titles)

[['Moreno vs Royval 2', 'Sat, Feb 24 / 10:00 PM EST / Main Card'],
 ['Rozenstruik vs Gaziev', 'Sat, Mar 2 / 4:00 PM EST / Main Card'],
 ["O'Malley vs Vera 2", 'Sat, Mar 9 / 10:00 PM EST / Main Card'],
 ['Tuivasa vs Tybura', 'Sat, Mar 16 / 7:00 PM EDT / Main Card'],
 ['Ribas vs Namajunas', 'Sat, Mar 23 / 10:00 PM EDT / Main Card'],
 ['Blanchfield vs Fiorot', 'Sat, Mar 30 / 10:00 PM EDT / Main Card'],
 ['Vettori vs Allen', 'Sat, Apr 6 / 6:00 PM EDT / Main Card'],
 ['Pereira vs Hill', 'Sat, Apr 13 / 10:00 PM EDT / Main Card'],
 ['Volkanovski vs Topuria', 'Sat, Feb 17 / 10:00 PM EST / Main Card'],
 ['Hermansson vs Pyfer', 'Sat, Feb 10 / 7:00 PM EST / Main Card'],
 ['Dolidze vs Imavov', 'Sat, Feb 3 / 7:00 PM EST / Main Card'],
 ['Strickland vs Du Plessis', 'Sat, Jan 20 / 10:00 PM EST / Main Card'],
 ['ANKALAEV vs WALKER 2', 'Sat, Jan 13 / 7:00 PM EST / Main Card'],
 ['EDWARDS vs COVINGTON', 'Sat, Dec 16 / 10:00 PM EST / Main Card'],
 ['Song vs Gutierrez', 'Sat, Dec 9 / 10:00 PM EST / Main C