In [472]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
from urllib import request
from urllib.request import urlopen
from string import ascii_lowercase as alc
from datetime import datetime

In [473]:
base_url = 'https://www.ufc.com/events'

In [474]:
html = urlopen(base_url)
soup = BeautifulSoup(html, 'lxml')
event_titles = []
event_images = []
event_texts = []

In [475]:
events = soup.find_all(class_='l-listing__item')

In [476]:
## grabbing fighters and their images for events

def get_fighters_images(fighter_link):
    html = urlopen(fighter_link)
    soup = BeautifulSoup(html, 'lxml')
    event_fighters_red = []
    event_fighters_blue = []

    fighter_rows = soup.find_all(class_='c-listing-fight__content-row')
    for row in fighter_rows:
        red_fighter_img = row.find('img', class_='image-style-event-fight-card-upper-body-of-standing-athlete')
        if red_fighter_img:
            red_fighter_image_url = red_fighter_img['src']
            event_fighters_red.append(red_fighter_image_url)

    blue_fighter_rows = soup.find_all(class_='c-listing-fight__corner--blue')
    for row in blue_fighter_rows:
        blue_fighter_img = row.find('img', class_='image-style-event-fight-card-upper-body-of-standing-athlete')
        if blue_fighter_img:
            blue_fighter_image_url = blue_fighter_img['src']
            event_fighters_blue.append(blue_fighter_image_url)

    return event_fighters_red, event_fighters_blue

In [477]:
### Grabbing fighters names

def get_fighter_texts(fighter_link):
    html = urlopen(fighter_link)
    soup = BeautifulSoup(html, 'lxml')
    event_fighters_red = []
    event_fighters_blue = []
    event_weightclass = []
    
    fighter_rows = soup.find_all(class_='c-listing-fight__corner-name c-listing-fight__corner-name--red')
    for row in fighter_rows:
        red_fighter_given_name_tag = row.find(class_='c-listing-fight__corner-given-name')
        red_fighter_family_name_tag = row.find(class_='c-listing-fight__corner-family-name')
        
        if red_fighter_given_name_tag:
            # If only one of the tags is found, handle the case where the name is within a <span>
            red_fighter_given_name = red_fighter_given_name_tag.get_text(strip=True)
            red_fighter_family_name = red_fighter_family_name_tag.get_text(strip=True)
            red_fighter = f"{red_fighter_given_name} {red_fighter_family_name}"
            event_fighters_red.append(red_fighter)
        ##else:
            ##print("Given name or family name not found for a red corner fighter")
                
                
    blue_fighter_rows = soup.find_all(class_='c-listing-fight__corner-name c-listing-fight__corner-name--blue')
    for row in blue_fighter_rows:  # Iterate over blue corner fighter rows
        blue_fighter_given_name_tag = row.find(class_='c-listing-fight__corner-given-name')
        blue_fighter_family_name_tag = row.find(class_='c-listing-fight__corner-family-name')
        
        if blue_fighter_given_name_tag:
            # If only one of the tags is found, handle the case where the name is within a <span>
            blue_fighter_given_name = blue_fighter_given_name_tag.get_text(strip=True)
            blue_fighter_family_name = blue_fighter_family_name_tag.get_text(strip=True)
            blue_fighter = f"{blue_fighter_given_name} {blue_fighter_family_name}"
            event_fighters_blue.append(blue_fighter)
        ##else:
            ##print("Given name or family name not found for a blue corner fighter")
            
            
    # Extract weight class of the fight
    weightclass_tag = soup.find_all(class_='c-listing-fight__class c-listing-fight__class--desktop')
    for row in weightclass_tag:
        weightclass_get = row.find(class_='c-listing-fight__class-text').get_text(strip=True)
        if weightclass_get:
            event_weightclass.append(weightclass_get)
        else:
            print("Weight class not found for the fight")
    
    return event_fighters_red, event_fighters_blue, event_weightclass


In [478]:
for event in events:
    title = event.find(class_='c-card-event--result__headline').get_text(strip=True)
    date = event.find(class_='c-card-event--result__date tz-change-data').get_text(strip=True)
    event_titles.append([title, date])

    fighter_name_cell = event.find('h3', class_='c-card-event--result__headline')
    fighter_link = fighter_name_cell.find('a')['href'] if fighter_name_cell else None

    if fighter_link:
        full_fighter_link = urljoin(base_url, fighter_link)
        
        fighters_red, fighters_blue = get_fighters_images(full_fighter_link)
        event_images.append({'title': title, 'date': date, 'fighters_red': fighters_red, 'fighters_blue': fighters_blue})
        
        fighter_texts_red, fighter_texts_blue, fighter_weight = get_fighter_texts(full_fighter_link)
        
        print(len(fighter_texts_red), len(fighter_texts_blue), len(fighter_weight))
        min_length = min(len(fighter_texts_red), len(fighter_texts_blue))
        
        for i in range(min_length):
            matchup = f"{fighter_weight[i]} , {fighter_texts_red[i]} VS {fighter_texts_blue[i]}"
            event_texts.append(matchup)
        

11 0 13
12 0 13
12 0 13
11 0 13
11 0 12
8 0 9
4 0 4
9 0 12
12 0 12
13 0 14
12 0 13
10 0 12
10 0 11
11 0 12
9 0 11
8 0 12


In [479]:
list(event_texts)

[]

In [325]:
list(event_images)

[{'title': 'Moreno vs Royval 2',
  'date': 'Sat, Feb 24 / 10:00 PM EST / Main Card',
  'fighters_red': ['https://dmxg5wxfqgb4u.cloudfront.net/styles/event_fight_card_upper_body_of_standing_athlete/s3/2022-07/MORENO_BRANDON_L_06-12.png?itok=StNwkwbi',
   'https://dmxg5wxfqgb4u.cloudfront.net/styles/event_fight_card_upper_body_of_standing_athlete/s3/2022-07/RODRIGUEZ_YAIR_L_07-16.png?itok=EogxXVTq',
   'https://dmxg5wxfqgb4u.cloudfront.net/styles/event_fight_card_upper_body_of_standing_athlete/s3/2022-09/ZELLHUBER_DANIEL_L_09-17.png?itok=Xol_9UmI',
   'https://dmxg5wxfqgb4u.cloudfront.net/styles/event_fight_card_upper_body_of_standing_athlete/s3/2023-09/ROSAS_JR_RAUL_L_09-16.png?itok=CAr4E5Zl',
   'https://dmxg5wxfqgb4u.cloudfront.net/styles/event_fight_card_upper_body_of_standing_athlete/s3/2023-07/JAUREGUI_YAZMIN_L_07-08.png?itok=1O1je0M0',
   'https://dmxg5wxfqgb4u.cloudfront.net/styles/event_fight_card_upper_body_of_standing_athlete/s3/2023-06/TORRES_MANUEL_L_06-17.png?itok=9Cu3GNrE'

In [90]:
print(fighter_link)

/event/ufc-fight-night-december-02-2023


In [82]:
list(event_titles)

[['Moreno vs Royval 2', 'Sat, Feb 24 / 10:00 PM EST / Main Card'],
 ['Rozenstruik vs Gaziev', 'Sat, Mar 2 / 4:00 PM EST / Main Card'],
 ["O'Malley vs Vera 2", 'Sat, Mar 9 / 10:00 PM EST / Main Card'],
 ['Tuivasa vs Tybura', 'Sat, Mar 16 / 7:00 PM EDT / Main Card'],
 ['Ribas vs Namajunas', 'Sat, Mar 23 / 10:00 PM EDT / Main Card'],
 ['Blanchfield vs Fiorot', 'Sat, Mar 30 / 10:00 PM EDT / Main Card'],
 ['Vettori vs Allen', 'Sat, Apr 6 / 6:00 PM EDT / Main Card'],
 ['Pereira vs Hill', 'Sat, Apr 13 / 10:00 PM EDT / Main Card'],
 ['Volkanovski vs Topuria', 'Sat, Feb 17 / 10:00 PM EST / Main Card'],
 ['Hermansson vs Pyfer', 'Sat, Feb 10 / 7:00 PM EST / Main Card'],
 ['Dolidze vs Imavov', 'Sat, Feb 3 / 7:00 PM EST / Main Card'],
 ['Strickland vs Du Plessis', 'Sat, Jan 20 / 10:00 PM EST / Main Card'],
 ['ANKALAEV vs WALKER 2', 'Sat, Jan 13 / 7:00 PM EST / Main Card'],
 ['EDWARDS vs COVINGTON', 'Sat, Dec 16 / 10:00 PM EST / Main Card'],
 ['Song vs Gutierrez', 'Sat, Dec 9 / 10:00 PM EST / Main C