In [169]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
from urllib import request
from urllib.request import urlopen
from string import ascii_lowercase as alc
from datetime import datetime

In [170]:
# Events listing page the scraper starts from. It is downloaded once with
# urlopen, and the per-event links found on it are resolved against this URL.
base_url = 'https://www.ufc.com/events'

In [171]:
# Download and parse the events listing page. The ``with`` block closes the
# HTTP response as soon as parsing is finished instead of leaving it open for
# the lifetime of the kernel.
with urlopen(base_url) as html:
    soup = BeautifulSoup(html, 'lxml')

# Accumulators filled by the scraping loop further down.
event_titles = []  # one [id, title, date, red headshot URL, blue headshot URL] row per event
event_images = []  # not filled by any active code visible in this notebook
event_texts = []   # one comma-joined string per fight
num_Fights = -1    # index of the last processed event; -1 means none processed yet

In [172]:
# Collect every element on the listing page whose class matches
# 'l-listing__item'; the scraping loop treats each one as a single event card.
events = soup.find_all(class_='l-listing__item')

In [173]:
## Helper: collect the red- and blue-corner fighter image URLs from one event page

def _fighter_image_url(container):
    """Return the absolute URL of the fighter image inside ``container``.

    The full-body fight-card image is preferred; when it is absent the
    'teaser' image is used instead. Relative ``src`` values are resolved
    against https://www.ufc.com. Returns None when no usable image is found.
    """
    img = container.find('img', class_='image-style-event-fight-card-upper-body-of-standing-athlete')
    if img is None:
        img = container.find('img', class_='image-style-teaser')
    src = img.get('src') if img is not None else None
    if not src:
        return None
    # urljoin leaves absolute URLs alone and only completes relative ones.
    return urljoin('https://www.ufc.com', src)


def get_fighters_images(fighter_link):
    """Scrape the fighter image URLs from one event page.

    Parameters
    ----------
    fighter_link : str
        Absolute URL of the event page to download.

    Returns
    -------
    tuple[list, list]
        ``(event_fighters_red, event_fighters_blue)``. The red list has one
        entry per 'c-listing-fight__content-row' element (the first matching
        image in that row; presumably the red corner, confirm against the page
        markup). The blue list has one entry per
        'c-listing-fight__corner--blue' element. An entry is None when the
        element contains no recognised image, so the lists keep one slot per
        element instead of raising.
    """
    # Close the HTTP response once the page has been parsed.
    with urlopen(fighter_link) as response:
        soup = BeautifulSoup(response, 'lxml')

    fight_rows = soup.find_all(class_='c-listing-fight__content-row')
    blue_corners = soup.find_all(class_='c-listing-fight__corner--blue')

    event_fighters_red = [_fighter_image_url(row) for row in fight_rows]
    event_fighters_blue = [_fighter_image_url(corner) for corner in blue_corners]

    return event_fighters_red, event_fighters_blue

In [174]:
def event_category(event):
    """Classify ``event`` by the card section that encloses it.

    ``event`` must offer ``find_parent(class_=...)``. The sections are probed
    in a fixed order and the label of the first enclosing one is returned:
    'main_card', 'prelims' or 'early_prelims'. Returns None when no probed
    section encloses the element.
    """
    # (CSS class of the enclosing section, label to report), in probing order.
    sections = (
        ("main-card", 'main_card'),
        ('fight-card-prelims', 'prelims'),
        ('fight-card-prelims-early', 'early_prelims'),
    )
    for css_class, label in sections:
        if event.find_parent(class_=css_class):
            return label
    return None

In [175]:
def _fighter_name(row):
    """Return ``(name, tag)`` for one corner-name element.

    The name is "<given> <family>" when a given-name tag exists (the family
    part is simply omitted if its tag is missing), otherwise the text of the
    first link in ``row``. ``tag`` is the element the name was read from.
    Returns ``(None, None)`` when neither is present.
    """
    given_tag = row.find(class_='c-listing-fight__corner-given-name')
    if given_tag:
        parts = [given_tag.get_text(strip=True)]
        family_tag = row.find(class_='c-listing-fight__corner-family-name')
        if family_tag is not None:
            parts.append(family_tag.get_text(strip=True))
        return " ".join(parts), given_tag

    link_tag = row.find('a')
    if link_tag:
        return link_tag.get_text(strip=True), link_tag
    return None, None


def get_fighter_texts(fighter_link):
    """Scrape fighter names, weight classes and card sections from an event page.

    Parameters
    ----------
    fighter_link : str
        Absolute URL of the event page to download.

    Returns
    -------
    tuple[list, list, list, list]
        ``(event_fighters_red, event_fighters_blue, event_weightclass,
        event_card)``. ``event_card`` holds one ``event_category`` result per
        blue-corner fighter whose name was found. Entries that cannot be read
        are reported with ``print`` and skipped, so the four lists are not
        guaranteed to have equal lengths.
    """
    # Close the HTTP response once the page has been parsed.
    with urlopen(fighter_link) as response:
        soup = BeautifulSoup(response, 'lxml')

    event_fighters_red = []
    event_fighters_blue = []
    event_weightclass = []
    event_card = []

    # Red corner names.
    red_fighter_rows = soup.find_all(class_='c-listing-fight__corner-name c-listing-fight__corner-name--red')
    for row in red_fighter_rows:
        name, _ = _fighter_name(row)
        if name is None:
            print("Given name or family name not found for a red corner fighter")
        else:
            event_fighters_red.append(name)

    # Blue corner names; the card section is derived from the blue name tag.
    blue_fighter_rows = soup.find_all(class_='c-listing-fight__corner-name c-listing-fight__corner-name--blue')
    for row in blue_fighter_rows:
        name, name_tag = _fighter_name(row)
        if name is None:
            print('cannot find name for blue fighter')
        else:
            event_card.append(event_category(name_tag))
            event_fighters_blue.append(name)

    # Weight class of each fight. A missing text tag is treated like empty
    # text so it is reported instead of raising AttributeError.
    weightclass_rows = soup.find_all(class_='c-listing-fight__class c-listing-fight__class--desktop')
    for row in weightclass_rows:
        text_tag = row.find(class_='c-listing-fight__class-text')
        weightclass_get = text_tag.get_text(strip=True) if text_tag is not None else ''
        if weightclass_get:
            event_weightclass.append(weightclass_get)
        else:
            print("Weight class not found for the fight")

    return event_fighters_red, event_fighters_blue, event_weightclass, event_card


In [176]:
# Reset the accumulators so that re-running this cell rebuilds them from
# scratch instead of appending a second copy of every event and fight.
event_titles.clear()
event_texts.clear()
num_Fights = -1


def _headshot_src(event, corner):
    """Return the headshot URL on an event card for ``corner`` ('red' or 'blue'), or None."""
    field = event.find(class_=f'field field--name-{corner}-corner field--type-entity-reference field--label-hidden field__item')
    img = field.find('img', class_='image-style-event-results-athlete-headshot') if field else None
    return img.get('src') if img else None


for event in events:
    headline = event.find(class_='c-card-event--result__headline')
    date_tag = event.find(class_='c-card-event--result__date tz-change-data')
    if headline is None or date_tag is None:
        # Without a title and date there is nothing meaningful to record.
        print('Skipping a listing item without a headline or date')
        continue

    title = headline.get_text(strip=True)
    date = date_tag.get_text(strip=True)
    red_urls = _headshot_src(event, 'red')
    blue_urls = _headshot_src(event, 'blue')

    # The same ID is stored with the event and with each of its fights, so the
    # two tables can always be matched on it.
    event_id = len(event_titles)
    event_titles.append([event_id, title, date, red_urls, blue_urls])

    fighter_name_cell = event.find('h3', class_='c-card-event--result__headline')
    link_tag = fighter_name_cell.find('a') if fighter_name_cell else None
    fighter_link = link_tag.get('href') if link_tag else None

    if fighter_link:
        full_fighter_link = urljoin(base_url, fighter_link)

        fighters_red, fighters_blue = get_fighters_images(full_fighter_link)
        print(len(fighters_red), len(fighters_blue))
        fighter_texts_red, fighter_texts_blue, fighter_weight, fighter_card = get_fighter_texts(full_fighter_link)

        num_Fights = event_id
        # zip stops at the shortest list, so a page where one of the six lists
        # is shorter than the others cannot raise IndexError.
        fights = zip(fighter_card, fighter_weight, fighters_red,
                     fighter_texts_red, fighter_texts_blue, fighters_blue)
        for card, weight, red_image, red_name, blue_name, blue_image in fights:
            matchup = f"{num_Fights}, {card}, {weight} , {red_image}, {red_name}, {blue_name}, {blue_image}"
            event_texts.append(matchup)
        

13 13
13 13
14 14
14 14
12 12
1 1
1 1
0 0
14 14
11 11
12 12
12 12
14 14
13 13
12 12
11 11


In [178]:
# Preview only the first few fight strings; event_texts is already a list, so
# copying it with list() and dumping every entry adds nothing but noise.
event_texts[:5]

['0, None, Heavyweight Bout , https://dmxg5wxfqgb4u.cloudfront.net/styles/event_fight_card_upper_body_of_standing_athlete/s3/2023-09/TUIVASA_TAI_L_09-09.png?itok=sq4zfwfK, Tai Tuivasa, Marcin Tybura, https://dmxg5wxfqgb4u.cloudfront.net/styles/event_fight_card_upper_body_of_standing_athlete/s3/2023-07/TYBURA_MARCIN_R_07-22.png?itok=8W25Wj7C',
 '0, None, Welterweight Bout , https://dmxg5wxfqgb4u.cloudfront.net/styles/event_fight_card_upper_body_of_standing_athlete/s3/2023-05/BATTLE_BRYAN_L_05-13.png?itok=2gvn1YMC, Bryan Battle, Ange Loosa, https://dmxg5wxfqgb4u.cloudfront.net/styles/event_fight_card_upper_body_of_standing_athlete/s3/2022-04/1fb810cf-df10-4619-8bc6-e864e660815e%252FLOOSA_ANGE_R_04-16.png?itok=FqKQ46ve',
 '0, None, Light Heavyweight Bout , https://dmxg5wxfqgb4u.cloudfront.net/styles/event_fight_card_upper_body_of_standing_athlete/s3/2023-02/SAINT_PREUX_OVINCE_L_02-18.png?itok=deYH3GC2, Ovince Saint Preux, Kennedy Nzechukwu, https://dmxg5wxfqgb4u.cloudfront.net/styles/even

In [179]:
# Preview only the first few event rows; event_titles is already a list, so
# copying it with list() and dumping every entry adds nothing but noise.
event_titles[:5]

[[0,
  'Tuivasa vs Tybura',
  'Sat, Mar 16 / 7:00 PM EDT / Main Card',
  'https://dmxg5wxfqgb4u.cloudfront.net/styles/event_results_athlete_headshot/s3/2023-09/TUIVASA_TAI_09-09.png?itok=WzM47pI5',
  'https://dmxg5wxfqgb4u.cloudfront.net/styles/event_results_athlete_headshot/s3/2023-07/TYBURA_MARCIN_07-22.png?itok=ZB0U55Kc'],
 [1,
  'Ribas vs Namajunas',
  'Sat, Mar 23 / 10:00 PM EDT / Main Card',
  'https://dmxg5wxfqgb4u.cloudfront.net/styles/event_results_athlete_headshot/s3/2023-11/RIBAS_AMANDA_11-18.png?itok=s3YuLD3a',
  'https://dmxg5wxfqgb4u.cloudfront.net/styles/event_results_athlete_headshot/s3/2023-08/NAMAJUNAS_ROSE_09-02.png?itok=NLQIZews'],
 [2,
  'Blanchfield vs Fiorot',
  'Sat, Mar 30 / 10:00 PM EDT / Main Card',
  'https://dmxg5wxfqgb4u.cloudfront.net/styles/event_results_athlete_headshot/s3/2023-08/BLANCHFIELD_ERIN.png?itok=LdvmnU-H',
  'https://dmxg5wxfqgb4u.cloudfront.net/styles/event_results_athlete_headshot/s3/2023-08/FIOROT_MANON_09-02.png?itok=6YyG08va'],
 [3,
  'V

In [180]:
# Build the per-event table. Passing the column names to the constructor keeps
# the frame well-formed (five named columns, zero rows) even when the scrape
# returned no events; a bare DataFrame([]) would have no columns to rename.
df = pd.DataFrame(
    event_titles,
    columns=["ID", "Event_Name", "Event_Date", "Red_Fighter_images", "Blue_Fighter_images"],
)

In [181]:
# Name the five columns; the order matches the rows appended to event_titles:
# [id, title, date, red headshot URL, blue headshot URL].
df.columns = ["ID", "Event_Name", "Event_Date", "Red_Fighter_images", "Blue_Fighter_images"]

In [182]:
# Show the first rows of the per-event table as a quick sanity check.
df.head()

Unnamed: 0,ID,Event_Name,Event_Date,Red_Fighter_images,Blue_Fighter_images
0,0,Tuivasa vs Tybura,"Sat, Mar 16 / 7:00 PM EDT / Main Card",https://dmxg5wxfqgb4u.cloudfront.net/styles/ev...,https://dmxg5wxfqgb4u.cloudfront.net/styles/ev...
1,1,Ribas vs Namajunas,"Sat, Mar 23 / 10:00 PM EDT / Main Card",https://dmxg5wxfqgb4u.cloudfront.net/styles/ev...,https://dmxg5wxfqgb4u.cloudfront.net/styles/ev...
2,2,Blanchfield vs Fiorot,"Sat, Mar 30 / 10:00 PM EDT / Main Card",https://dmxg5wxfqgb4u.cloudfront.net/styles/ev...,https://dmxg5wxfqgb4u.cloudfront.net/styles/ev...
3,3,Vettori vs Allen,"Sat, Apr 6 / 6:00 PM EDT / Main Card",https://dmxg5wxfqgb4u.cloudfront.net/styles/ev...,https://dmxg5wxfqgb4u.cloudfront.net/styles/ev...
4,4,Pereira vs Hill,"Sat, Apr 13 / 10:00 PM EDT / Main Card",https://dmxg5wxfqgb4u.cloudfront.net/styles/ev...,https://dmxg5wxfqgb4u.cloudfront.net/styles/ev...


In [183]:
# Break each comma-joined fight string back into its fields. Every field is
# stripped because the strings are written with ", " (and " , ") separators,
# which would otherwise leave stray leading/trailing spaces in the values.
split_texts = [[field.strip() for field in text.split(",")] for text in event_texts]

In [184]:
# Build the per-fight table. Passing the column names to the constructor keeps
# the frame well-formed (seven named columns, zero rows) even when no fights
# were scraped; a bare DataFrame([]) would have no columns to rename.
df1 = pd.DataFrame(
    split_texts,
    columns=["ID", "Event_Card", "Event_Weight", "Red_Event_fighter_image",
             "Red_Fighter_Name", "Blue_Fighter_Name", "Blue_Event_fighter_image"],
)

In [185]:
# Name the seven columns; the order matches the fields of each fight string:
# event id, card, weight class, red image, red name, blue name, blue image.
df1.columns =  ["ID", "Event_Card", "Event_Weight", "Red_Event_fighter_image", "Red_Fighter_Name", "Blue_Fighter_Name", "Blue_Event_fighter_image" ]

In [186]:
# Show the first rows of the per-fight table as a quick sanity check.
df1.head()

Unnamed: 0,ID,Event_Card,Event_Weight,Red_Event_fighter_image,Red_Fighter_Name,Blue_Fighter_Name,Blue_Event_fighter_image
0,0,,Heavyweight Bout,https://dmxg5wxfqgb4u.cloudfront.net/styles/e...,Tai Tuivasa,Marcin Tybura,https://dmxg5wxfqgb4u.cloudfront.net/styles/e...
1,0,,Welterweight Bout,https://dmxg5wxfqgb4u.cloudfront.net/styles/e...,Bryan Battle,Ange Loosa,https://dmxg5wxfqgb4u.cloudfront.net/styles/e...
2,0,,Light Heavyweight Bout,https://dmxg5wxfqgb4u.cloudfront.net/styles/e...,Ovince Saint Preux,Kennedy Nzechukwu,https://dmxg5wxfqgb4u.cloudfront.net/styles/e...
3,0,,Lightweight Bout,https://dmxg5wxfqgb4u.cloudfront.net/styles/e...,Thiago Moisés,Brad Riddell,https://dmxg5wxfqgb4u.cloudfront.net/styles/e...
4,0,,Women's Bantamweight Bout,https://dmxg5wxfqgb4u.cloudfront.net/styles/e...,Pannie Kianzad,Macy Chiasson,https://dmxg5wxfqgb4u.cloudfront.net/styles/e...


In [187]:
# Persist both tables as CSV files in the working directory. index=False
# leaves the DataFrame index out of the files.
df.to_csv('UFC_Events_dates.csv', index=False)
df1.to_csv('UFC_Event_Fights.csv', index=False)

In [188]:
# Load the two CSV files back: df2 holds the per-event table and df3 the
# per-fight table. Column dtypes are re-inferred by read_csv on the way in.
df2 = pd.read_csv('UFC_Events_dates.csv')
df3 = pd.read_csv('UFC_Event_Fights.csv')

In [189]:
# Serialise each table to a JSON string. orient='records' produces a list with
# one object per row, keyed by column name.
json_Events_Dates_data = df2.to_json(orient='records')
json_Events_Fights_data = df3.to_json(orient='records')

In [190]:
# Wrap each JSON payload in an ES-module export statement, then write one
# .js file per table into the working directory.
events_dates_module = f'export const Events_dates_data = {json_Events_Dates_data};'
events_fights_module = f'export const Events_Fights_data = {json_Events_Fights_data};'

with open('Event_Dates_data.js', 'w') as js_file:
    js_file.write(events_dates_module)

with open('Event_Fights_data.js', 'w') as js_file:
    js_file.write(events_fights_module)