In [1]:
import os
import requests
import pandas as pd
import time
import json
from bs4 import BeautifulSoup
from tqdm import tqdm

In [2]:
# Output folders, relative to the notebook's working directory.
# DATA_DIR is the default target of save_to_csv; UPCOMING_DIR is only created here.
DATA_DIR = "UFC_Data"
UPCOMING_DIR = "Upcoming"
for _output_dir in (DATA_DIR, UPCOMING_DIR):
    # exist_ok=True keeps this cell safe to re-run.
    os.makedirs(_output_dir, exist_ok=True)

In [3]:
# Base URL of the Yext search endpoint that get_past_events queries.
#
# UFC_EVENT_API and HEADERS were previously assigned in this cell too, but the
# later "UFC API URL for fetching event details" cell reassigns both (with a
# different User-Agent) before any function is called, so the copies here were
# dead and misleading. They are intentionally defined only in that later cell.
SEARCH_API = "https://liveapi.yext.com/v2/accounts/me/answers/vertical/query"

In [4]:
# Search API for past UFC events (Yext API)
# NOTE(review): this repeats the SEARCH_API URL and no visible cell reads
# SEARCH_API_URL (get_past_events uses SEARCH_API) -- kept for compatibility.
SEARCH_API_URL = "https://liveapi.yext.com/v2/accounts/me/answers/vertical/query"

# Query-string parameters sent with every event search request.
API_PARAMS = {
    "experienceKey": "answers-en",
    # The key can be overridden with the YEXT_API_KEY environment variable so it
    # can be rotated without editing the notebook; the default is the value this
    # notebook has always used.
    # NOTE(review): presumably the public key used by the ufc.com site search
    # (see referrerPageUrl below) -- confirm before committing it.
    "api_key": os.environ.get("YEXT_API_KEY", "850a88aeb3c29599ce2db46832aa229f"),
    "v": "20220511",
    "version": "PRODUCTION",
    "locale": "en",
    "input": "Past Events",
    "verticalKey": "events",
    "limit": 50,  # Page size: results requested per call
    "offset": 0,  # Index of the first result; advanced while paginating
    "retrieveFacets": "true",
    "facetFilters": "{}",
    "session_id": "38313516-bbb1-4784-835b-3017a3b6c79b",
    "sessionTrackingEnabled": "true",
    "sortBys": "[]",
    "referrerPageUrl": "https://www.ufc.com/",
    "source": "STANDARD",
    "jsLibVersion": "v1.14.3",
}

In [5]:
# Per-event endpoint; the "{}" placeholder is filled with an event ID via str.format.
UFC_EVENT_API = "https://d29dxerjsp82wz.cloudfront.net/api/v3/event/live/{}.json"

# Request headers sent with every call: a desktop-browser User-Agent string.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/130.0.0.0 Safari/537.36"
    )
}

In [6]:
def _format_event_location(event_data):
    """Join the non-empty city/state/country fields of an event with ', '."""
    parts = []
    for key in ("c_eventCity", "c_eventState", "c_eventCountry"):
        # `or ""` also covers keys that are present but null, which would
        # otherwise be rendered as the text "None".
        value = str(event_data.get(key) or "").strip()
        if value:
            parts.append(value)
    return ", ".join(parts)


def _fetch_events_page(params):
    """Request one page of search results; return its "response" dict, or None on failure."""
    try:
        reply = requests.get(SEARCH_API, headers=HEADERS, params=params, timeout=10)
        if reply.status_code != 200:
            print(f"⚠️ Event search failed at offset {params.get('offset')}. Status: {reply.status_code}")
            return None
        payload = reply.json()
    except (requests.RequestException, ValueError) as e:
        print(f"⚠️ Error during event search at offset {params.get('offset')}: {e}")
        return None
    if not isinstance(payload, dict):
        return {}
    return payload.get("response") or {}


def get_past_events():
    """Fetch all past UFC events from the search API, following pagination.

    Pages are requested with the parameters in API_PARAMS (on a private copy,
    so the module-level dict is left untouched). The page size is taken from
    API_PARAMS["limit"]. Pagination stops when the reported result count is
    reached, when a page comes back empty, or when a request fails (a warning
    is printed and the events collected so far are returned).

    Returns:
        list[dict]: one record per event dated strictly before today, with the
        keys "Event", "Date", "Start Time", "Time Zone", "Location", "Venue",
        "URL" and "Event ID".
    """
    params = dict(API_PARAMS)
    # Step by the same amount we ask the API for, so no results are skipped or
    # fetched twice if "limit" is ever changed.
    page_size = int(params.get("limit") or 50)
    # Assumes c_eventDate is an ISO "YYYY-MM-DD" string, so plain string
    # comparison orders dates correctly -- TODO confirm against the API.
    today = time.strftime('%Y-%m-%d')

    events = []
    offset = 0
    total_events = None  # Unknown until the first page has been read

    while total_events is None or offset < total_events:
        params["offset"] = offset
        page = _fetch_events_page(params)
        if page is None:
            break
        if total_events is None:
            total_events = page.get("resultsCount") or 0

        results = page.get("results") or []
        if not results:
            # Nothing more to read; avoid requesting further empty pages.
            break

        for event in results:
            event_data = event.get("data") or {}
            event_date = event_data.get("c_eventDate")
            if not event_date or event_date >= today:
                continue
            events.append({
                "Event": event_data.get("name"),
                "Date": event_date,
                "Start Time": event_data.get("c_eventStartTime"),
                "Time Zone": event_data.get("c_eventTimeZone"),
                "Location": _format_event_location(event_data),
                "Venue": event_data.get("venueName"),
                "URL": event_data.get("landingPageUrl", ""),
                "Event ID": event_data.get("c_eventID")
            })

        offset += page_size

    return events

In [7]:
def get_event_details(event_id):
    """Download the JSON document for one event from the UFC event API.

    Args:
        event_id: value substituted into the UFC_EVENT_API URL template.

    Returns:
        The decoded JSON body on HTTP 200. On any other status, or if the
        request or decoding raises, a warning is printed and an empty dict is
        returned instead.
    """
    endpoint = UFC_EVENT_API.format(event_id)
    try:
        reply = requests.get(endpoint, headers=HEADERS, timeout=10)
        if reply.status_code != 200:
            print(f"⚠️ Failed to fetch details for event {event_id}. Status: {reply.status_code}")
            return {}
        return reply.json()
    except Exception as exc:
        # Best-effort: report the problem and fall through to the empty result.
        print(f"⚠️ Error fetching event {event_id}: {exc}")
    return {}

In [8]:
def _full_name(person):
    """Join the FirstName/LastName of a name mapping, skipping missing or empty parts."""
    person = person or {}
    parts = (person.get("FirstName"), person.get("LastName"))
    return " ".join(str(part).strip() for part in parts if part)


def get_fight_details(event_id):
    """Fetch the fight card of one event and flatten it into row dictionaries.

    Args:
        event_id: value substituted into the UFC_EVENT_API URL template.

    Returns:
        tuple[list[dict], list[dict]]: ``(fight_details, fight_results)``.
        ``fight_details`` rows have the keys "Event", "Bout", "Card Segment"
        and "URL"; ``fight_results`` rows have "Event", "Date", "Location",
        "Bout", "Card Segment", "Winner", "Method", "Ending Round",
        "Ending Time" and "Referee". Both lists are empty when the response
        status is not 200, or when the request or JSON decoding fails (in
        which case a warning is printed). Fights listing fewer than two
        fighters are skipped.
    """
    url = UFC_EVENT_API.format(event_id)
    try:
        # The timeout keeps one stalled connection from hanging a long scrape.
        response = requests.get(url, headers=HEADERS, timeout=10)
        if response.status_code != 200:
            return [], []
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # A single bad event should not abort the whole run.
        print(f"⚠️ Error fetching fights for event {event_id}: {e}")
        return [], []

    if not isinstance(payload, dict):
        return [], []

    # `or {}` / `or []` also cover keys that are present but null.
    event_data = payload.get("LiveEventDetail") or {}
    fights = event_data.get("FightCard") or []
    event_name = event_data.get("Name")
    event_start = event_data.get("StartTime")
    event_city = (event_data.get("Location") or {}).get("City")

    fight_details = []
    fight_results = []

    for fight in fights:
        fighters = fight.get("Fighters") or []
        if len(fighters) < 2:
            continue

        fight_id = fight.get("FightId")
        fight_url = f"https://www.ufc.com/event/{event_id}#{fight_id}"
        first_fighter = _full_name((fighters[0] or {}).get("Name")) or "Unknown"
        second_fighter = _full_name((fighters[1] or {}).get("Name")) or "Unknown"
        bout = f"{first_fighter} vs. {second_fighter}"
        card_segment = fight.get("CardSegment")

        fight_details.append({
            "Event": event_name,
            "Bout": bout,
            "Card Segment": card_segment,
            "URL": fight_url
        })

        result = fight.get("Result") or {}
        fight_results.append({
            "Event": event_name,
            "Date": event_start,
            "Location": event_city,
            "Bout": bout,
            "Card Segment": card_segment,
            # NOTE(review): "Winner" is filled from Result["Outcome"]; confirm
            # that this field actually identifies the winning fighter.
            "Winner": result.get("Outcome", "Unknown"),
            "Method": result.get("Method", "Unknown"),
            "Ending Round": result.get("EndingRound", "Unknown"),
            "Ending Time": result.get("EndingTime", "Unknown"),
            "Referee": _full_name(fight.get("Referee"))
        })

    return fight_details, fight_results


In [9]:
def save_to_csv(filename, data, folder=DATA_DIR):
    """Write a list of dictionaries to ``folder/filename`` as CSV.

    Args:
        filename: name of the CSV file to write.
        data: records passed to ``pd.DataFrame`` (one dict per row).
        folder: target directory; created first if it does not exist, so
            callers may pass a directory other than the pre-created DATA_DIR.
    """
    if folder:
        # An empty folder means "current directory", which needs no creation.
        os.makedirs(folder, exist_ok=True)
    output_path = os.path.join(folder, filename)
    # index=False: the positional row index is not written as a column.
    pd.DataFrame(data).to_csv(output_path, index=False)

In [10]:
def main():
    """Run the full scrape: list past events, then collect every event's fights.

    Writes three CSV files through save_to_csv: "UFC_Event_Details.csv" (one
    row per event), "UFC_Fight_Details.csv" and "UFC_Fight_Results.csv" (one
    row per fight). Events that carry no event ID are kept in the event file
    but skipped when fetching fights.
    """
    print("Fetching past UFC events...")
    events = get_past_events()
    save_to_csv("UFC_Event_Details.csv", events)

    fight_details = []
    fight_results = []
    for event in tqdm(events, desc="Fetching fight details"):
        event_id = event.get("Event ID")
        if event_id is None or event_id == "":
            # Without an ID the request would go to ".../None.json": a wasted
            # call plus a one-second sleep for a guaranteed-empty result.
            continue
        details, results = get_fight_details(event_id)
        fight_details.extend(details)
        fight_results.extend(results)
        # Pause between events to avoid hammering the API.
        time.sleep(1)

    save_to_csv("UFC_Fight_Details.csv", fight_details)
    save_to_csv("UFC_Fight_Results.csv", fight_results)
    print("Data scraping complete. CSVs saved in UFC_Data/")

In [11]:
# Entry point: runs the full scrape when this cell is executed. The guard passes
# in the notebook (the captured output below shows main() running) and also keeps
# main() from running if this code is imported as a module.
if __name__ == "__main__":
    main()

Fetching past UFC events...


Fetching fight details: 100%|████████████████████████████████████████████████████████| 719/719 [27:21<00:00,  2.28s/it]

Data scraping complete. CSVs saved in UFC_Data/



