In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

BASE = "https://www.ufc.com"

def get_fighters_from_letter(letter):
    url = f"{BASE}/athletes/all?letter={letter}"
    r = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
    soup = BeautifulSoup(r.text, "html.parser")

    fighters = []

    for card in soup.select(".c-listing-athlete__item"):
        name_tag = card.select_one(".c-listing-athlete__name")
        link_tag = card.find("a")

        if not name_tag or not link_tag:
            continue

        name = name_tag.get_text(strip=True)
        profile_url = BASE + link_tag["href"]

        fighters.append({
            "name": name,
            "profile_url": profile_url
        })

    return fighters


def get_official_image(profile_url):
    r = requests.get(profile_url, headers={"User-Agent": "Mozilla/5.0"})
    soup = BeautifulSoup(r.text, "html.parser")

    # Image container
    img = soup.select_one(".hero-profile__image img")

    if img and img.get("src"):
        return img["src"]

    return "NOT_FOUND"


# ---------------------------------------------------
# SCRAPE ALL UFC FIGHTERS Aâ€“Z
# ---------------------------------------------------

all_fighters = []

for letter in "abcdefghijklmnopqrstuvwxyz":
    print(f"Scraping list: {letter.upper()}")
    fighters = get_fighters_from_letter(letter)
    all_fighters.extend(fighters)
    time.sleep(0.5)

df = pd.DataFrame(all_fighters)

# ---------------------------------------------------
# GET OFFICIAL PROFILE IMAGES
# ---------------------------------------------------

image_urls = []

for i, row in df.iterrows():
    print(f"Fetching image for: {row['name']}")
    img_url = get_official_image(row['profile_url'])

    image_urls.append(img_url)
    time.sleep(0.5)


df["img_url"] = image_urls

df.to_csv("ufc_official_images.csv", index=False)

print("\nDone! Saved ufc_official_images.csv")


Scraping list: A
Scraping list: B
Scraping list: C
Scraping list: D
Scraping list: E
Scraping list: F
Scraping list: G
Scraping list: H
Scraping list: I
Scraping list: J
Scraping list: K
Scraping list: L
Scraping list: M
Scraping list: N
Scraping list: O
Scraping list: P
Scraping list: Q
Scraping list: R
Scraping list: S
Scraping list: T
Scraping list: U
Scraping list: V
Scraping list: W
Scraping list: X
Scraping list: Y
Scraping list: Z

Done! Saved ufc_official_images.csv


In [3]:
all_fighters

[]

In [6]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import urllib.parse

# ----------------------------------------
# Load your fighter dataframe
# ----------------------------------------
fighters = pd.read_csv("../training/dataset/cleaned/ufc_fighters_cleaned.csv")   # MUST contain the column: name

image_data = []

def get_fighter_image(fighter_name):
    try:
        # Bing search query
        query = urllib.parse.quote(f"{fighter_name} UFC fighter")
        url = f"https://www.bing.com/images/search?q={query}"

        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
        }

        r = requests.get(url, headers=headers)
        soup = BeautifulSoup(r.text, "html.parser")

        # Extract the FIRST image result
        img_tag = soup.find("img", {"class": "mimg"})

        if img_tag and "src" in img_tag.attrs:
            return img_tag["src"]

        return None

    except Exception as e:
        print("Error:", e)
        return None


# ----------------------------------------
# Loop through all fighters
# ----------------------------------------
for name in fighters["name"].unique():
    print(f"Scraping image for: {name}")

    url = get_fighter_image(name)

    image_data.append({
        "name": name,
        "img_url": url if url else "NOT_FOUND"
    })

    # Sleep to avoid being blocked
    time.sleep(1.2)


# ----------------------------------------
# Save output
# ----------------------------------------
df_images = pd.DataFrame(image_data)
df_images.to_csv("fighter_images.csv", index=False)

print("\nDone! Saved fighter_images.csv")


Scraping image for: Danny Abbadi
Scraping image for: Nariman Abbasov
Scraping image for: Darion Abbey
Scraping image for: David Abbott
Scraping image for: Hamdy Abdelwahab
Scraping image for: Mansur Abdul-Malik
Scraping image for: Shamil Abdurakhimov
Scraping image for: Hiroyuki Abe
Scraping image for: Daichi Abe
Scraping image for: Papy Abedi
Scraping image for: Ricardo Abreu
Scraping image for: Klidson Abreu
Scraping image for: Daniel Acacio
Scraping image for: John Adajar
Scraping image for: Juan Adams
Scraping image for: Anthony Adams
Scraping image for: Zarrukh Adashev
Scraping image for: Israel Adesanya
Scraping image for: Mohamed Ado
Scraping image for: Nick Agallar
Scraping image for: Mariya Agapova
Scraping image for: Jessica Aguilar
Scraping image for: Kevin Aguilar
Scraping image for: Jesus Aguilar
Scraping image for: Christian Aguilera
Scraping image for: Nick Aguirre
Scraping image for: Agulali
Scraping image for: Mike Aina
Scraping image for: Ashiek Ajim
Scraping image fo