In [32]:
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup

In [46]:
# Tapology search-result pages listing PPV buy figures (the listing spans two pages).
urls = ["https://www.tapology.com/search/mma-event-figures/ppv-pay-per-view-buys-buyrate",
        "https://www.tapology.com/search/mma-event-figures/ppv-pay-per-view-buys-buyrate?page=2"]

# Sent with every request.
# NOTE(review): presumably needed because the site rejects the default
# python-requests User-Agent — confirm.
headers = {
    "User-Agent": "Mozilla/5.0"
}

# One parsed <table> element per URL, in the same order as `urls`.
tables = []

for url in urls:
    # Without a timeout, requests can wait forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=30)
    # Stop on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find("table", class_="siteSearchResults")
    # `find` returns None when nothing matches; fail here with a clear message
    # rather than later with an AttributeError while iterating rows.
    if table is None:
        raise ValueError(f"No 'siteSearchResults' table found at {url}")
    tables.append(table)

In [47]:
# Positions of the cells read from each result row.
NAME_COL, DATE_COL, BUYS_COL = 0, 4, 6

# Records for UFC events only: {"event", "date", "ppv_buys"}, all as raw strings.
events = []

for table in tables:
    rows = table.find_all("tr")[1:]  # Skip the header row
    for row in rows:
        cols = row.find_all("td")
        # The highest index read is BUYS_COL, so a usable row needs at least
        # BUYS_COL + 1 cells; shorter rows would raise IndexError, so skip them.
        if len(cols) <= BUYS_COL:
            continue

        event_name = cols[NAME_COL].get_text(strip=True)
        date = cols[DATE_COL].get_text(strip=True)
        buys = cols[BUYS_COL].get_text(strip=True)

        if "UFC" in event_name:  # Filter only UFC events
            events.append({
                "event": event_name,
                "date": date,
                "ppv_buys": buys
            })

In [48]:
# Build the scraped frame and normalise its two text columns in one chain:
#   ppv_buys: drop thousands separators, then cast to float
#   date:     parse 'YYYY.MM.DD' and re-emit as an ISO 'YYYY-MM-DD' string
df_scraped = pd.DataFrame(events).assign(
    ppv_buys=lambda frame: frame['ppv_buys'].str.replace(',', '').astype('float64'),
    date=lambda frame: (
        pd.to_datetime(frame['date'], format='%Y.%m.%d')
        .dt.strftime('%Y-%m-%d')
        .astype(str)
    ),
)
df_scraped

Unnamed: 0,event,date,ppv_buys
0,UFC 229,2018-10-06,2400000.0
1,UFC 202,2016-08-20,1600000.0
2,UFC 257,2021-01-23,1600000.0
3,UFC 264,2021-07-10,1504737.0
4,UFC 246,2020-01-18,1353429.0
...,...,...,...
206,UFC 37,2002-05-10,50000.0
207,UFC 43,2003-06-06,49000.0
208,UFC 39,2002-09-27,45000.0
209,UFC 42,2003-04-25,35000.0


In [51]:
# Load the downloaded buy-rate sheet (cp1252-encoded) and normalise it:
#   'PPV Buys': drop thousands separators, then cast to float
#   'Date':     parse 'MM/DD/YYYY' and re-emit as an ISO 'YYYY-MM-DD' string
df_downloaded = pd.read_csv('ppv-buyrates.csv', encoding='cp1252').assign(**{
    'PPV Buys': lambda frame: frame['PPV Buys'].str.replace(',', '').astype('float64'),
    'Date': lambda frame: (
        pd.to_datetime(frame['Date'], format='%m/%d/%Y')
        .dt.strftime('%Y-%m-%d')
        .astype(str)
    ),
})
df_downloaded.head(50)

Unnamed: 0,Date,Event,PPV Buys,Unnamed: 3,Unnamed: 4
0,1997-05-30,UFC 13: The Ultimate Force,,,
1,1997-07-27,UFC 14: Showdown,,,
2,1997-10-17,UFC 15: Collision Course,,,
3,1998-03-13,UFC 16: Battle in the Bayou,,,
4,1998-05-15,UFC 17: Redemption,,,
5,1999-01-08,UFC 18: The Road to the Heavyweight Title,,,
6,1999-03-05,UFC 19: Ultimate Young Guns,,,
7,1999-05-07,UFC 20: Battle for the Gold,,,
8,1999-07-16,UFC 21: Return of the Champions,,,
9,1999-09-24,UFC 22: Only One Can be Champion,,,


In [64]:
# Load the semicolon-delimited event list (first column is the index) and
# round-trip DATE through datetime so it is a validated 'YYYY-MM-DD' string.
events = pd.read_csv('Events.csv', sep=';', index_col=0).assign(
    DATE=lambda frame: (
        pd.to_datetime(frame['DATE'], format='%Y-%m-%d')
        .dt.strftime('%Y-%m-%d')
        .astype(str)
    )
)
events

Unnamed: 0,EVENT,DATE,LOCATION
0,UFC Fight Night: Edwards vs. Brady,2025-03-22,"London, England, United Kingdom"
1,UFC Fight Night: Vettori vs. Dolidze 2,2025-03-15,"Las Vegas, Nevada, USA"
2,UFC 313: Pereira vs. Ankalaev,2025-03-08,"Las Vegas, Nevada, USA"
3,UFC Fight Night: Kape vs. Almabayev,2025-03-01,"Las Vegas, Nevada, USA"
4,UFC Fight Night: Cejudo vs. Song,2025-02-22,"Seattle, Washington, USA"
...,...,...,...
720,UFC 6: Clash of the Titans,1995-07-14,"Casper, Wyoming, USA"
721,UFC 5: The Return of the Beast,1995-04-07,"Charlotte, North Carolina, USA"
722,UFC 4: Revenge of the Warriors,1994-12-16,"Tulsa, Oklahoma, USA"
723,UFC 3: The American Dream,1994-09-09,"Charlotte, North Carolina, USA"


In [65]:
# Attach a PPV buy figure to every event by matching on the ISO date string.
#
# Precedence: the downloaded sheet wins when it has a figure for the date;
# otherwise the scraped figure is used; otherwise the value stays NaN.
#
# Missing values are resolved with `fillna`, not with `value != np.nan`:
# NaN compares unequal to everything (itself included), so such a test is
# always True and would let a missing downloaded value overwrite a valid
# scraped one.

# One figure per date from each source. Dates may repeat, so keep the first
# row per date to get a unique index for the lookup.
scraped_by_date = df_scraped.drop_duplicates(subset='date').set_index('date')['ppv_buys']
downloaded_by_date = df_downloaded.drop_duplicates(subset='Date').set_index('Date')['PPV Buys']

event_dates = events['DATE']
events['PPV BUYS'] = (
    event_dates.map(downloaded_by_date)          # NaN where the date is absent or the figure is missing
    .fillna(event_dates.map(scraped_by_date))    # fall back to the scraped figure
    .astype('float64')
)

In [66]:
# Print column dtypes and non-null counts; the 'PPV BUYS' non-null count shows
# how many events ended up with a PPV figure.
events.info()

<class 'pandas.core.frame.DataFrame'>
Index: 725 entries, 0 to 724
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   EVENT     725 non-null    object 
 1   DATE      725 non-null    object 
 2   LOCATION  725 non-null    object 
 3   PPV BUYS  218 non-null    float64
dtypes: float64(1), object(3)
memory usage: 28.3+ KB


In [67]:
# Write the frame (index included) as a semicolon-delimited CSV.
# NOTE(review): "Events.csv" is also the file this notebook loads `events`
# from, so this overwrites the original input — consider a separate output path.
events.to_csv("Events.csv", sep=";")

In [1]:
import pandas as pd

In [4]:
# Reload the saved, semicolon-delimited event table (first column is the index)
# and show its last five rows as a quick check of the stored PPV column.
events = pd.read_csv("Events.csv", delimiter=";", index_col=0)
events.tail()

Unnamed: 0,EVENT,DATE,LOCATION,PPV BUYS
720,UFC 6: Clash of the Titans,1995-07-14,"Casper, Wyoming, USA",240000.0
721,UFC 5: The Return of the Beast,1995-04-07,"Charlotte, North Carolina, USA",260000.0
722,UFC 4: Revenge of the Warriors,1994-12-16,"Tulsa, Oklahoma, USA",120000.0
723,UFC 3: The American Dream,1994-09-09,"Charlotte, North Carolina, USA",90000.0
724,UFC 2: No Way Out,1994-03-11,"Denver, Colorado, USA",300000.0
