In [39]:
import csv
import json
import os
from datetime import datetime

import requests
from bs4 import BeautifulSoup

In [40]:
# Page that lists the current UFC rankings
URL = 'https://www.ufc.com/rankings'
# Bound the wait so a stalled connection cannot hang the notebook indefinitely
page = requests.get(URL, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page into empty rankings
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')
# Every <div class="view-grouping"> element found on the page
weightclass = soup.find_all('div', attrs={'class': 'view-grouping'})

In [41]:
# Scraped results: weight class name -> {rank label -> athlete name}
rankings = dict()

# Pull the rank and the athlete name out of one ranking-table row
def extract_athlete_data(row):
    """Return the ``(rank, name)`` pair held by a ranking row.

    ``row`` must support ``select_one``; the rank is read from the
    ``.views-field-weight-class-rank`` element and the name from the link
    inside ``.views-field-title``. Both texts are stripped of surrounding
    whitespace.
    """
    selectors = ('.views-field-weight-class-rank', '.views-field-title a')
    # Same lookup for both fields, evaluated in rank-then-name order
    return tuple(row.select_one(css).text.strip() for css in selectors)

# Walk through every view grouping (one per weight class) on the page
for grouping in soup.select('.view-grouping'):
    # The grouping header holds the weight class title
    weight_class = grouping.select_one('.view-grouping-header').text.strip()
    # Start this weight class with an empty rank -> name mapping
    division = rankings[weight_class] = {}

    # The champion link is only present when the grouping lists a champion
    champion_link = grouping.select_one('.rankings--athlete--champion .info h5 a')
    if champion_link:
        division['Champion'] = champion_link.text.strip()

    # Each table body row is one ranked athlete
    for row in grouping.select('tbody tr'):
        rank, name = extract_athlete_data(row)
        division[rank] = name

# Show what was collected, one weight class at a time
for weight_class in rankings:
    print(f"Weight Class: {weight_class}")
    for rank, name in rankings[weight_class].items():
        print(f"  {rank}: {name}")

Weight Class: Men's Pound-for-Pound Top Rank
  Champion: Islam Makhachev
  1: Islam Makhachev
  2: Alex Pereira
  3: Jon Jones
  4: Ilia Topuria
  5: Sean O'Malley
  6: Belal Muhammad
  7: Leon Edwards
  8: Alexander Volkanovski
  9: Tom Aspinall
  10: Max Holloway
  11: Dricus Du Plessis
  12: Alexandre Pantoja
  13: Israel Adesanya
  14: Sean Strickland
  15: Charles Oliveira
Weight Class: Flyweight
  Champion: Alexandre Pantoja
  1: Brandon Royval
  2: Brandon Moreno
  3: Amir Albazi
  4: Kai Kara-France
  5: Tatsuro Taira
  6: Alex Perez
  7: Steve Erceg
  8: Matheus Nicolau
  9: Manel Kape
  10: Tim Elliott
  11: Matt Schnell
  12: Tagir Ulanbekov
  13: Bruno Silva
  14: Asu Almabayev
  15: Cody Durden
Weight Class: Bantamweight
  Champion: Sean O'Malley
  1: Merab Dvalishvili
  2: Cory Sandhagen
  3: Petr Yan
  4: Marlon Vera
  5: Henry Cejudo
  6: Deiveson Figueiredo
  7: Song Yadong
  8: José Aldo
  9: Rob Font
  10: Umar Nurmagomedov
  11: Kyler Phillips
  12: Mario Bautista
 

In [42]:
# Drop the pound-for-pound list so only the per-division rankings remain.
# Guarded so that re-running this cell is a no-op instead of a KeyError.
if "Men's Pound-for-Pound Top Rank" in rankings:
    del rankings["Men's Pound-for-Pound Top Rank"]

In [43]:
# CSV file name stamped with the current date (YYYY-MM-DD)
csv_filename = "rankings_" + datetime.today().strftime('%Y-%m-%d') + ".csv"

# Function to read the existing CSV file and return a dictionary of rows
def read_existing_data(filename):
    """Load fighter gym details from a CSV file, keyed by fighter name.

    The file must have "Name", "Gym", "latitude" and "longitude" columns.
    When a name occurs on several rows, the last row wins.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        dict: fighter name -> {"gym_location": str, "latitude": float,
        "longitude": float}. Empty when the file does not exist.

    Raises:
        KeyError: If a required column is missing from the file.
        ValueError: If a latitude or longitude cell is not a valid float.
    """
    data = {}
    try:
        # utf-8-sig decodes plain UTF-8 and also drops a leading BOM, so
        # accented names match the scraped ones and the "Name" header is
        # not polluted; the platform default encoding guarantees neither.
        with open(filename, mode='r', newline='', encoding='utf-8-sig') as file:
            reader = csv.DictReader(file)
            for row in reader:
                name = row["Name"]
                data[name] = {
                    "gym_location": row["Gym"],
                    "latitude": float(row["latitude"]),
                    "longitude": float(row["longitude"])
                }
    except FileNotFoundError:
        # If the file doesn't exist, return an empty dictionary
        pass
    return data

# Function to write the updated rankings dictionary to a GeoJSON file
def write_geojson(data, filename):
    """Group ranked fighters by gym and write one GeoJSON point per gym.

    Each gym becomes a Point feature whose "fighters" property lists its
    fighters as "name (weight class - rank)" entries joined with "<br>".
    A gym's coordinates are taken from the first fighter seen for it.

    Args:
        data: Mapping of weight class -> {rank -> fighter info}, where each
            fighter info dict has "name", "gym_location", "latitude" and
            "longitude" keys.
        filename: Path of the GeoJSON file to write. Missing parent
            directories are created first.

    Raises:
        KeyError: If a fighter info dict lacks a required key.
    """
    gym_dict = {}

    for weight_class, fighters in data.items():
        for rank, fighter_info in fighters.items():
            gym_location = fighter_info["gym_location"]
            if gym_location not in gym_dict:
                gym_dict[gym_location] = {
                    "latitude": fighter_info["latitude"],
                    "longitude": fighter_info["longitude"],
                    "fighters": []
                }
            gym_dict[gym_location]["fighters"].append({
                "name": fighter_info["name"],
                "weight_class": weight_class,
                "rank": rank
            })

    features = []
    for gym_location, gym_info in gym_dict.items():
        fighters_list = "<br>".join(
            f"{fighter['name']} ({fighter['weight_class']} - {fighter['rank']})"
            for fighter in gym_info["fighters"]
        )
        features.append({
            "type": "Feature",
            "properties": {
                "gym_location": gym_location,
                "fighters": fighters_list
            },
            "geometry": {
                "type": "Point",
                # GeoJSON positions are ordered longitude first, then latitude
                "coordinates": [
                    gym_info["longitude"],
                    gym_info["latitude"]
                ]
            }
        })

    geojson = {
        "type": "FeatureCollection",
        "features": features
    }

    # open(..., 'w') does not create directories, so a path such as
    # "data/x.geojson" would fail on a fresh checkout without this step.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(filename, 'w', encoding='utf-8') as file:
        json.dump(geojson, file, indent=2)

# Load the gym details recorded in the date-stamped CSV (empty when the file is absent)
existing_data = read_existing_data(csv_filename)

# Keep only the ranked fighters with a known gym, attaching the gym details
updated_rankings = {
    weight_class: {
        rank: {
            "name": name,
            "gym_location": existing_data[name]["gym_location"],
            "latitude": existing_data[name]["latitude"],
            "longitude": existing_data[name]["longitude"]
        }
        for rank, name in fighters.items()
        if name in existing_data
    }
    for weight_class, fighters in rankings.items()
}

# Date-stamped output path for the GeoJSON file
geojson_filename = "data/fighter_gyms_" + datetime.today().strftime('%Y-%m-%d') + ".geojson"

# Group the located fighters by gym and save them as GeoJSON
write_geojson(updated_rankings, geojson_filename)

print(f"GeoJSON file '{geojson_filename}' created successfully.")


GeoJSON file 'rankings_2024-07-31.geojson' created successfully.
