In [108]:
import requests
from bs4 import BeautifulSoup
import json
from datetime import datetime
import os
import shutil

In [109]:
URL = 'https://www.ufc.com/rankings'
page = requests.get(URL)
soup = BeautifulSoup(page.content, 'html.parser')
weightclass = soup.findAll('div', attrs={'class':'view-grouping'})

In [110]:
# Dictionary to store the information
rankings = {}

# Function to extract athlete data from a row
def extract_athlete_data(row):
    rank = row.select_one('.views-field-weight-class-rank').text.strip()
    name = row.select_one('.views-field-title a').text.strip()
    return rank, name

# Iterate over all view groupings (weight classes)
for grouping in soup.select('.view-grouping'):
    # Extract the weight class name
    weight_class = grouping.select_one('.view-grouping-header').text.strip()
    # Initialize a dictionary for this weight class
    rankings[weight_class] = {}
    
    # Extract champion if present
    champion_section = grouping.select_one('.rankings--athlete--champion .info h5 a')
    if champion_section:
        champion_name = champion_section.text.strip()
        rankings[weight_class]['Champion'] = champion_name
    
    # Extract other athletes
    rows = grouping.select('tbody tr')
    for row in rows:
        rank, name = extract_athlete_data(row)
        rankings[weight_class][rank] = name

# Print the resulting dictionary
for weight_class, athletes in rankings.items():
    print(f"Weight Class: {weight_class}")
    for rank, name in athletes.items():
        print(f"  {rank}: {name}")

Weight Class: Men's Pound-for-Pound Top Rank
  Champion: Islam Makhachev
  1: Islam Makhachev
  2: Alex Pereira
  3: Jon Jones
  4: Ilia Topuria
  5: Sean O'Malley
  6: Belal Muhammad
  7: Leon Edwards
  8: Alexander Volkanovski
  9: Tom Aspinall
  10: Max Holloway
  11: Dricus Du Plessis
  12: Alexandre Pantoja
  13: Israel Adesanya
  14: Sean Strickland
  15: Charles Oliveira
Weight Class: Flyweight
  Champion: Alexandre Pantoja
  1: Brandon Royval
  2: Brandon Moreno
  3: Amir Albazi
  4: Kai Kara-France
  5: Tatsuro Taira
  6: Alex Perez
  7: Steve Erceg
  8: Matheus Nicolau
  9: Manel Kape
  10: Tim Elliott
  11: Matt Schnell
  12: Tagir Ulanbekov
  13: Bruno Silva
  14: Asu Almabayev
  15: Cody Durden
Weight Class: Bantamweight
  Champion: Sean O'Malley
  1: Merab Dvalishvili
  2: Cory Sandhagen
  3: Petr Yan
  4: Marlon Vera
  5: Henry Cejudo
  6: Deiveson Figueiredo
  7: Song Yadong
  8: José Aldo
  9: Rob Font
  10: Umar Nurmagomedov
  11: Kyler Phillips
  12: Mario Bautista
 

In [111]:
del rankings["Men's Pound-for-Pound Top Rank"]

In [96]:
# Initialize an empty dictionary
fighter_gym_dict = {}

for weight_class, fighters in rankings.items():

    # List of fighter names
    names = list(fighters.values())

    for i in names:
        name = i.replace(' ', '-')  # Replace spaces with hyphens for the URL
        fighterURL = 'https://www.ufc.com/athlete/' + name
        page = requests.get(fighterURL)
        soup = BeautifulSoup(page.content, 'html.parser')

        # Find the gym location (assuming it is in the third div with class 'c-bio__text')
        gym_div = soup.findAll('div', attrs={'class': 'c-bio__text'})

        if len(gym_div) > 2:
            gym_name = gym_div[2].text.strip()  # Get gym name and remove extra spaces
        else:
            gym_name = "Gym information not found"  # Handle cases where gym info is missing

        # Add to the dictionary
        fighter_gym_dict[i] = gym_name

    # Print the dictionary
fighter_gym_dict

{'Alexandre Pantoja': 'American Top Team',
 'Brandon Royval': 'Factory X',
 'Brandon Moreno': 'Jiu-Jitsu',
 'Amir Albazi': 'Xtreme Couture - Las Vegas, NV',
 'Kai Kara-France': 'City Kickboxing',
 'Tatsuro Taira': '24',
 'Alex Perez': 'Syndicate MMA',
 'Steve Erceg': 'MMA',
 'Matheus Nicolau': 'Nova Uniao - Rio de Janeiro',
 'Manel Kape': 'Xtreme Couture - Las Vegas, NV',
 'Tim Elliott': 'Freestyle',
 'Matt Schnell': 'Boxing',
 'Tagir Ulanbekov': 'Eagles MMA Team',
 'Bruno Silva': 'Grappler',
 'Asu Almabayev': 'Gym information not found',
 'Cody Durden': 'American Top Team',
 "Sean O'Malley": 'MMA LAB - Glendale, AZ',
 'Merab Dvalishvili': 'Team Serra/Longo',
 'Cory Sandhagen': 'MMA',
 'Petr Yan': 'Team Yan',
 'Marlon Vera': 'Grappler',
 'Henry Cejudo': 'Fight Ready - Scottsdale, AZ',
 'Deiveson Figueiredo': 'Team Figueiredo',
 'Song Yadong': 'Team Alpha Male',
 'José Aldo': 'Nova União',
 'Rob Font': 'Sityodtong - Boston',
 'Umar Nurmagomedov': 'MMA',
 'Kyler Phillips': 'The MMA Lab',

In [97]:
for weight_class, fighters in rankings.items():
    print(weight_class, fighters)

Flyweight {'Champion': 'Alexandre Pantoja', '1': 'Brandon Royval', '2': 'Brandon Moreno', '3': 'Amir Albazi', '4': 'Kai Kara-France', '5': 'Tatsuro Taira', '6': 'Alex Perez', '7': 'Steve Erceg', '8': 'Matheus Nicolau', '9': 'Manel Kape', '10': 'Tim Elliott', '11': 'Matt Schnell', '12': 'Tagir Ulanbekov', '13': 'Bruno Silva', '14': 'Asu Almabayev', '15': 'Cody Durden'}
Bantamweight {'Champion': "Sean O'Malley", '1': 'Merab Dvalishvili', '2': 'Cory Sandhagen', '3': 'Petr Yan', '4': 'Marlon Vera', '5': 'Henry Cejudo', '6': 'Deiveson Figueiredo', '7': 'Song Yadong', '8': 'José Aldo', '9': 'Rob Font', '10': 'Umar Nurmagomedov', '11': 'Kyler Phillips', '12': 'Mario Bautista', '13': 'Dominick Cruz', '14': 'Jonathan Martinez', '15': 'Montel Jackson'}
Featherweight {'Champion': 'Ilia Topuria', '1': 'Alexander Volkanovski', '2': 'Max Holloway', '3': 'Brian Ortega', '4': 'Yair Rodriguez', '5': 'Arnold Allen', '7': 'Josh Emmett', '8': 'Aljamain Sterling', '9': 'Calvin Kattar', '10': 'Giga Chikadze

In [98]:
import requests
from urllib.parse import quote
import time

# Replace 'YOUR_MAPBOX_ACCESS_TOKEN' with your actual Mapbox access token
ACCESS_TOKEN = 'pk.eyJ1IjoiYnJldHR6aW0iLCJhIjoiY2x6OTc0ZDBuMDRkaDJqcG40d3g1anE4aiJ9.XQvs8Hg7pOcp6FiIsG_PgQ'

def geocode_location_mapbox(location):
    # URL encode the location string
    encoded_location = quote(location)
    
    # Define the Mapbox Geocoding URL
    url = 'https://api.mapbox.com/geocoding/v5/mapbox.places/{}.json'.format(encoded_location)
    params = {
        'access_token': ACCESS_TOKEN,
        'limit': 1
    }

    response = requests.get(url, params=params)
    
    # Check if the response is valid
    if response.status_code == 200:
        try:
            data = response.json()
            if data['features']:
                # Extract latitude and longitude
                lat = data['features'][0]['geometry']['coordinates'][1]
                lon = data['features'][0]['geometry']['coordinates'][0]
                return lat, lon
            else:
                print(f"No results found for '{location}'")
        except ValueError:
            print("Error decoding JSON response")
    else:
        print(f"HTTP Error: {response.status_code}")
    
    return None, None

# Update rankings dictionary with gym locations and geocoded coordinates
for weight_class, athletes in rankings.items():
    print(f"Processing weight class: {weight_class}")
    for rank, athlete in athletes.items():
        gym_location = fighter_gym_dict.get(athlete)
        if gym_location:
            print(f"Geocoding gym location for {athlete}: {gym_location}")
            lat, lon = geocode_location_mapbox(gym_location)
            if lat and lon:
                # Add geocoded location to the dictionary
                rankings[weight_class][rank] = {
                    'name': athlete,
                    'gym_location': gym_location,
                    'latitude': lat,
                    'longitude': lon
                }
            else:
                # If geocoding fails, just add the name
                rankings[weight_class][rank] = {
                    'name': athlete,
                    'gym_location': gym_location,
                    'latitude': None,
                    'longitude': None
                }
            # Add a delay to avoid hitting rate limits
            time.sleep(2)

Processing weight class: Flyweight
Geocoding gym location for Alexandre Pantoja: American Top Team
Geocoding gym location for Brandon Royval: Factory X
Geocoding gym location for Brandon Moreno: Jiu-Jitsu
Geocoding gym location for Amir Albazi: Xtreme Couture - Las Vegas, NV
Geocoding gym location for Kai Kara-France: City Kickboxing
Geocoding gym location for Tatsuro Taira: 24
Geocoding gym location for Alex Perez: Syndicate MMA
Geocoding gym location for Steve Erceg: MMA
Geocoding gym location for Matheus Nicolau: Nova Uniao - Rio de Janeiro
Geocoding gym location for Manel Kape: Xtreme Couture - Las Vegas, NV
Geocoding gym location for Tim Elliott: Freestyle
Geocoding gym location for Matt Schnell: Boxing
Geocoding gym location for Tagir Ulanbekov: Eagles MMA Team
Geocoding gym location for Bruno Silva: Grappler
Geocoding gym location for Asu Almabayev: Gym information not found
Geocoding gym location for Cody Durden: American Top Team
Processing weight class: Bantamweight
Geocoding

Geocoding gym location for Alexa Grasso: Lobo Gym MMA
Geocoding gym location for Zhang Weili: Muay Thai
Geocoding gym location for Valentina Shevchenko: Tiger Muay Thai - Phuket, Thailand
Geocoding gym location for Manon Fiorot: Karate
Geocoding gym location for Julianna Peña: Sik-Jitsu Spokane, WA
Geocoding gym location for Raquel Pennington: Colorado Springs BJJ
Geocoding gym location for Rose Namajunas: Grudge Training Center
Geocoding gym location for Erin Blanchfield: Jiu-Jitsu
Geocoding gym location for Tatiana Suarez: Millennia MMA - Rancho Cucamonga, CA
Geocoding gym location for Yan Xiaonan: China Top Team Beijing
Geocoding gym location for Jéssica Andrade: PRVT Para Vale Tudo
Geocoding gym location for Kayla Harrison: Judo
Geocoding gym location for Virna Jandiroba: Jiu-Jitsu
Geocoding gym location for Ketlen Vieira: Judo
Geocoding gym location for Amanda Lemos: Marajo Brothers
Processing weight class: Women's Strawweight
Geocoding gym location for Zhang Weili: Muay Thai
Geoc

In [None]:
# Print updated rankings dictionary
print("Updated Rankings with Geocoded Locations:")
for weight_class, athletes in rankings.items():
    print(f"Weight Class: {weight_class}")
    for rank, details in athletes.items():
        print(f"Rank {rank}: {details}")

In [104]:
import json

# Your existing JSON data

# Convert to GeoJSON format
geojson = {
    "type": "FeatureCollection",
    "features": []
}

for weight_class, fighters in rankings.items():
    for rank, details in fighters.items():
        feature = {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [details['longitude'], details['latitude']]
            },
            "properties": {
                "name": details['name'],
                "gym": details['gym_location'],
                "rank": rank
            }
        }
        geojson["features"].append(feature)

# Save to a GeoJSON file
with open('data/fighter_gyms_2024-07-30.geojson', 'w') as f:
    json.dump(geojson, f, indent=4)


In [107]:
import csv
from datetime import datetime

# Define the CSV filename with today's date
csv_filename = f"rankings_{datetime.today().strftime('%Y-%m-%d')}.csv"

# Function to read the existing CSV file and return a dictionary of rows
def read_existing_data(filename):
    data = {}
    try:
        with open(filename, mode='r', newline='') as file:
            reader = csv.DictReader(file)
            for row in reader:
                name = row["Name"]
                data[name] = row
    except FileNotFoundError:
        # If the file doesn't exist, return an empty dictionary
        pass
    return data

# Function to write data to CSV file
def write_csv(data, filename):
    with open(filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        # Write the header
        writer.writerow(["Name", "Gym", "Weight", "Rank", "latitude", "longitude"])
        # Write the rows
        for row in data.values():
            writer.writerow([
                row["Name"],
                row["Gym"],
                row["Weight"],
                row["Rank"],
                row["latitude"],
                row["longitude"]
            ])

# Read existing data
existing_data = read_existing_data(csv_filename)

# Create a list to hold the updated rows for the CSV
updated_data = {}

# Iterate over the rankings dictionary and update or add new rows
for weight_class, fighters in rankings.items():
    for rank, fighter_info in fighters.items():
        name = fighter_info["name"]
        updated_data[name] = {
            "Name": name,
            "Gym": fighter_info["gym_location"],
            "Weight": weight_class,
            "Rank": rank,
            "latitude": fighter_info["latitude"],
            "longitude": fighter_info["longitude"]
        }

# Write the updated data to the CSV file
write_csv(updated_data, csv_filename)

print(f"CSV file '{csv_filename}' updated successfully.")


CSV file 'rankings_2024-07-31.csv' updated successfully.
