# 01. Get Raw Data from webpage

Use BeautifulSoup to scrape the raw data tables from the webpage.

In [72]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Statistics page whose HTML tables are scraped into DataFrames
url = "https://www.mariowiki.com/Mario_Kart_8_Deluxe_in-game_statistics"

# Fetch the page; the timeout keeps the notebook from hanging on a stalled connection
response = requests.get(url, timeout=30)

# Only parse the body when the server answered 200 OK
if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')

    # Every <table> element on the page, in document order
    tables = soup.find_all('table')

    if tables:
        # One DataFrame per HTML table; later cells pick tables from this list by position
        tables_list = []

        for table in tables:
            # One single-row frame per <tr>, holding the stripped text of its <th>/<td> cells
            row_frames = [
                pd.DataFrame([[cell.get_text(strip=True) for cell in row.find_all(['th', 'td'])]])
                for row in table.find_all('tr')
            ]

            # Concatenate once per table instead of once per row (the row-by-row
            # version re-copied the whole frame for every <tr>). Rows with fewer
            # cells are still padded with NaN. The leading empty frame keeps
            # concat valid for a table that has no rows at all.
            df = pd.concat([pd.DataFrame(), *row_frames], ignore_index=True)

            tables_list.append(df)

        # Print every table with a separator so they can be inspected by number
        for table_num, df in enumerate(tables_list, start=1):
            print(f"DataFrame for Table {table_num}:\n")
            print(df)
            print("\n" + "=" * 40 + "\n")

        # To save each DataFrame to its own CSV file, uncomment the following lines
        # for table_num, df in enumerate(tables_list, start=1):
        #     df.to_csv(f"table_{table_num}.csv", index=False)

    else:
        print("No tables found on the webpage.")
else:
    print("Failed to retrieve the webpage. Status code:", response.status_code)

DataFrame for Table 1:

                                                   0    1    2    3    4   \
0                                        Drivers (DV)  NaN  NaN  NaN  NaN   
1                                              Driver   WG   AC   ON   OF   
2                                            MarioMro    6    2    4    2   
3                                            LuigiLig    6    2    5    1   
4                                            PeachPch    4    3    3    3   
5                                            DaisyDsy    4    3    3    3   
6                                           YoshiYsi1    4    3    3    3   
7                                             ToadKno    3    4    3    4   
8                                         ToadetteKnc    2    5    4    2   
9                                     Koopa TroopaNok    2    4    1    5   
10                                          BowserKop   10    0    6    0   
11                                     Donkey KongDk

# 02. Select main tables

## 02.1 Drivers Stats

In [73]:
# Driver statistics are the first table scraped from the page
df_drivers = tables_list[0]
# Row 1 carries the column labels (Driver, WG, AC, ...), so promote it to the header
df_drivers.columns = df_drivers.iloc[1]
df_drivers = (
    df_drivers.drop([0, 1])      # remove the title row and the header row
    .reset_index(drop=True)      # renumber the remaining rows from 0
    .drop(52)                    # presumably a trailing non-driver row -- TODO confirm
    .set_index('Driver')
)
# The CSV is written before the integer conversion, i.e. with the scraped text values
df_drivers.to_csv("driver.csv")
df_drivers = df_drivers.astype(int)
df_drivers

1,WG,AC,ON,OF,MT,SL,SW,SA,SG,TL,TW,TA,TG,IV
Driver,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
MarioMro,6,2,4,2,3,7,7,7,7,4,4,4,4,3
LuigiLig,6,2,5,1,3,7,7,7,7,5,5,5,5,3
PeachPch,4,3,3,3,4,6,6,6,6,5,5,5,5,1
DaisyDsy,4,3,3,3,4,6,6,6,6,5,5,5,5,1
YoshiYsi1,4,3,3,3,4,6,6,6,6,5,5,5,5,1
ToadKno,3,4,3,4,4,4,4,4,4,7,7,7,7,3
ToadetteKnc,2,5,4,2,4,3,3,3,3,7,7,7,7,3
Koopa TroopaNok,2,4,1,5,4,3,3,3,3,8,8,8,8,4
BowserKop,10,0,6,0,0,10,10,10,10,0,0,0,0,6
Donkey KongDkg,8,1,10,0,1,9,9,9,9,2,2,2,2,4


## 02.2 Karts

In [74]:
# Kart bodies are the second table scraped from the page
df_karts = tables_list[1]
# Row 1 is used as the header; rows 0 and 1 are then dropped from the data
df_karts.columns = df_karts.iloc[1]
df_karts = df_karts.drop([0, 1])
df_karts = df_karts.reset_index(drop=True)
df_karts.set_index('Body', inplace=True)
# to_csv returns None when given a path, so its result must not be assigned
# back to df_karts (doing so discarded the frame and left the cell output empty)
df_karts.to_csv("kart.csv")
df_karts


## 02.3 Tires

In [75]:
# Tires are the third table scraped from the page
df_tires = tables_list[2]
# Row 1 carries the column labels (Tire, WG, AC, ...), so promote it to the header
df_tires.columns = df_tires.iloc[1]
df_tires = (
    df_tires.drop([0, 1])        # remove the two leading non-data rows
    .reset_index(drop=True)      # renumber the remaining rows from 0
    .set_index('Tire')
)
df_tires.to_csv("tire.csv")
df_tires

1,WG,AC,ON,OF,MT,SL,SW,SA,SG,TL,TW,TA,TG,IV
Tire,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
StandardNormal,2,4,2,5,4,2,3,2,3,3,3,3,3,4
Monster,4,2,3,7,3,3,2,2,1,0,1,0,1,6
Roller,0,6,0,4,6,0,3,0,3,4,4,4,4,0
Slim,2,2,4,1,3,3,2,4,2,4,4,3,4,5
Slick,3,1,4,0,2,4,0,4,0,2,0,2,1,5
Metal,4,0,1,2,2,4,3,1,2,2,2,1,0,6
Button,0,5,1,3,5,1,2,2,2,3,3,4,2,3
Off-Road,3,3,3,6,3,3,4,2,1,1,1,2,2,6
Sponge,1,4,2,6,5,1,1,1,4,2,1,2,3,4
WoodWooden,2,2,4,1,3,3,2,4,2,4,4,3,4,5


## 02.4 Gliders

In [76]:
# Gliders are the fourth table scraped from the page
df_gliders = tables_list[3]
# Row 1 carries the column labels (Glider, WG, AC, ...), so promote it to the header
df_gliders.columns = df_gliders.iloc[1]
df_gliders = (
    df_gliders.drop([0, 1])      # remove the two leading non-data rows
    .reset_index(drop=True)      # renumber the remaining rows from 0
    .set_index('Glider')
)
df_gliders.to_csv("glider.csv")
df_gliders

1,WG,AC,ON,OF,MT,SL,SW,SA,SG,TL,TW,TA,TG,IV
Glider,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Super Glider,1,1,1,1,1,1,1,0,2,1,0,1,1,1
Cloud Glider,0,2,1,1,2,0,1,1,1,1,0,1,2,0
Wario Wing,2,1,2,0,1,1,0,1,2,1,1,0,1,1
Waddle Wing,1,1,1,1,1,1,1,0,2,1,0,1,1,1
Peach Parasol,1,2,2,0,2,0,0,1,1,1,1,0,2,0
Parachute,0,2,1,1,2,0,1,1,1,1,0,1,2,0
Parafoil,1,2,2,0,2,0,0,1,1,1,1,0,2,0
Flower Glider,0,2,1,1,2,0,1,1,1,1,0,1,2,0
Bowser Kite,1,2,2,0,2,0,0,1,1,1,1,0,2,0
Plane Glider,2,1,2,0,1,1,0,1,2,1,1,0,1,1


# 03. Image Links

In [77]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def get_image_links(url, timeout=30):
    """Return the absolute URL of every ``<img>`` found on the page at *url*.

    Args:
        url: Address of the page to download; also the base against which
            relative ``src`` values are resolved.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of image URLs in document order. ``<img>`` tags without a
        ``src`` attribute are skipped.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error response: the result is sliced by fixed positions
    # later on, so links taken from an error page would be silently wrong
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    # Resolve each src against the page URL so relative paths become absolute
    return [
        urljoin(url, img['src'])
        for img in soup.find_all('img')
        if 'src' in img.attrs
    ]

# Collect the image links from the same statistics page the tables came from
webpage_url = 'https://www.mariowiki.com/Mario_Kart_8_Deluxe_in-game_statistics'
image_links = get_image_links(webpage_url)

# Show one link per line (nothing is printed when no image was found)
if image_links:
    print(*image_links, sep="\n")


https://mario.wiki.gallery/images/thumb/d/d9/MK8_Mario_Icon.png/64px-MK8_Mario_Icon.png
https://mario.wiki.gallery/images/thumb/5/51/MK8_Luigi_Icon.png/64px-MK8_Luigi_Icon.png
https://mario.wiki.gallery/images/thumb/c/c2/MK8_Peach_Icon.png/64px-MK8_Peach_Icon.png
https://mario.wiki.gallery/images/thumb/3/32/MK8_Daisy_Icon.png/64px-MK8_Daisy_Icon.png
https://mario.wiki.gallery/images/thumb/9/91/MK8_Yoshi_Icon.png/64px-MK8_Yoshi_Icon.png
https://mario.wiki.gallery/images/thumb/4/45/MK8_Toad_Icon.png/64px-MK8_Toad_Icon.png
https://mario.wiki.gallery/images/thumb/8/8e/MK8_Toadette_Icon.png/64px-MK8_Toadette_Icon.png
https://mario.wiki.gallery/images/thumb/b/bc/MK8_Koopa_Icon.png/64px-MK8_Koopa_Icon.png
https://mario.wiki.gallery/images/thumb/4/47/MK8_Bowser_Icon.png/64px-MK8_Bowser_Icon.png
https://mario.wiki.gallery/images/thumb/0/08/MK8_DKong_Icon.png/64px-MK8_DKong_Icon.png
https://mario.wiki.gallery/images/thumb/c/c2/MK8_Wario_Icon.png/64px-MK8_Wario_Icon.png
https://mario.wiki.gallery

In [78]:
# Reload the saved driver stats, using the first CSV column (Driver) as the index
df_driver = pd.read_csv("driver.csv", index_col=0)
# Assumes the first 52 image links are the driver icons, in the same order as
# the table rows -- TODO confirm against the page layout.
# The thumbnail size embedded in each URL is switched from 64px to 96px.
df_driver["Image"] = [link.replace("64px", "96px") for link in image_links[0:52]]
df_driver.to_csv("driver.csv")
df_driver

Unnamed: 0_level_0,WG,AC,ON,OF,MT,SL,SW,SA,SG,TL,TW,TA,TG,IV,Image
Driver,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
MarioMro,6,2,4,2,3,7,7,7,7,4,4,4,4,3,https://mario.wiki.gallery/images/thumb/d/d9/M...
LuigiLig,6,2,5,1,3,7,7,7,7,5,5,5,5,3,https://mario.wiki.gallery/images/thumb/5/51/M...
PeachPch,4,3,3,3,4,6,6,6,6,5,5,5,5,1,https://mario.wiki.gallery/images/thumb/c/c2/M...
DaisyDsy,4,3,3,3,4,6,6,6,6,5,5,5,5,1,https://mario.wiki.gallery/images/thumb/3/32/M...
YoshiYsi1,4,3,3,3,4,6,6,6,6,5,5,5,5,1,https://mario.wiki.gallery/images/thumb/9/91/M...
ToadKno,3,4,3,4,4,4,4,4,4,7,7,7,7,3,https://mario.wiki.gallery/images/thumb/4/45/M...
ToadetteKnc,2,5,4,2,4,3,3,3,3,7,7,7,7,3,https://mario.wiki.gallery/images/thumb/8/8e/M...
Koopa TroopaNok,2,4,1,5,4,3,3,3,3,8,8,8,8,4,https://mario.wiki.gallery/images/thumb/b/bc/M...
BowserKop,10,0,6,0,0,10,10,10,10,0,0,0,0,6,https://mario.wiki.gallery/images/thumb/4/47/M...
Donkey KongDkg,8,1,10,0,1,9,9,9,9,2,2,2,2,4,https://mario.wiki.gallery/images/thumb/0/08/M...


In [79]:
# Reload the saved kart stats, using the first CSV column (Body) as the index
df_karts = pd.read_csv("kart.csv", index_col=0)
# Assumes the 41 links that follow the 52 driver icons are the kart bodies, in
# table order -- TODO confirm against the page layout.
# The thumbnail size embedded in each URL is switched from 100px to 150px.
df_karts["Image"] = [link.replace("100px", "150px") for link in image_links[52:93]]
df_karts.to_csv("kart.csv")
df_karts

Unnamed: 0_level_0,WG,AC,ON,OF,MT,SL,SW,SA,SG,TL,TW,TA,TG,IV,Image
Body,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Standard Kart,2,4,3,3,5,3,3,3,3,3,2,3,3,3,https://mario.wiki.gallery/images/thumb/0/05/S...
Pipe Frame,1,6,3,4,6,2,3,1,1,5,4,4,2,3,https://mario.wiki.gallery/images/thumb/d/d1/P...
Mach 8,3,3,2,4,5,3,3,5,4,2,2,4,2,3,https://mario.wiki.gallery/images/thumb/d/df/M...
Steel Driver,4,1,1,3,3,4,5,2,0,1,5,1,1,6,https://mario.wiki.gallery/images/thumb/9/94/S...
Cat Cruiser,2,5,4,3,6,2,2,3,4,4,2,3,4,3,https://mario.wiki.gallery/images/thumb/f/f4/C...
Circuit Special,3,1,3,1,3,5,1,4,2,1,1,2,0,6,https://mario.wiki.gallery/images/thumb/6/6c/C...
Tri-Speeder,4,1,1,3,3,4,5,2,0,1,5,1,1,6,https://mario.wiki.gallery/images/thumb/5/56/T...
Badwagon,4,0,2,5,3,5,2,3,1,0,1,1,0,7,https://mario.wiki.gallery/images/thumb/c/c2/B...
Prancer,1,2,1,2,4,4,3,3,3,3,3,2,3,5,https://mario.wiki.gallery/images/thumb/f/ff/P...
BiddybuggyBuggybud,0,7,1,4,7,0,1,2,1,5,4,5,4,0,https://mario.wiki.gallery/images/thumb/4/45/B...


In [80]:
# Reload the saved tire stats, using the first CSV column (Tire) as the index
df_tires = pd.read_csv("tire.csv", index_col=0)
# Assumes links 93..114 (22 of them) are the tire pictures, in table order
# -- TODO confirm against the page layout.
# The thumbnail size embedded in each URL is switched from 100px to 150px.
df_tires["Image"] = [link.replace("100px", "150px") for link in image_links[93:115]]
df_tires.to_csv("tire.csv")
df_tires

Unnamed: 0_level_0,WG,AC,ON,OF,MT,SL,SW,SA,SG,TL,TW,TA,TG,IV,Image
Tire,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
StandardNormal,2,4,2,5,4,2,3,2,3,3,3,3,3,4,https://mario.wiki.gallery/images/thumb/a/a8/S...
Monster,4,2,3,7,3,3,2,2,1,0,1,0,1,6,https://mario.wiki.gallery/images/thumb/2/29/M...
Roller,0,6,0,4,6,0,3,0,3,4,4,4,4,0,https://mario.wiki.gallery/images/thumb/7/76/R...
Slim,2,2,4,1,3,3,2,4,2,4,4,3,4,5,https://mario.wiki.gallery/images/thumb/f/f8/S...
Slick,3,1,4,0,2,4,0,4,0,2,0,2,1,5,https://mario.wiki.gallery/images/thumb/d/dd/S...
Metal,4,0,1,2,2,4,3,1,2,2,2,1,0,6,https://mario.wiki.gallery/images/thumb/9/96/M...
Button,0,5,1,3,5,1,2,2,2,3,3,4,2,3,https://mario.wiki.gallery/images/thumb/0/07/B...
Off-Road,3,3,3,6,3,3,4,2,1,1,1,2,2,6,https://mario.wiki.gallery/images/thumb/2/25/O...
Sponge,1,4,2,6,5,1,1,1,4,2,1,2,3,4,https://mario.wiki.gallery/images/thumb/4/4c/S...
WoodWooden,2,2,4,1,3,3,2,4,2,4,4,3,4,5,https://mario.wiki.gallery/images/thumb/0/03/W...


In [81]:
# Reload the saved glider stats, using the first CSV column (Glider) as the index
df_gliders = pd.read_csv("glider.csv", index_col=0)
# Assumes links 115..129 (15 of them) are the glider pictures, in table order
# -- TODO confirm against the page layout.
# The thumbnail size embedded in each URL is switched from 100px to 150px.
df_gliders["Image"] = [link.replace("100px", "150px") for link in image_links[115:130]]
df_gliders.to_csv("glider.csv")
df_gliders

Unnamed: 0_level_0,WG,AC,ON,OF,MT,SL,SW,SA,SG,TL,TW,TA,TG,IV,Image
Glider,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Super Glider,1,1,1,1,1,1,1,0,2,1,0,1,1,1,https://mario.wiki.gallery/images/thumb/a/a8/S...
Cloud Glider,0,2,1,1,2,0,1,1,1,1,0,1,2,0,https://mario.wiki.gallery/images/thumb/8/84/C...
Wario Wing,2,1,2,0,1,1,0,1,2,1,1,0,1,1,https://mario.wiki.gallery/images/thumb/a/ae/W...
Waddle Wing,1,1,1,1,1,1,1,0,2,1,0,1,1,1,https://mario.wiki.gallery/images/thumb/e/ef/W...
Peach Parasol,1,2,2,0,2,0,0,1,1,1,1,0,2,0,https://mario.wiki.gallery/images/thumb/6/6e/P...
Parachute,0,2,1,1,2,0,1,1,1,1,0,1,2,0,https://mario.wiki.gallery/images/thumb/d/dd/P...
Parafoil,1,2,2,0,2,0,0,1,1,1,1,0,2,0,https://mario.wiki.gallery/images/thumb/c/c4/P...
Flower Glider,0,2,1,1,2,0,1,1,1,1,0,1,2,0,https://mario.wiki.gallery/images/thumb/b/b3/F...
Bowser Kite,1,2,2,0,2,0,0,1,1,1,1,0,2,0,https://mario.wiki.gallery/images/thumb/f/f7/B...
Plane Glider,2,1,2,0,1,1,0,1,2,1,1,0,1,1,https://mario.wiki.gallery/images/thumb/c/ca/P...


In [82]:
def eliminar_despues_primera_mayus_minus(texto):
    """Cut a scraped name at the point where a second label is glued onto it.

    The text is truncated just before the first capital letter that directly
    follows a lowercase letter or a closing parenthesis (for example
    ``"MarioMro"`` becomes ``"Mario"``). Every run of digits is then removed
    from the part that was kept. Text without such a boundary is only stripped
    of its digits.

    Args:
        texto: The raw name string.

    Returns:
        The shortened, digit-free name.
    """
    import re

    # Leftmost spot where a lowercase letter or ")" is immediately followed by
    # a capital letter; everything from that capital letter onwards is dropped
    corte = re.search(r'[a-z)][A-Z]', texto)
    nombre = texto if corte is None else texto[:corte.start() + 1]

    # Remove whatever digits are left in the kept part
    return re.sub(r'\d+', '', nombre)


In [83]:
# Reload the driver file with Driver as an ordinary column so it can be edited
df_driver = pd.read_csv("driver.csv")
# Trim the code glued to the end of each scraped name (e.g. "MarioMro" -> "Mario")
df_driver["Driver"] = df_driver["Driver"].apply(eliminar_despues_primera_mayus_minus)
# Put the cleaned names back as the index and overwrite the CSV
df_driver = df_driver.set_index('Driver')
df_driver.to_csv("driver.csv")
df_driver

Unnamed: 0_level_0,WG,AC,ON,OF,MT,SL,SW,SA,SG,TL,TW,TA,TG,IV,Image
Driver,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Mario,6,2,4,2,3,7,7,7,7,4,4,4,4,3,https://mario.wiki.gallery/images/thumb/d/d9/M...
Luigi,6,2,5,1,3,7,7,7,7,5,5,5,5,3,https://mario.wiki.gallery/images/thumb/5/51/M...
Peach,4,3,3,3,4,6,6,6,6,5,5,5,5,1,https://mario.wiki.gallery/images/thumb/c/c2/M...
Daisy,4,3,3,3,4,6,6,6,6,5,5,5,5,1,https://mario.wiki.gallery/images/thumb/3/32/M...
Yoshi,4,3,3,3,4,6,6,6,6,5,5,5,5,1,https://mario.wiki.gallery/images/thumb/9/91/M...
Toad,3,4,3,4,4,4,4,4,4,7,7,7,7,3,https://mario.wiki.gallery/images/thumb/4/45/M...
Toadette,2,5,4,2,4,3,3,3,3,7,7,7,7,3,https://mario.wiki.gallery/images/thumb/8/8e/M...
Koopa Troopa,2,4,1,5,4,3,3,3,3,8,8,8,8,4,https://mario.wiki.gallery/images/thumb/b/bc/M...
Bowser,10,0,6,0,0,10,10,10,10,0,0,0,0,6,https://mario.wiki.gallery/images/thumb/4/47/M...
Donkey Kong,8,1,10,0,1,9,9,9,9,2,2,2,2,4,https://mario.wiki.gallery/images/thumb/0/08/M...
