In [1]:
from src.data_cleaning.data_cleaning import (
    clean_fighter_tott,
    clean_event_details,
    clean_fighter_details,
    clean_fight_stats,
    clean_fight_results
)
from pathlib import Path
import pandas as pd

## Read data

Load all raw data files except `ufc_fight_details.csv`, which doesn't add anything.

In [2]:
# Folder holding the raw CSV files, relative to the working directory.
raw_data_path = Path('./data/raw')

# Load each raw table into its own DataFrame.
df_fighter_detail = pd.read_csv(raw_data_path / 'ufc_fighter_details.csv')
df_event_details = pd.read_csv(raw_data_path / 'ufc_event_details.csv')
df_fighter_tott = pd.read_csv(raw_data_path / 'ufc_fighter_tott.csv')
df_fight_stats = pd.read_csv(raw_data_path / 'ufc_fight_stats.csv')
df_fight_results = pd.read_csv(raw_data_path / 'ufc_fight_results.csv')

## Clean Data

* Cleans fighter stats by adjusting column names, converting string-based measurements to numerical, formatting dates, extracting IDs, and removing unneeded columns.
* Processes event details to extract IDs, convert dates, and split location data.
* Standardizes fighter details by cleaning column names and extracting IDs.
* Refines fight stats by organizing columns, splitting key data for clarity, and dropping irrelevant percentage columns.
* Enhances fight results by extracting IDs, clarifying outcomes, detailing bouts, converting time formats, and eliminating superfluous columns.

In [3]:
# Pass each raw DataFrame to its matching cleaner (imported from
# src.data_cleaning.data_cleaning) and bind the result to a new *_clean name.
# NOTE(review): whether the cleaners also modify their input frames in place
# is not visible from this notebook -- confirm before reusing the raw frames.
df_fighter_detail_clean = clean_fighter_details(df_fighter_detail)
df_event_details_clean = clean_event_details(df_event_details)
df_fighter_tott_clean = clean_fighter_tott(df_fighter_tott)
df_fight_stats_clean = clean_fight_stats(df_fight_stats)
df_fight_results_clean = clean_fight_results(df_fight_results)

## Save 
Save the cleaned data as CSV files in a separate folder.

In [6]:
# Folder that receives the cleaned CSV files, relative to the working directory.
clean_data_path = Path('./data/clean')
# Create the folder (and any missing parents) up front: to_csv does not create
# directories and raises OSError when the target folder is absent, which would
# break a run from a fresh checkout.
clean_data_path.mkdir(parents=True, exist_ok=True)

# index=False keeps the DataFrame index out of the written files.
df_fighter_detail_clean.to_csv(Path(clean_data_path, 'ufc_fighter_details_clean.csv'), index=False)
df_event_details_clean.to_csv(Path(clean_data_path, 'ufc_event_details_clean.csv'), index=False)
df_fighter_tott_clean.to_csv(Path(clean_data_path, 'ufc_fighter_tott_clean.csv'), index=False)
df_fight_stats_clean.to_csv(Path(clean_data_path, 'ufc_fight_stats_clean.csv'), index=False)
df_fight_results_clean.to_csv(Path(clean_data_path, 'ufc_fight_results_clean.csv'), index=False)