In [1]:
import pandas as pd
import os

def clean_referee_column(raw_data_folder=None, bronze_output_folder=None, file_names=None):
    """
    Strip double quotes and commas from the 'Referee' column of CSV files.

    Each input file is read from ``raw_data_folder``, its 'Referee' column is
    cleaned, and the result is written under the same file name into
    ``bronze_output_folder`` (created if it does not exist). A file without a
    'Referee' column is skipped with a warning. A file that raises while being
    processed is reported and the remaining files are still attempted.

    Missing referee values stay missing in the output instead of being
    written as the literal text "nan".

    Args:
        raw_data_folder: Folder holding the raw CSV files. Defaults to the
            project's ``data`` folder.
        bronze_output_folder: Folder receiving the cleaned CSV files.
            Defaults to the project's Bronze "Correctly_Formated Dataset"
            folder.
        file_names: Iterable of CSV file names to process, relative to
            ``raw_data_folder``. Defaults to the two England CSV files.

    Returns:
        bool: True when no file raised an error during processing (skipped
        files do not count as errors), False otherwise.
    """
    # Fall back to the original hard-coded locations so a bare call keeps
    # working exactly as before.
    if raw_data_folder is None:
        raw_data_folder = r"C:\Users\Morobang\Documents\GitHub\English Premier League & Championship Analysis\data"
    if bronze_output_folder is None:
        bronze_output_folder = r"C:\Users\Morobang\Documents\GitHub\English Premier League & Championship Analysis\scripts\01_Bronze\Correctly_Formated Dataset"
    if file_names is None:
        file_names = ["England CSV.csv", "England 2 CSV.csv"]

    input_files = [os.path.join(raw_data_folder, name) for name in file_names]

    # Create output folder if it doesn't exist
    os.makedirs(bronze_output_folder, exist_ok=True)

    all_ok = True
    for file_path in input_files:
        file_name = os.path.basename(file_path)
        try:
            df = pd.read_csv(file_path)

            if "Referee" not in df.columns:
                print(f"Warning: 'Referee' column not found in {file_name}. Skipping.")
                continue

            referee = df["Referee"]
            # regex=False: both characters are literals, and this keeps the
            # behavior independent of the pandas version's default.
            cleaned = (
                referee.astype(str)
                .str.replace('"', '', regex=False)
                .str.replace(',', '', regex=False)
            )
            # astype(str) turns NaN into the string "nan"; restore the
            # original missing entries so they are written as empty fields.
            df["Referee"] = cleaned.where(referee.notna())

            # Save to new location (Bronze layer)
            output_path = os.path.join(bronze_output_folder, file_name)
            df.to_csv(output_path, index=False)
            print(f"Successfully cleaned and saved: {output_path}")

        except Exception as e:
            # Best-effort batch: report the failure and continue with the
            # next file, but remember it for the return value.
            print(f"Error processing {file_path}: {e}")
            all_ok = False

    return all_ok

# Run the cleaning step with its default settings. As the last expression in
# the notebook cell, the returned value is shown as the cell's output.
clean_referee_column()

Successfully cleaned and saved: C:\Users\Morobang\Documents\GitHub\English Premier League & Championship Analysis\scripts\01_Bronze\Correctly_Formated Dataset\England CSV.csv
Successfully cleaned and saved: C:\Users\Morobang\Documents\GitHub\English Premier League & Championship Analysis\scripts\01_Bronze\Correctly_Formated Dataset\England 2 CSV.csv


True