In [None]:
#### Save/reload Clubs Data
import json

def load_data(title):
    """Parse the JSON file at path ``title`` (read as UTF-8) and return its content."""
    with open(title, encoding="utf-8") as source:
        parsed = json.loads(source.read())
    return parsed


# Raw club records exactly as stored in the JSON file (before cleaning).
premier_clubs_list = load_data(title="PremierL_data.json")

In [None]:
import re

def _first_integer(text):
    """Return the first number found in ``text`` as an int, or None if there are no digits."""
    # Prefer a thousands-grouped number ("60,704", "60 704", "60.704");
    # otherwise fall back to the first plain run of digits.
    match = re.search(r'\d{1,3}(?:[,. \u00a0]\d{3})+(?!\d)|\d+', text)
    if match is None:
        return None
    return int(re.sub(r'\D', '', match.group()))


def _first_year(text):
    """Return the first standalone 4-digit number in ``text`` as an int.

    Falls back to the first run of digits when no 4-digit number exists,
    and returns None when ``text`` contains no digits at all.
    """
    match = re.search(r'(?<!\d)\d{4}(?!\d)', text) or re.search(r'\d+', text)
    return int(match.group()) if match else None


def clean_data(data):
    """Clean one club record in place and return it.

    ``data`` must contain a ``'Data'`` dict (a missing key raises ``KeyError``).
    The function:

    * drops the ``Coordinates``, ``Website``, ``2022–23``, ``Short name`` and
      ``Co-chairmen`` entries if present;
    * removes ``[...]`` reference markers from every string value and strips
      surrounding whitespace;
    * sets ``Capacity`` to a comma-grouped string such as ``'60,704'``
      (or ``None`` when no number can be found);
    * sets ``Founded`` to an int year (or ``None`` when no number can be found).

    Only the first number in ``Capacity`` / ``Founded`` is used, so values such
    as ``"1886; 137 years ago"`` yield ``1886`` instead of all digits glued
    together.

    Returns:
        The same ``data`` object that was passed in (mutated).
    """
    fields = data['Data']

    # Rows that are not wanted in the cleaned output.
    for unwanted in ('Coordinates', 'Website', '2022–23', 'Short name', 'Co-chairmen'):
        fields.pop(unwanted, None)

    # Remove references enclosed in square brackets. Only values of existing
    # keys are reassigned, so mutating while iterating is safe.
    for key, value in fields.items():
        if isinstance(value, str):
            fields[key] = re.sub(r'\[[^\]]*\]', '', value).strip()

    # Parse "Capacity" text into a number (None when missing or without digits).
    capacity = fields.get('Capacity', '')
    if isinstance(capacity, str):
        capacity = _first_integer(capacity)
        fields['Capacity'] = capacity

    # Store the capacity as a string with thousands separators, e.g. '60,704'.
    if isinstance(capacity, (int, float)):
        fields['Capacity'] = format(capacity, ',')

    # Parse "Founded" text into the founding year (None when missing or without digits).
    founded = fields.get('Founded', '')
    if isinstance(founded, str):
        fields['Founded'] = _first_year(founded)

    return data

# Run clean_data over every raw club record, keeping the original order.
premier_clubs_list_cleaned = list(map(clean_data, premier_clubs_list))


In [None]:
# Preview only the first few cleaned records instead of dumping the whole list.
premier_clubs_list_cleaned[:3]

In [None]:
import pickle

def save_data_pickle(name, data):
    """Pickle ``data`` and write it to the file ``name``, replacing any existing content."""
    with open(name, mode='wb') as sink:
        sink.write(pickle.dumps(data))

In [None]:
import pickle

def load_data_pickle(name, data=None):
    """Load and return the object pickled in the file ``name``.

    Args:
        name: Path of the pickle file to read.
        data: Unused. Kept (now optional) only so existing two-argument
            calls keep working.

    Returns:
        The unpickled object.
    """
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(name, 'rb') as f:
        return pickle.load(f)

In [None]:
# Persist the cleaned records as a pickle file.
save_data_pickle(name="PremierL_data_cleaned.pickle", data=premier_clubs_list_cleaned)

In [None]:
# Write the cleaned records as JSON. The file is written as UTF-8 (the same
# encoding load_data reads) and non-ASCII characters are kept as-is instead of
# being escaped, so the output does not depend on the platform default encoding.
output_file_path = 'cleaned_data.json'
with open(output_file_path, 'w', encoding='utf-8') as json_file:
    json.dump(premier_clubs_list_cleaned, json_file, indent=2, ensure_ascii=False)


In [None]:
import pandas as pd

# One row per cleaned club record; show the first rows as the cell output.
df = pd.DataFrame(data=premier_clubs_list_cleaned)
df.head(n=5)

In [None]:
# Show the first rows of df again (head() with its default row count).
df.head()

In [None]:
import pandas as pd

df = pd.DataFrame(data=premier_clubs_list_cleaned)

# Expand each club's nested 'Data' dict into one column per field
data_column = df['Data']
df_normalized = pd.json_normalize(data_column)

# Place the 'Club' column first, followed by the expanded fields
club_column = df['Club']
df_result = pd.concat([club_column, df_normalized], axis=1)

# Print the first 20 rows
first_rows = df_result.head(20)
print(first_rows)

In [None]:
from IPython.display import display

# Render the first 20 rows of the flattened table with rich display
top_rows = df_result.head(20)
display(top_rows)

In [None]:
# Export the flattened table (df_result), matching the Excel export of the same
# name; df itself still holds each club's fields as a nested dict in 'Data'.
df_result.to_csv("Premier_clubs_list_final.csv")

In [None]:
# Export the flattened table to an Excel workbook.
df_result.to_excel(excel_writer="Premier_clubs_list_final.xlsx")

In [None]:
# Show the column labels of df (the frame built directly from the cleaned records).
df.columns