In [1]:
import pandas as pd

# Load the merged data set from a single CSV file into one DataFrame.
# The path is relative, so 'df_merged_data.csv' must be in the notebook's
# working directory when this cell runs.
df_merged_data = pd.read_csv('df_merged_data.csv')
# Bare last expression: the notebook renders the frame as this cell's output.
df_merged_data

Unnamed: 0,Teams,Places,Matches,Wins,Draws,Loses,Goals,Goals Diff,Points,Year,Spend,Balance
0,Man City,1,38,28,7,3,96:34,62,91,23/24,259600000.0,-133800000.0
1,Arsenal,2,38,28,5,5,91:29,62,89,23/24,235100000.0,-165900000.0
2,Liverpool,3,38,24,10,4,86:41,45,82,23/24,172000000.0,-111300000.0
3,Aston Villa,4,38,20,8,10,76:61,15,68,23/24,111500000.0,-78950000.0
4,Tottenham,5,38,20,6,12,74:61,13,66,23/24,272100000.0,-151400000.0
...,...,...,...,...,...,...,...,...,...,...,...,...
395,Portsmouth,16,38,10,9,19,43:59,-16,39,04/05,10850000.0,-4250000.0
396,West Brom,17,38,6,16,16,36:61,-25,34,04/05,15850000.0,-13900000.0
397,Crystal Palace,18,38,7,12,19,41:62,-21,33,04/05,5150000.0,-5050000.0
398,Norwich,19,38,7,12,19,42:77,-35,33,04/05,8530000.0,-8080000.0


In [2]:
def assign_ranking(df):
    """Rank the rows of every 'Year' group by 'Spend', largest spend first.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Year' column (the grouping key) and a 'Spend' column.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is left untouched) ordered by 'Year' ascending
        and, inside each year, by 'Spend' descending. The index is reset to
        0..n-1 and an integer 'Spend_Rank' column is appended that counts
        1, 2, 3, ... within each year.

    Notes
    -----
    * Rows with a missing 'Year' are dropped, which is what grouping on 'Year'
      does by default.
    * Rows with a missing 'Spend' sort to the end of their year and therefore
      receive the largest ranks of that year.
    * Equal 'Spend' values keep their input order (stable sort) and still get
      consecutive, distinct ranks.
    * No ``groupby.apply`` is used, so the 'Year' column is retained regardless
      of whether pandas passes grouping columns to applied functions.
    """
    return (
        df.dropna(subset=['Year'])
        # Two stable sorts, minor key first, give a (Year asc, Spend desc)
        # ordering with deterministic handling of ties.
        .sort_values(by='Spend', ascending=False, kind='mergesort')
        .sort_values(by='Year', kind='mergesort')
        .reset_index(drop=True)
        # cumcount() numbers rows 0, 1, 2, ... in their current order per year.
        .assign(Spend_Rank=lambda frame: frame.groupby('Year').cumcount() + 1)
    )

# Rank every row within its 'Year' group by Spend. The result is bound back to
# the same name, so from this cell on df_merged_data refers to the ranked frame
# (with the extra Spend_Rank column) rather than the frame as loaded from CSV.
df_merged_data = assign_ranking(df_merged_data)
# Bare last expression: display the ranked frame as the cell output.
df_merged_data

Unnamed: 0,Teams,Places,Matches,Wins,Draws,Loses,Goals,Goals Diff,Points,Year,Spend,Balance,Spend_Rank
0,Chelsea,1,38,29,8,1,72:15,57,95,04/05,162400000.0,-159100000.0,1
1,Man Utd,3,38,22,11,5,58:26,32,77,04/05,61250000.0,-51690000.0,2
2,Liverpool,5,38,17,7,14,52:41,11,58,04/05,58400000.0,-33830000.0,3
3,Tottenham,9,38,14,10,14,47:41,6,52,04/05,47130000.0,-36630000.0,4
4,Newcastle,14,38,10,14,14,47:57,-10,44,04/05,29450000.0,-7180000.0,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,Brentford,16,38,10,9,19,56:65,-9,39,23/24,72350000.0,-62100000.0,16
396,Crystal Palace,10,38,13,10,15,57:58,-1,49,23/24,67800000.0,-67800000.0,17
397,Sheff Utd,20,38,3,7,28,35:104,-69,16,23/24,66950000.0,-36050000.0,18
398,Everton,15,38,13,9,16,40:51,-11,40,23/24,40500000.0,42300000.0,19


In [3]:
def calculate_correlation(df, col1, col2):
    """Compute the per-'Year' correlation between two columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Year' column plus the two columns named by ``col1``
        and ``col2``; both must be convertible to float.
    col1, col2 : str
        Names of the columns to correlate.

    Returns
    -------
    pandas.DataFrame
        One row per distinct 'Year' (in sorted key order) with two columns:
        'Year' and ``f'Correlation_{col1}_{col2}'``. The value is taken from
        ``DataFrame.corr()`` with its default settings. An input without any
        rows yields an empty frame that still has both columns.
    """
    result_name = f'Correlation_{col1}_{col2}'
    records = []
    # Iterate over the groups directly instead of using groupby.apply: only the
    # two requested columns are ever touched, so the result does not depend on
    # whether pandas hands the grouping column to an applied function.
    for year, season in df.groupby('Year'):
        pair = season[[col1, col2]].astype(float)
        records.append((year, pair.corr().loc[col1, col2]))
    return pd.DataFrame(records, columns=['Year', result_name])

# Calculate, per 'Year', the correlation between 'Places' and each spending
# measure. Each call returns a frame with a 'Year' column and one correlation column.
# NOTE(review): in the displayed rows Places == 1 is the top of the table, so a
# negative correlation with Spend would mean bigger spenders finish higher — confirm.

correlation_spend_rank = calculate_correlation(df_merged_data, 'Places', 'Spend_Rank')
correlation_spend = calculate_correlation(df_merged_data, 'Places', 'Spend')
correlation_balance = calculate_correlation(df_merged_data, 'Places', 'Balance')

In [4]:
def combine_correlations(*correlation_dfs, key='Year'):
    """Combine several correlation tables side by side into one frame.

    Parameters
    ----------
    *correlation_dfs : pandas.DataFrame
        Frames to combine, for example the outputs of ``calculate_correlation``.
    key : str or None, keyword-only, default 'Year'
        Column used to line the rows up. When every frame contains exactly one
        column with this name and its values are unique, rows are matched on
        that value, so frames that list the keys in a different order, or that
        cover different keys, are still combined correctly (missing entries
        become NaN). Pass ``None`` to force purely positional combination.

    Returns
    -------
    pandas.DataFrame
        The combined frame. If several inputs share a column name, only the
        first occurrence is kept. When rows were matched on ``key``, that
        column comes first and the index is a fresh 0..n-1 range.

    Raises
    ------
    ValueError
        Raised by ``pandas.concat`` when no frames are passed.
    """
    frames = list(correlation_dfs)
    # Only align by label when it is unambiguous; otherwise fall back to
    # combining on the frames' existing index.
    align_on_key = key is not None and all(
        list(frame.columns).count(key) == 1 and frame[key].is_unique
        for frame in frames
    )
    if align_on_key:
        # With the key as index, concat matches rows by key value, not position.
        frames = [frame.set_index(key) for frame in frames]
    combined = pd.concat(frames, axis=1)
    # Keep only the first occurrence of any repeated column name.
    combined = combined.loc[:, ~combined.columns.duplicated()]
    if align_on_key:
        combined = combined.rename_axis(key).reset_index()
    return combined

# Use the combine_correlations function to gather the three per-year
# correlation tables into a single frame, then display it as the cell output.
combined_correlations = combine_correlations(correlation_spend_rank, correlation_spend, correlation_balance)
combined_correlations

Unnamed: 0,Year,Correlation_Places_Spend_Rank,Correlation_Places_Spend,Correlation_Places_Balance
0,04/05,0.255639,-0.489599,0.401732
1,05/06,0.615038,-0.666317,0.646187
2,06/07,0.243609,-0.336325,0.108278
3,07/08,0.419549,-0.429373,0.062513
4,08/09,0.517293,-0.294194,0.176853
5,09/10,0.556391,-0.39094,0.082461
6,10/11,0.557895,-0.540547,0.399691
7,11/12,0.362406,-0.577936,0.330702
8,12/13,0.490226,-0.572051,0.173894
9,13/14,0.574436,-0.598593,0.251391
