In [15]:
import os
from functools import reduce

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

Below we standardise file formatting

In [16]:
# Normalise each year's Gross_Pay workbook into a CSV under ./Temp and build a
# lookup from the 'Code' column to the 'Description' column along the way.
code_to_desc = {}

# Create the output folder up front so that a fresh checkout can run this cell
# without to_csv failing on a missing directory.
os.makedirs("./Temp", exist_ok=True)

for year in range(2015, 2026):
    # Workbooks for years after 2022 are read as .xlsx, earlier ones as .xls.
    extension = "xlsx" if year > 2022 else "xls"
    # Read the second sheet and discard the first three parsed rows.
    df = pd.read_excel(f"./Data/Income/Gross_Pay_{year}.{extension}", sheet_name=1).iloc[3:]
    # Use the first remaining row as the column labels. That row is NOT removed
    # here, so it is still present as the first data row of the CSV written below.
    df.columns = df.iloc[0]
    # Later years overwrite the description recorded for a code by earlier years.
    code_to_desc.update(df.set_index('Code')['Description'].to_dict())
    df = df.rename_axis("Council Code", axis=1)
    # Rows without a code cannot be keyed, so they are dropped.
    df = df.dropna(subset=['Code'])
    df = df.drop(columns=["Description", "(thousand)", "change"])
    df = df.set_index("Code")
    # Discard the three right-most columns.
    df = df.iloc[:, :-3]
    # An unnamed index is written with a blank header cell.
    df.index.name = None
    df.to_csv(f"./Temp/Gross_Pay_{year}.csv", index=True)

Next we load each year's CSV into a list of dataframes

In [17]:
# Reload the per-year CSVs and tidy each one into the same layout.
# NOTE(review): the CSVs are written from 2015 onwards but loaded from 2016 - confirm intentional.
raw_frames = [pd.read_csv(f"./Temp/Gross_Pay_{year}.csv") for year in range(2016, 2026)]

dfs = []
for raw in raw_frames:
    # The unnamed first CSV column holds the row labels.
    frame = raw.set_index("Unnamed: 0")
    # Skip the first row of each file.
    frame = frame.iloc[1:]
    frame.index.name = None
    dfs.append(frame.rename_axis("Council Code", axis=1))

Next we keep only the council codes that appear in every dataframe, so that all dataframes have the same dimensions

In [18]:

# Narrow the row labels down to those present in every yearly frame.
common_idx = dfs[0].index
for frame in dfs:
    common_idx = common_idx.intersection(frame.index)

# Re-select each frame on the shared labels so all frames have the same rows.
dfs = [frame.loc[common_idx] for frame in dfs]
dfs[-1]

Council Code,Median,Mean,10,20,25,30,40,60,70,75,80,90
K02000001,32890,40269,11425,18560,22060,24532,28591,38000,44500,48283,52809,69381
K03000001,32972,40439,11456,18613,22112,24580,28646,38061,44629,48408,52929,69750
K04000001,32991,40715,11424,18589,22094,24563,28627,38058,44677,48479,53162,70250
E92000001,33142,41100,11439,18653,22187,24669,28769,38292,44962,48820,53630,71090
E12000001,29266,33164,10727,17213,20252,22769,26092,33006,38097,41339,45254,56226
...,...,...,...,...,...,...,...,...,...,...,...,...
S12000029,31984,35134,10510,16577,19698,22873,27610,36257,42531,45225,49383,x
S12000030,34244,40315,13366,20888,23221,25654,30216,37413,44045,47657,50139,x
S12000039,30033,33317,11013,16971,20496,22896,26576,34887,41103,43996,46589,x
S12000040,32535,35938,12198,19715,23137,24994,28624,37660,43582,46888,50103,x


In [19]:
# Join the yearly frames side by side under a two-level (Year, Analysis) column header.
years = [*range(2016, 2026)]
columns = dfs[0].columns

# Labels are assigned by position from the first frame's columns.
# NOTE(review): assumes every year has the same columns in the same order as dfs[0] - confirm.
multi_cols = pd.MultiIndex.from_product([years, columns], names=['Year', 'Analysis'])
combined_df = pd.concat(dfs, axis=1).set_axis(multi_cols, axis=1)
combined_df


Year,2016,2016,2016,2016,2016,2016,2016,2016,2016,2016,...,2025,2025,2025,2025,2025,2025,2025,2025,2025,2025
Analysis,Median,Mean,10,20,25,30,40,60,70,75,...,10,20,25,30,40,60,70,75,80,90
K02000001,23084,28306,7498,11952,14099,15914,19466,27209,32220,35211,...,11425,18560,22060,24532,28591,38000,44500,48283,52809,69381
K03000001,23162,28431,7500,11986,14153,15961,19524,27315,32334,35300,...,11456,18613,22112,24580,28646,38061,44629,48408,52929,69750
K04000001,23178,28572,7468,11958,14135,15944,19530,27385,32429,35461,...,11424,18589,22094,24563,28627,38058,44677,48479,53162,70250
E92000001,23334,28802,7465,11977,14184,16000,19640,27562,32602,35700,...,11439,18653,22187,24669,28769,38292,44962,48820,53630,71090
E12000001,21177,24541,7451,11647,13515,15188,18225,24747,28575,31314,...,10727,17213,20252,22769,26092,33006,38097,41339,45254,56226
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
S12000029,23719,26033,7863,11892,14312,16725,20378,27545,31739,32710,...,10510,16577,19698,22873,27610,36257,42531,45225,49383,x
S12000030,23057,26244,7110,11385,13767,15065,18214,26520,31545,34261,...,13366,20888,23221,25654,30216,37413,44045,47657,50139,x
S12000039,22389,25493,9818,13159,14110,15903,18810,25477,31032,33031,...,11013,16971,20496,22896,26576,34887,41103,43996,46589,x
S12000040,21350,24357,8582,12679,14271,15734,18789,24596,28385,30948,...,12198,19715,23137,24994,28624,37660,43582,46888,50103,x


In [24]:
# Keep every column whose second-level label is neither 'Median' nor 'Mean'.
excluded_stats = ['Median', 'Mean']
kept_cols = [
    (yr, stat) for yr, stat in combined_df.columns if stat not in excluded_stats
]

combined_df_no_mean = (
    combined_df.loc[:, kept_cols]
    # Values that cannot be parsed as numbers become NaN.
    .apply(pd.to_numeric, errors='coerce')
    # Replace index labels found in code_to_desc with their descriptions.
    .rename(index=code_to_desc)
)
combined_df_no_mean.index = combined_df_no_mean.index.str.strip()
combined_df_no_mean = combined_df_no_mean.sort_index()


In [25]:

# Reshape to long form: one row per (council, year, percentile) value.
df_long = combined_df_no_mean.stack(level=[0,1]).reset_index()
df_long.columns = ['Council', 'Year', 'Percentile', 'Revenue']

# Make sure the three plotted dimensions are numeric.
for numeric_col in ('Year', 'Percentile', 'Revenue'):
    df_long[numeric_col] = pd.to_numeric(df_long[numeric_col])

# Axis limits are computed over all councils so the axes stay fixed when the
# dropdown switches between traces.
x_min, x_max = df_long['Year'].min(), df_long['Year'].max()
y_min, y_max = df_long['Percentile'].min(), df_long['Percentile'].max()
z_min, z_max = df_long['Revenue'].min(), df_long['Revenue'].max()

councils = df_long['Council'].unique()
n_councils = len(councils)

fig = go.Figure()
buttons = []

# One trace and one dropdown entry per council, built in the same pass.
for i, council in enumerate(councils):
    df_c = df_long.loc[df_long['Council'] == council]
    fig.add_trace(
        go.Scatter3d(
            x=df_c['Year'],
            y=df_c['Percentile'],
            z=df_c['Revenue'],
            mode='markers',
            marker={'size': 5},
            name=council,
            visible=(i == 0),  # only the first council is shown initially
        )
    )

    # Selecting this entry shows trace i and hides every other trace.
    visible = [j == i for j in range(n_councils)]
    buttons.append({
        'label': council,
        'method': "update",
        'args': [{"visible": visible},
                 {"title": f"3D Scatter for {council}"}],
    })

fig.update_layout(
    updatemenus=[{'active': 0, 'buttons': buttons, 'x': 1.1, 'y': 0.8}],
    scene={
        'xaxis': {'title': 'Year', 'range': [x_min, x_max]},
        'yaxis': {'title': 'Percentile', 'range': [y_min, y_max]},
        'zaxis': {'title': 'Revenue', 'range': [z_min, z_max]},
    },
    title="3D Scatter of Revenue Percentiles (Fixed Axes)",
)

fig.show()




