In [None]:
#importing libraries
import pandas as pd
import matplotlib.pyplot as plt
from functools import reduce
import plotly.express as px
import numpy as np
import plotly.graph_objects as go
import json

Below we standardise file formatting

In [None]:
# creating a dictionary to map council codes to descriptions
code_to_desc = {}
#looping through each year to clean and save the data
for year in range(2015, 2026):
    #reading the excel file based on years (different extensions for >2022)
    if year > 2022:
        df = pd.read_excel(f"./Data/Income/Gross_Pay_{year}.xlsx",sheet_name=1).iloc[3:]
    else:
        df = pd.read_excel(f"./Data/Income/Gross_Pay_{year}.xls",sheet_name=1).iloc[3:]
    # setting the first row as header    
    df.columns = df.iloc[0]
    # all column names to strings 
    df.columns = df.columns.astype(str)
    #update code to description using current year dataframe
    code_to_desc.update(df.set_index('Code')['Description'].to_dict())
    # renaming the index
    df = df.rename_axis("Council Code", axis=1)
    #dropping rows with NaN codes
    df = df.dropna(subset=['Code'])
    # print column names 
    print (df.columns)
    # cleaning the dataframe
    df = df.drop(columns=["Description", "(thousand)", "change", "Mean", "25", "75"], axis=1)
    # renaming Median column to 50
    df = df.rename(columns={'Median': "50"})
    # setting index to Code
    df.set_index("Code", inplace=True)
    # removing last 3 columns which are empty
    df = df.iloc[:, :-3]
    # removing index name
    df.index.name = None
    # rearranging columns
    df = df[["10", "20", "30", "40", "50", "60", "70", "80", "90"]]
    # saving cleaned dataframe to csv
    df.to_csv(f"./Temp/Gross_Pay_{year}.csv", index=True)   
# saving code to description mapping to json
with open("./CleanedData/Council_to_Code.json", "w") as f:
    json.dump(code_to_desc, f, indent=4)

Index(['Description', 'Code', '(thousand)', 'Median', 'change', 'Mean',
       'change', '10', '20', '25', '30', '40', '60', '70', '75', '80', '90',
       'nan', 'nan', 'nan'],
      dtype='object', name='Council Code')
Index(['Description', 'Code', '(thousand)', 'Median', 'change', 'Mean',
       'change', '10', '20', '25', '30', '40', '60', '70', '75', '80', '90',
       'nan', 'nan', 'nan'],
      dtype='object', name='Council Code')
Index(['Description', 'Code', '(thousand)', 'Median', 'change', 'Mean',
       'change', '10', '20', '25', '30', '40', '60', '70', '75', '80', '90',
       'nan', 'nan', 'nan'],
      dtype='object', name='Council Code')
Index(['Description', 'Code', '(thousand)', 'Median', 'change', 'Mean',
       'change', '10', '20', '25', '30', '40', '60', '70', '75', '80', '90',
       'nan', 'nan', 'nan'],
      dtype='object', name='Council Code')
Index(['Description', 'Code', '(thousand)', 'Median', 'change', 'Mean',
       'change', '10', '20', '25', '30', '40

Next we load each dataframe into a list 

In [None]:
# reading all cleaned csvs into dataframes
dfs = [pd.read_csv(f"./Temp/Gross_Pay_{year}.csv") for year in range(2016, 2026)]
#loop dataframes
for i, df in enumerate(dfs):
    #Pick current dataframe
    df = dfs[i]
    # setting index to Unnamed: 0
    df.set_index("Unnamed: 0", inplace=True)
    # removing first row
    df = df.tail(-1)
    # removing index name
    df.index.name = None
    # renaming the index
    df = df.rename_axis("Council Code", axis=1)
    # updating the dataframe in the list
    dfs[i] = df

Next we take the common council codes for each dataframe so each dataframe has the same dimensions

In [None]:
# finding common indices across all dataframes
common_idx = reduce(lambda a, b: a.intersection(b.index), dfs, dfs[0].index)
# filtering each dataframe to keep only common indices
dfs = [df.loc[common_idx] for df in dfs]
# displaying the last dataframe 
dfs[-1]

Council Code,10,20,30,40,50,60,70,80,90
K02000001,11425,18560,24532,28591,32890,38000,44500,52809,69381
K03000001,11456,18613,24580,28646,32972,38061,44629,52929,69750
K04000001,11424,18589,24563,28627,32991,38058,44677,53162,70250
E92000001,11439,18653,24669,28769,33142,38292,44962,53630,71090
E12000001,10727,17213,22769,26092,29266,33006,38097,45254,56226
...,...,...,...,...,...,...,...,...,...
S12000029,10510,16577,22873,27610,31984,36257,42531,49383,x
S12000030,13366,20888,25654,30216,34244,37413,44045,50139,x
S12000039,11013,16971,22896,26576,30033,34887,41103,46589,x
S12000040,12198,19715,24994,28624,32535,37660,43582,50103,x


In [None]:
# list of years in dataframes
years = list(range(2016, 2026))
# combining dataframes side by side 
combined_df = pd.concat(dfs, axis=1)
# set multiIndex for columns 
combined_df.columns = pd.MultiIndex.from_product([years, dfs[0].columns], names=['Year', 'Analysis'])


In [None]:
# rename index from code to description
combined_df = combined_df.rename(index=code_to_desc)
# stripping whitespace from index and sorting
combined_df.index = combined_df.index.str.strip()
# sorting index
combined_df.sort_index(inplace=True)
#print merged dataframe
combined_df

Year,2016,2016,2016,2016,2016,2016,2016,2016,2016,2017,...,2024,2025,2025,2025,2025,2025,2025,2025,2025,2025
Analysis,10,20,30,40,50,60,70,80,90,10,...,90,10,20,30,40,50,60,70,80,90
Aberdeen City,x,13839,18469,22476,26507,31263,37210,45487,x,7424,...,x,12521,21151,26166,30249,35483,42605,48795,57190,x
Aberdeenshire,x,9405,12749,16731,20456,24688,29589,35388,x,6480,...,x,9812,15892,22900,27478,31551,36743,43604,51817,x
Adur,x,x,x,x,x,x,x,x,x,x,...,x,x,12564,18100,22903,28238,35178,x,x,x
Amber Valley,9416,13490,16230,19898,23197,27735,31332,39645,x,7759,...,x,12570,18433,23271,26852,31100,35307,38948,46309,x
Angus,7409,11734,15517,17604,20467,23839,26642,29806,x,x,...,x,x,14207,19290,24551,27520,31569,36992,46567,x
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wychavon,x,x,x,16921,20864,23442,27868,x,x,7663,...,x,11166,19993,24996,27194,32022,36325,41273,47320,x
Wyre,x,8542,11434,15281,18155,19449,23791,x,x,x,...,x,x,15251,20382,24491,27657,29924,34803,x,x
Wyre Forest,x,x,x,x,x,x,x,x,x,x,...,x,x,15381,20861,25219,27346,29584,35842,x,x
York UA,7019,11958,15420,18240,21670,25045,29880,36288,x,x,...,x,x,18228,24917,28003,32151,37407,44894,55662,x


In [None]:
# importing libraries
import pandas as pd
import numpy as np

# creating a copy of the combined dataframe
df = combined_df.copy()
# replacing "x" with NaN and converting to numeric
df = df.replace("x", np.nan)  
# converting all values to numeric
df = df.apply(pd.to_numeric)

# getting list of years and percentiles
years = sorted(list(set(df.columns.get_level_values(0))))
percentiles = ['10','20','30','40','50','60','70','80','90']
#list to track rows to drop
rows_drop_backwards = []

# backward fill for 10th and 90th percentiles
for idx, row in df.iterrows():
# iterate over 10th and 90th percentiles
    for p in ['10','90']:
        # initialize last value and nan run counter
        last_value = None
        nan_run = 0
# iterate over years using this percentile
        for y in years:
            col = (y, p)
# check if column exists, if not continue
            if col not in df.columns:
                continue
# row value for the year/percentile
            val = row[col]
# if value is NaN, update using nan run counter
            if pd.isna(val):
                nan_run += 1
# if nan run reaches 2, mark row for dropping and break
                if nan_run >= 2:
                    rows_drop_backwards.append(idx)
                    break
# else, fill NaN with last known value
                if last_value is not None:
                    df.at[idx, col] = last_value
# update last value and reset nan run counter
            else:
                last_value = val
                nan_run = 0
# if row is marked for dropping, break
        if idx in rows_drop_backwards:
            break
# drop rows marked for dropping
df = df.drop(rows_drop_backwards)
rows_drop_middle = []
# iterate over each row
for idx, row in df.iterrows():
    for p in ['50','60','70','80','90']:
# initialize nan run counter
        nan_run = 0
# iterate over years using this percentile
        for y in years:
            col = (y, p)
# skip if column doesn't exist
            if col not in df.columns:
                continue
# check for NaN and update nan run counter
            if pd.isna(row[col]):
                nan_run += 1
            else:
                nan_run = 0
# if nan run reaches 2, mark row for dropping and break
            if nan_run >= 2:
                rows_drop_middle.append(idx)
                break
# if row is marked for dropping, break
        if idx in rows_drop_middle:
            break
# drop rows marked for dropping
df = df.drop(rows_drop_middle)
# find nearest non-NaN values to left and right
def nearest_left(series, pos):
    for i in range(pos - 1, -1, -1):
        if pd.notna(series.iloc[i]):
            return series.iloc[i]
    return None
# find nearest non-NaN values to right
def nearest_right(series, pos):
    for i in range(pos + 1, len(series)):
        if pd.notna(series.iloc[i]):
            return series.iloc[i]
    return None

# list to track rows to drop
rows_drop_sideways = []
# iterate over each row
for idx, row in df.iterrows():
# convert row to array 
    row_vals = row.values
# make a copy of the row 
    s = row.copy()
    # get row position
    row_pos = df.index.get_loc(idx)
# iterate over each column in the row
    for col_i in range(len(row_vals)):
# check if value is NaN
        if pd.isna(row_vals[col_i]):
# find nearest left and right non-NaN values
            left = nearest_left(s, col_i)
            right = nearest_right(s, col_i)
            # if both left and right are None, mark row for dropping and break
            if left is None and right is None:
                rows_drop_sideways.append(idx)
                break
            # fill NaN with average of left and right if both exist, else use whichever exists
            if left is not None and right is not None:
                val = (left + right) / 2
                #if only one of left or right is not NaN, use that value
            elif left is not None:
                val = left
            else:
                val = right
# update dataframe with filled value
            df.iat[row_pos, col_i] = val
# drop rows marked for dropping
df = df.drop(rows_drop_sideways)
# final cleaned dataframe
cleaned_df = df.copy()
cleaned_df



Year,2016,2016,2016,2016,2016,2016,2016,2016,2016,2017,...,2024,2025,2025,2025,2025,2025,2025,2025,2025,2025
Analysis,10,20,30,40,50,60,70,80,90,10,...,90,10,20,30,40,50,60,70,80,90
Birmingham,8055.0,12848.0,16749.0,20191.0,23788.0,27714.0,32959.0,38544.0,47934.0,7955.0,...,67913.0,12006.0,20036.0,25280.0,29551.0,33374.0,38241.0,44963.0,53678.0,69286.0
City of Edinburgh,9279.0,15113.0,19413.0,22571.0,25971.0,30300.0,35922.0,42850.0,57613.0,9525.0,...,72820.0,14777.0,24343.0,28725.0,33171.0,38345.0,44297.0,52145.0,63839.0,82187.0
Cornwall UA,5850.0,9624.0,12667.0,15536.0,17868.0,21027.0,24795.0,29305.0,17671.5,6038.0,...,48159.0,9281.0,13572.0,18867.0,23402.0,27006.0,31230.0,35545.0,41297.0,50153.0
Derbyshire,7200.0,10943.0,14359.0,17601.0,20927.0,24306.0,28553.0,34126.0,43291.0,6897.0,...,51566.0,11995.0,16875.0,22883.0,26301.0,30000.0,34502.0,39471.0,46021.0,55441.0
Devon,6603.0,10588.0,14582.0,17756.0,20726.0,23747.0,27786.0,32728.0,40729.0,6878.0,...,50948.0,10153.0,16410.0,22368.0,25716.0,29114.0,33255.0,37779.0,44390.0,53241.0
East,6515.0,10792.0,14927.0,18622.0,22330.0,26219.0,31200.0,37716.0,48636.0,6673.0,...,61999.0,9932.0,16361.0,23099.0,27399.0,31513.0,36723.0,43000.0,50881.0,67033.0
East Midlands,7298.0,11449.0,14991.0,18014.0,21122.0,24668.0,29098.0,34814.0,44149.0,7160.0,...,56006.0,11008.0,16852.0,23095.0,26481.0,30000.0,34312.0,39606.0,46863.0,58581.0
England,7465.0,11977.0,16000.0,19640.0,23334.0,27562.0,32602.0,39450.0,51402.0,7564.0,...,66976.0,11439.0,18653.0,24669.0,28769.0,33142.0,38292.0,44962.0,53630.0,71090.0
England and Wales,7468.0,11958.0,15944.0,19530.0,23178.0,27385.0,32429.0,39143.0,50879.0,7547.0,...,66101.0,11424.0,18589.0,24563.0,28627.0,32991.0,38058.0,44677.0,53162.0,70250.0
Essex,6182.0,10303.0,14340.0,17967.0,21387.0,25176.0,30205.0,36877.0,48224.0,6420.0,...,61249.0,9559.0,15360.0,22391.0,26640.0,30898.0,35812.0,41694.0,50074.0,66286.0


In [None]:
#drop mean columns
combined_percentiles = combined_df.loc[:, [col for col in combined_df.columns if col[1] not in ['Mean']]]
# converting all values to numeric
combined_percentiles = combined_percentiles.apply(pd.to_numeric, errors='coerce')
# save to csv
combined_percentiles.to_csv(f"./CleanedData/IncomeData.csv", index=True)  


In [None]:
# select only median columns
combined_median = combined_df.loc[:, [col for col in combined_df.columns if col[1]  in ['50']]]
# converting all values to numeric
combined_median = combined_median.apply(pd.to_numeric, errors='coerce')

combined_median

Year,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025
Analysis,50,50,50,50,50,50,50,50,50,50
Aberdeen City,26507.0,25349.0,26087.0,27435.0,27970.0,26844.0,29429.0,33319.0,35160.0,35483.0
Aberdeenshire,20456.0,20726.0,22702.0,23064.0,23814.0,23473.0,25061.0,26724.0,29736.0,31551.0
Adur,,22762.0,22914.0,21447.0,22877.0,22977.0,23563.0,28029.0,27753.0,28238.0
Amber Valley,23197.0,24060.0,22683.0,24950.0,24450.0,23301.0,23924.0,26342.0,29414.0,31100.0
Angus,20467.0,20022.0,21112.0,,21785.0,22779.0,25822.0,26604.0,28748.0,27520.0
...,...,...,...,...,...,...,...,...,...,...
Wychavon,20864.0,21401.0,22810.0,22525.0,22543.0,24343.0,,29891.0,31721.0,32022.0
Wyre,18155.0,,17902.0,19227.0,20064.0,20985.0,23043.0,24953.0,26691.0,27657.0
Wyre Forest,,18181.0,18158.0,20783.0,20293.0,22665.0,23501.0,,25605.0,27346.0
York UA,21670.0,21404.0,20681.0,23926.0,24463.0,25366.0,27576.0,29121.0,32511.0,32151.0


In [None]:
# create a copy of median dataframe
median_df = combined_median.xs('50', axis=1, level=1).copy()
# checking for adjacent NaNs in rows and removing those rows
median_df = median_df.apply(pd.to_numeric, errors='coerce')
# find rows with adjacent NaNs
rows_with_adjacent_nans = []
for idx, row in median_df.iterrows():
    # convert row to boolean array indicating NaNs
    row_is_nan = row.isna().values
    has_adjacent = False
# check for trends in NaNs 
    for i in range(len(row_is_nan) - 1):
        if row_is_nan[i] and row_is_nan[i+1]:
            has_adjacent = True
            break
# if adjacent NaNs found, mark row for dropping
    if has_adjacent:
        rows_with_adjacent_nans.append(idx)
# drop rows with adjacent NaNs
clean_df = median_df.drop(rows_with_adjacent_nans).copy()

def find_left_value(series, pos):
    """Find nearest non-NaN to the left."""
    for i in range(pos - 1, -1, -1):
        if pd.notna(series.iloc[i]):
            return series.iloc[i]
    return None

def find_right_value(series, pos):
    """Find nearest non-NaN to the right."""
    for i in range(pos + 1, len(series)):
        if pd.notna(series.iloc[i]):
            return series.iloc[i]
    return None
# list to track rows to drop
rows_to_drop = []
# fill NaN with average of left and right if both exist, else use whichever exists
for idx in clean_df.index:
    row = clean_df.loc[idx]

    for col_pos in range(len(row)):
        # check if value is NaN
        if pd.isna(row.iloc[col_pos]):
# find left and right values
            left_val = find_left_value(row, col_pos)
            right_val = find_right_value(row, col_pos)
            # none on both sides, mark row for dropping
            if left_val is None and right_val is None:
                rows_to_drop.append(idx)
                break
            # fill NaN with mean of left and right if both exist, else use whichever exists
            if left_val is not None and right_val is not None:
                new_value = (left_val + right_val) / 2.0
                #if only one of left or right is not NaN, use that value
            elif left_val is not None:
                new_value = left_val
            else:
                new_value = right_val
                # update dataframe with filled value
            clean_df.at[idx, row.index[col_pos]] = new_value
#  drop rows marked for dropping
clean_df = clean_df.drop(rows_to_drop)
# reconstruct combined_median with cleaned data
for col in clean_df.columns:
    # for each column, update combined_median
    combined_median[(col, '50')] = combined_median.index.map(
        lambda x: clean_df.at[x, col] if x in clean_df.index else np.nan
    )
#keep only cleaned indices in combined_median
combined_median = combined_median.loc[clean_df.index].copy()

In [None]:
combined_median

Year,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025
Analysis,50,50,50,50,50,50,50,50,50,50
Aberdeen City,26507.0,25349.0,26087.0,27435.0,27970.0,26844.0,29429.0,33319.0,35160.0,35483.0
Aberdeenshire,20456.0,20726.0,22702.0,23064.0,23814.0,23473.0,25061.0,26724.0,29736.0,31551.0
Adur,22762.0,22762.0,22914.0,21447.0,22877.0,22977.0,23563.0,28029.0,27753.0,28238.0
Amber Valley,23197.0,24060.0,22683.0,24950.0,24450.0,23301.0,23924.0,26342.0,29414.0,31100.0
Angus,20467.0,20022.0,21112.0,21448.5,21785.0,22779.0,25822.0,26604.0,28748.0,27520.0
...,...,...,...,...,...,...,...,...,...,...
Wychavon,20864.0,21401.0,22810.0,22525.0,22543.0,24343.0,27117.0,29891.0,31721.0,32022.0
Wyre,18155.0,18028.5,17902.0,19227.0,20064.0,20985.0,23043.0,24953.0,26691.0,27657.0
Wyre Forest,18181.0,18181.0,18158.0,20783.0,20293.0,22665.0,23501.0,24553.0,25605.0,27346.0
York UA,21670.0,21404.0,20681.0,23926.0,24463.0,25366.0,27576.0,29121.0,32511.0,32151.0


In [None]:
# importing libraries
import numpy as np
import pandas as pd
# ensure df.columns is a MultiIndex
if not isinstance(combined_median.columns, pd.MultiIndex):
    raise ValueError("df.columns must be a MultiIndex")
# dictionary to hold new tercile columns
new_cols = {}
# loop over each year to calculate terciles
for year in combined_median.columns.levels[0]:
# get dataframe for a year
    year_df = combined_median[year]
# get all values as a flattened array, excluding NaNs
    values = year_df.values.flatten()
    values = values[~np.isnan(values)]
# calculate 33rd and 67th percentiles
    low_cut = np.percentile(values, 33)
    high_cut = np.percentile(values, 67)
# calculate mean across percentiles for each council
    base_vals = year_df.mean(axis=1)
# calculate tercile based on cuts
    tercile_series = base_vals.apply(
        lambda v: ( "Low" if v < low_cut else "Mid" if v < high_cut else "High" ))
# add tercile series to new columns
    new_cols[(year, "Tercile")] = tercile_series
# create dataframe from new columns
tercile_df = pd.DataFrame(new_cols, index=combined_median.index)
# set multiIndex for columns
tercile_df.columns = pd.MultiIndex.from_tuples(tercile_df.columns, names=combined_median.columns.names)
# combine median and tercile dataframes
df_with_tercile = pd.concat([combined_median, tercile_df], axis=1)
# sort columns
df_with_tercile = df_with_tercile.sort_index(axis=1)
# drop median columns, keep only tercile columns
df_with_tercile = df_with_tercile.drop(columns=df_with_tercile.columns[df_with_tercile.columns.get_level_values(1) == "50"])
# save to csv
df_with_tercile.to_csv(f"./CleanedData/Income_Tercile_Map.csv", index=True)   



In [None]:
# changing dataframe to long format for 3D plotting
df_long = cleaned_df.stack(level=[0,1]).reset_index()
# renaming columns
df_long.columns = ['Council', 'Year', 'Percentile', 'Revenue']
# converting columns to numeric
df_long['Year'] = pd.to_numeric(df_long['Year'])
df_long['Percentile'] = pd.to_numeric(df_long['Percentile'])
df_long['Revenue'] = pd.to_numeric(df_long['Revenue'])
# creating 3D scatter plot with dropdown for councils
fig = go.Figure()
# getting min and max for axes
x_min, x_max = df_long['Year'].min(), df_long['Year'].max()
y_min, y_max = df_long['Percentile'].min(), df_long['Percentile'].max()
z_min, z_max = df_long['Revenue'].min(), df_long['Revenue'].max()

# list of councils
councils = df_long['Council'].unique()
# adding traces for each council
for i, council in enumerate(councils):
    # filter dataframe per council
    df_c = df_long[df_long['Council'] == council]
    # add trace for the council to 3d scatter plot
    fig.add_trace(go.Scatter3d(x=df_c['Year'],y=df_c['Percentile'],z=df_c['Revenue'],mode='markers',marker=dict(size=5),name=council, visible=(i==0)))
# creating dropdown buttons for each council
buttons = []
for i, council in enumerate(councils):
    # create visibility list
    visible = [False]*len(councils)
    # set current council to visible
    visible[i] = True
    # add button for the council to the dropdown
    buttons.append(dict(label=council, method="update", args=[{"visible": visible}, {"title": f"Region: {council}"}]))
# updating layout with dropdown and axis titles
fig.update_layout(
    updatemenus=[dict(active=0, buttons=buttons, x=1.1, y=0.8)],
    scene=dict(
        xaxis=dict(title='Year', range=[x_min, x_max]),
        yaxis=dict(title='Percentile', range=[y_min, y_max]),
        zaxis=dict(title='Revenue', range=[z_min, z_max]),
    ),
    title="Income Percentiles By Council By Region By Year"
)
# show the figure
fig.show()


  df_long = cleaned_df.stack(level=[0,1]).reset_index()


In [None]:
# Foster-Wolfson measure calculation    
def foster_wolfson(percentiles):
#remove NaN values
    percentiles = percentiles.dropna()

    # convert percentile labels to integers
    percentiles.index = percentiles.index.astype(int)
# check for minimum required percentiles
    if len(percentiles) < 5:
        return np.nan
# check for median presence
    if 50 not in percentiles.index:
        return np.nan
# calculate median and mean income
    median_income = percentiles.loc[50]
    mean_income = percentiles.mean()
# check for zero mean income to avoid division by zero
    if mean_income == 0:
        return np.nan
# calculate Foster-Wolfson value
    total = 0

    for p in percentiles.index:
        income = percentiles.loc[p]
        weight = p / 100
        total = total + abs(income - median_income) * weight
# calculate final Foster-Wolfson value
    fw_value = (2 / mean_income) * total
    return fw_value

In [15]:
fw_results = []


In [None]:
# loop through each council and year to calculate Foster-Wolfson index
for council in cleaned_df.index:
# loop through each year
    for year in cleaned_df.columns.levels[0]:

        try:
# percentile data for council and year
            year_data = cleaned_df.loc[council, year]
# calculate Foster-Wolfson index
            fw = foster_wolfson(year_data)
# append result to list
            fw_results.append({
                "council": council,
                "year": year,
                "fw_index": fw
            })
# drop any errors silently
        except:
            pass

In [None]:
# create dataframe from results
fw_df = pd.DataFrame(fw_results)
fw_df.head()

Unnamed: 0,council,year,fw_index
0,Birmingham,2016,3.904238
1,Birmingham,2017,4.164003
2,Birmingham,2018,4.216479
3,Birmingham,2019,4.119175
4,Birmingham,2020,4.155534


In [None]:
# sort dataframe by council and year
fw_df = fw_df.sort_values(["council", "year"])

In [None]:
# add fw_change column initialized to NaN
fw_df["fw_change"] = np.nan


In [None]:
# loop through each council to calculate year-over-year changes
for council in fw_df["council"].unique():
# filter data for the council and year 
    council_data = fw_df[fw_df["council"] == council]
    council_data = council_data.sort_values("year")
# get Foster-Wolfson values as array
    fw_values = council_data["fw_index"].values
# calculate year-over-year changes
    changes = []
    changes.append(np.nan)  # first year has no change
# loop through remaining years to calculate changes
    for i in range(1, len(fw_values)):
        changes.append(fw_values[i] - fw_values[i - 1])
# assign changes back to main dataframe
    fw_df.loc[council_data.index, "fw_change"] = changes


In [21]:
fw_df


Unnamed: 0,council,year,fw_index,fw_change
0,Birmingham,2016,3.904238,
1,Birmingham,2017,4.164003,0.259764
2,Birmingham,2018,4.216479,0.052476
3,Birmingham,2019,4.119175,-0.097304
4,Birmingham,2020,4.155534,0.036358
...,...,...,...,...
415,Yorkshire and The Humber,2021,3.831163,0.035373
416,Yorkshire and The Humber,2022,3.659636,-0.171527
417,Yorkshire and The Humber,2023,3.576946,-0.082691
418,Yorkshire and The Humber,2024,3.427897,-0.149049


In [None]:
# save to csv
fw_df.to_csv(f"./CleanedData/Income_Inequality_Changes.csv", index=True)   