### Import Libraries

In [142]:
import pandas as pd
import numpy as np
from nocasedict import NocaseDict
import simplejson
import math

### Load data from CSVs

In [143]:
real_data = pd.read_csv('Raw Data Files\\LGBF_Data_Table_Real.csv')
cash_data = pd.read_csv('Raw Data Files\\LGBF_Data_Table_Cash.csv')
monthly_data = pd.read_csv('Raw Data Files\\LGBF_Data_Table_Monthly_Data.csv')
quarterly_data = pd.read_csv('Raw Data Files\\LGBF_Data_Table_Quarterly_Data.csv')
info = pd.read_csv('Data Files\\Indicator Information.csv')
fg = pd.read_csv('Data Files\\Family Groups.csv')


### Rename and Remove Columns Approriately

In [144]:
real_data = real_data[real_data['LocalAuthority'].notna()]
real_data = real_data.rename(columns={'LGBF Year': 'Period','LA_IndicatorReal': 'Value', 'LA_Numerator_real': 'Numerator', 'LA_Den_Real': 'Denominator', 'Scotland_Indicator_Real': 'Scotland Value', 'Scotland_Num_Real': 'Scotland Numerator', 'Scotland_Den_Real': 'Scotland Denominator'})
real_data = real_data[['Code', 'LocalAuthority', 'Period', 'Value', 'Numerator', 'Denominator', 'Scotland Value', 'Scotland Numerator', 'Scotland Denominator']]
real_data['Data Type'] = 'Real_Annual'


#TEMP FIX - Remove rows from Cash that are blank
cash_data = cash_data[cash_data['LA_IndicatorCash'].notna()]

cash_data = cash_data[cash_data['LocalAuthority'].notna()]
cash_data = cash_data.rename(columns={'Year':'Period','IndicatorCode':'Code','LA_IndicatorCash': 'Value', 'LA_NumeratorCash': 'Numerator', 'LA_DenominatorCash': 'Denominator', 'Scotland_IndicatorCash': 'Scotland Value', 'Scotland_NumeratorCash': 'Scotland Numerator', 'Scotland_DenominatorCash': 'Scotland Denominator'})
cash_data = cash_data[['Code', 'LocalAuthority', 'Period', 'Value', 'Numerator', 'Denominator', 'Scotland Value', 'Scotland Numerator', 'Scotland Denominator']]
cash_data['Data Type'] = 'Cash_Annual'

monthly_data = monthly_data[monthly_data['LocalAuthority'].notna()]
monthly_data['Period'] = monthly_data['Year'].astype(int).astype(str) + ';' + monthly_data['Month']
monthly_data = monthly_data.rename(columns={'LA_IndicatorReal': 'Value', 'LA_Numerator_real': 'Numerator', 'LA_Den_Real': 'Denominator', 'Scotland_Indicator_Real': 'Scotland Value', 'Scotland_Num_Real': 'Scotland Numerator', 'Scotland_Den_Real': 'Scotland Denominator'})
monthly_data = monthly_data[['Code', 'LocalAuthority', 'Period', 'Value', 'Numerator', 'Denominator', 'Scotland Value', 'Scotland Numerator', 'Scotland Denominator']]
monthly_data['Data Type'] = 'Real_Monthly'

quarterly_data = quarterly_data[quarterly_data['LocalAuthority'].notna()]
quarterly_data['Period'] = quarterly_data['LGBF Year'].astype(str) + ';' + quarterly_data['Month']
quarterly_data = quarterly_data.rename(columns={'LA_IndicatorReal': 'Value', 'LA_Numerator_real': 'Numerator', 'LA_Den_Real': 'Denominator', 'Scotland_Indicator_Real': 'Scotland Value', 'Scotland_Num_Real': 'Scotland Numerator', 'Scotland_Den_Real': 'Scotland Denominator'})
quarterly_data = quarterly_data[['Code', 'LocalAuthority', 'Period', 'Value', 'Numerator', 'Denominator', 'Scotland Value', 'Scotland Numerator', 'Scotland Denominator']]
quarterly_data['Data Type'] = 'Real_Quarterly'


### Tag and Append Data Files

In [145]:
data = real_data.append([cash_data, monthly_data, quarterly_data], ignore_index=True)
data

Unnamed: 0,Code,LocalAuthority,Period,Value,Numerator,Denominator,Scotland Value,Scotland Numerator,Scotland Denominator,Data Type
0,C&L01,Aberdeen City,2010-11,0.4067,785.0188,1922292.0,4.3749,198956.0517,45459818.0,Real_Annual
1,C&L01,Aberdeen City,2011-12,0.9227,1886.6034,2045051.0,3.9213,188806.0265,48202343.0,Real_Annual
2,C&L01,Aberdeen City,2012-13,4.3317,9381.0432,2163756.0,3.7367,192795.1341,51624697.0,Real_Annual
3,C&L01,Aberdeen City,2013-14,4.0486,8988.3238,2222588.0,3.7112,197855.9712,53320837.0,Real_Annual
4,C&L01,Aberdeen City,2014-15,3.4734,8645.5731,2487138.0,3.4044,179623.6567,52705262.0,Real_Annual
...,...,...,...,...,...,...,...,...,...,...
62871,CORP10,West Lothian,2021-22;Q4,0.6900,685.0000,990.0,0.7700,17455.0000,22710.0,Real_Quarterly
62872,CORP10,West Lothian,2022-23;Q1,0.5700,480.0000,835.0,0.8700,19970.0000,23065.0,Real_Quarterly
62873,CORP10,West Lothian,2022-23;Q2,0.7900,650.0000,830.0,0.8700,19670.0000,22645.0,Real_Quarterly
62874,CORP10,West Lothian,2022-23;Q3,0.8400,705.0000,840.0,0.8900,20790.0000,23425.0,Real_Quarterly


### Clean and Merge Dataframes

In [146]:
#Fix for CHN14a and CHN14b which are not in decimal percentage format.
data['Value'] = np.where(data['Code'] == 'CHN14a', data['Value'] / 100, data['Value'])
data['Value'] = np.where(data['Code'] == 'CHN14b', data['Value'] / 100, data['Value'])
data['Scotland Value'] = np.where( data['Code'] == 'CHN14a', data['Scotland Value'] / 100, data['Scotland Value'])
data['Scotland Value'] = np.where( data['Code'] == 'CHN14b', data['Scotland Value'] / 100, data['Scotland Value'])

#Main file transformations
merged_data = data.merge(info, how='left', on='Code')
merged_data = merged_data.merge(fg, how='left', right_on=['Local_Authority', 'Type'], left_on=['LocalAuthority', 'FamilyGrouping'])

merged_data = merged_data[[
    'Code_Sortable',
    'LocalAuthority',
    'Period',
    'Value',
    'Numerator',
    'Denominator',
    'Scotland Value',
    'Scotland Numerator',
    'Scotland Denominator',
    'FamilyGrouping',
    'Ranking_Type',
    'Ranking_GoldilocksMidpoint',
    'Family_Group',
    'Numerator_Multipier',
    'Denominator_Multiplier',
    'Data Type'
]]
merged_data = merged_data.rename(columns={
    'Code_Sortable': 'Code',
    'FamilyGrouping': 'FG_Type'
})
merged_data['Numerator'] = merged_data['Numerator'] * merged_data['Numerator_Multipier']
merged_data['Scotland Numerator'] = merged_data['Scotland Numerator'] * merged_data['Numerator_Multipier']
merged_data['Denominator'] = merged_data['Denominator'] * merged_data['Denominator_Multiplier']
merged_data['Scotland Denominator'] = merged_data['Scotland Denominator'] * merged_data['Denominator_Multiplier']
merged_data.head(5)

Unnamed: 0,Code,LocalAuthority,Period,Value,Numerator,Denominator,Scotland Value,Scotland Numerator,Scotland Denominator,FG_Type,Ranking_Type,Ranking_GoldilocksMidpoint,Family_Group,Numerator_Multipier,Denominator_Multiplier,Data Type
0,C&L 01,Aberdeen City,2010-11,0.4067,785018.8,1922292.0,4.3749,198956051.7,45459818.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0,Real_Annual
1,C&L 01,Aberdeen City,2011-12,0.9227,1886603.4,2045051.0,3.9213,188806026.5,48202343.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0,Real_Annual
2,C&L 01,Aberdeen City,2012-13,4.3317,9381043.2,2163756.0,3.7367,192795134.1,51624697.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0,Real_Annual
3,C&L 01,Aberdeen City,2013-14,4.0486,8988323.8,2222588.0,3.7112,197855971.2,53320837.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0,Real_Annual
4,C&L 01,Aberdeen City,2014-15,3.4734,8645573.1,2487138.0,3.4044,179623656.7,52705262.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0,Real_Annual


### Average Calculations

#### Mean and Medians - Scottish

In [147]:
merged_data_averagecleaned = merged_data[[
                                'Code',
                                'LocalAuthority',
                                'Period',
                                'Value',
                                'Numerator',
                                'Denominator',
                                'FG_Type',
                                'Family_Group',
                                'Data Type'
                            ]]
ScotAverages_Mean = merged_data_averagecleaned.groupby(['Code', 'Period','Data Type'], as_index=False).mean()
ScotAverages_Median = merged_data_averagecleaned.groupby(['Code', 'Period','Data Type'], as_index=False).median()
ScotAverages = ScotAverages_Mean.merge(ScotAverages_Median, how='left', on=['Code', 'Period','Data Type'], suffixes=('_Mean', '_Median'))
ScotAverages

Unnamed: 0,Code,Period,Data Type,Value_Mean,Numerator_Mean,Denominator_Mean,Value_Median,Numerator_Median,Denominator_Median
0,C&L 01,2010-11,Cash_Annual,3.297813,5.045062e+06,1.420619e+06,3.440000,3626500.0,1.080558e+06
1,C&L 01,2010-11,Real_Annual,4.064119,6.217377e+06,1.420619e+06,4.239350,4469184.7,1.080558e+06
2,C&L 01,2011-12,Cash_Annual,3.134688,4.860000e+06,1.506323e+06,2.890000,3356500.0,1.099976e+06
3,C&L 01,2011-12,Real_Annual,3.805616,5.900188e+06,1.506323e+06,3.508550,4074893.4,1.099976e+06
4,C&L 01,2012-13,Cash_Annual,3.068750,5.062750e+06,1.613272e+06,2.685000,3774500.0,1.180585e+06
...,...,...,...,...,...,...,...,...,...
1999,SW 07,2022-23,Cash_Annual,0.770938,,,0.775000,,
2000,SW 07,2022-23,Real_Annual,0.770938,,,0.775000,,
2001,SW 08,2023;April,Real_Monthly,66.170863,1.030844e+03,1.494634e+01,63.559963,655.0,1.314550e+01
2002,SW 08,2023;June,Real_Monthly,65.811683,1.036531e+03,1.494634e+01,62.966610,687.5,1.314550e+01


#### Sum Num/Den - Scottish

In [148]:
merged_data_numdenaveragecleaned = merged_data[[
                                'Code',
                                'LocalAuthority',
                                'Period',
                                'Numerator',
                                'Denominator',
                                'FG_Type',
                                'Family_Group',
                                'Data Type'
                            ]]
ScotAverages_NumDenSums = merged_data_numdenaveragecleaned.groupby(['Code', 'Period', 'Data Type'], as_index=False).sum()
ScotAverages_NumDenSums['Scot_NumDenAv'] = ScotAverages_NumDenSums['Numerator'] / ScotAverages_NumDenSums['Denominator']
ScotAverages_NumDenSums = ScotAverages_NumDenSums[['Code', 'Period', 'Data Type', 'Scot_NumDenAv']]
ScotAverages_NumDenSums = ScotAverages_NumDenSums.replace([np.inf, -np.inf], np.nan)

ScotAverages = ScotAverages.merge(ScotAverages_NumDenSums, how='left', on=['Code', 'Period', 'Data Type'])
ScotAverages

Unnamed: 0,Code,Period,Data Type,Value_Mean,Numerator_Mean,Denominator_Mean,Value_Median,Numerator_Median,Denominator_Median,Scot_NumDenAv
0,C&L 01,2010-11,Cash_Annual,3.297813,5.045062e+06,1.420619e+06,3.440000,3626500.0,1.080558e+06,3.551312
1,C&L 01,2010-11,Real_Annual,4.064119,6.217377e+06,1.420619e+06,4.239350,4469184.7,1.080558e+06,4.376525
2,C&L 01,2011-12,Cash_Annual,3.134688,4.860000e+06,1.506323e+06,2.890000,3356500.0,1.099976e+06,3.226399
3,C&L 01,2011-12,Real_Annual,3.805616,5.900188e+06,1.506323e+06,3.508550,4074893.4,1.099976e+06,3.916947
4,C&L 01,2012-13,Cash_Annual,3.068750,5.062750e+06,1.613272e+06,2.685000,3774500.0,1.180585e+06,3.138188
...,...,...,...,...,...,...,...,...,...,...
1999,SW 07,2022-23,Cash_Annual,0.770938,,,0.775000,,,
2000,SW 07,2022-23,Real_Annual,0.770938,,,0.775000,,,
2001,SW 08,2023;April,Real_Monthly,66.170863,1.030844e+03,1.494634e+01,63.559963,655.0,1.314550e+01,68.969627
2002,SW 08,2023;June,Real_Monthly,65.811683,1.036531e+03,1.494634e+01,62.966610,687.5,1.314550e+01,69.350155


#### Add Keys & Sort Columns

In [149]:
ScotAverages['Key_CodePeriodDType'] = ScotAverages['Code'] + ScotAverages['Period'] + + ScotAverages['Data Type']
ScotAverages = ScotAverages[['Key_CodePeriodDType','Code', 'Period','Data Type', 'Value_Mean', 'Numerator_Mean', 'Denominator_Mean', 'Value_Median', 'Numerator_Median', 'Denominator_Median', 'Scot_NumDenAv']]
ScotAverages

Unnamed: 0,Key_CodePeriodDType,Code,Period,Data Type,Value_Mean,Numerator_Mean,Denominator_Mean,Value_Median,Numerator_Median,Denominator_Median,Scot_NumDenAv
0,C&L 012010-11Cash_Annual,C&L 01,2010-11,Cash_Annual,3.297813,5.045062e+06,1.420619e+06,3.440000,3626500.0,1.080558e+06,3.551312
1,C&L 012010-11Real_Annual,C&L 01,2010-11,Real_Annual,4.064119,6.217377e+06,1.420619e+06,4.239350,4469184.7,1.080558e+06,4.376525
2,C&L 012011-12Cash_Annual,C&L 01,2011-12,Cash_Annual,3.134688,4.860000e+06,1.506323e+06,2.890000,3356500.0,1.099976e+06,3.226399
3,C&L 012011-12Real_Annual,C&L 01,2011-12,Real_Annual,3.805616,5.900188e+06,1.506323e+06,3.508550,4074893.4,1.099976e+06,3.916947
4,C&L 012012-13Cash_Annual,C&L 01,2012-13,Cash_Annual,3.068750,5.062750e+06,1.613272e+06,2.685000,3774500.0,1.180585e+06,3.138188
...,...,...,...,...,...,...,...,...,...,...,...
1999,SW 072022-23Cash_Annual,SW 07,2022-23,Cash_Annual,0.770938,,,0.775000,,,
2000,SW 072022-23Real_Annual,SW 07,2022-23,Real_Annual,0.770938,,,0.775000,,,
2001,SW 082023;AprilReal_Monthly,SW 08,2023;April,Real_Monthly,66.170863,1.030844e+03,1.494634e+01,63.559963,655.0,1.314550e+01,68.969627
2002,SW 082023;JuneReal_Monthly,SW 08,2023;June,Real_Monthly,65.811683,1.036531e+03,1.494634e+01,62.966610,687.5,1.314550e+01,69.350155


#### Rename Columns

In [150]:
ScotAverages = ScotAverages.rename(columns={
    'Value_Mean': 'Value_Scot_Mean',
    'Numerator_Mean': 'Numerator_Scot_Mean',
    'Denominator_Mean': 'Denominator_Scot_Mean',
    'Value_Median': 'Value_Scot_Median',
    'Numerator_Median': 'Numerator_Scot_Median',
    'Denominator_Median': 'Denominator_Scot_Median'
})
ScotAverages

Unnamed: 0,Key_CodePeriodDType,Code,Period,Data Type,Value_Scot_Mean,Numerator_Scot_Mean,Denominator_Scot_Mean,Value_Scot_Median,Numerator_Scot_Median,Denominator_Scot_Median,Scot_NumDenAv
0,C&L 012010-11Cash_Annual,C&L 01,2010-11,Cash_Annual,3.297813,5.045062e+06,1.420619e+06,3.440000,3626500.0,1.080558e+06,3.551312
1,C&L 012010-11Real_Annual,C&L 01,2010-11,Real_Annual,4.064119,6.217377e+06,1.420619e+06,4.239350,4469184.7,1.080558e+06,4.376525
2,C&L 012011-12Cash_Annual,C&L 01,2011-12,Cash_Annual,3.134688,4.860000e+06,1.506323e+06,2.890000,3356500.0,1.099976e+06,3.226399
3,C&L 012011-12Real_Annual,C&L 01,2011-12,Real_Annual,3.805616,5.900188e+06,1.506323e+06,3.508550,4074893.4,1.099976e+06,3.916947
4,C&L 012012-13Cash_Annual,C&L 01,2012-13,Cash_Annual,3.068750,5.062750e+06,1.613272e+06,2.685000,3774500.0,1.180585e+06,3.138188
...,...,...,...,...,...,...,...,...,...,...,...
1999,SW 072022-23Cash_Annual,SW 07,2022-23,Cash_Annual,0.770938,,,0.775000,,,
2000,SW 072022-23Real_Annual,SW 07,2022-23,Real_Annual,0.770938,,,0.775000,,,
2001,SW 082023;AprilReal_Monthly,SW 08,2023;April,Real_Monthly,66.170863,1.030844e+03,1.494634e+01,63.559963,655.0,1.314550e+01,68.969627
2002,SW 082023;JuneReal_Monthly,SW 08,2023;June,Real_Monthly,65.811683,1.036531e+03,1.494634e+01,62.966610,687.5,1.314550e+01,69.350155


#### Output File

In [151]:
ScotAverages.to_csv('Data Files\\Scottish Averages.csv', index=False, encoding='utf-8-sig')

#### Mean and Medians - Family Groups

In [152]:
FGAverages_Mean = merged_data_averagecleaned.groupby(['Code', 'Family_Group', 'Period', 'Data Type'], as_index=False).mean()
FGAverages_Median = merged_data_averagecleaned.groupby(['Code', 'Family_Group', 'Period', 'Data Type'], as_index=False).median()
FGAverages = FGAverages_Mean.merge(FGAverages_Median, how='left', on=['Code','Family_Group','Period', 'Data Type'], suffixes=('_Mean', '_Median'))
FGAverages

Unnamed: 0,Code,Family_Group,Period,Data Type,Value_Mean,Numerator_Mean,Denominator_Mean,Value_Median,Numerator_Median,Denominator_Median
0,C&L 01,Family Group 1,2010-11,Cash_Annual,3.011250,3.076125e+06,9.231241e+05,3.245000,1955000.00,695444.0000
1,C&L 01,Family Group 1,2010-11,Real_Annual,3.710975,3.790920e+06,9.231241e+05,3.999050,2409280.65,695444.0000
2,C&L 01,Family Group 1,2011-12,Cash_Annual,3.038750,3.213375e+06,1.036476e+06,2.865000,1825500.00,707515.5000
3,C&L 01,Family Group 1,2011-12,Real_Annual,3.689150,3.901135e+06,1.036476e+06,3.478200,2216212.70,707515.5000
4,C&L 01,Family Group 1,2012-13,Cash_Annual,2.771250,3.172625e+06,1.065939e+06,2.605000,1873000.00,714443.0000
...,...,...,...,...,...,...,...,...,...,...
8011,SW 08,Family Group 3,2023;June,Real_Monthly,80.714824,1.373250e+03,1.777112e+01,73.809688,1439.00,14.5625
8012,SW 08,Family Group 3,2023;May,Real_Monthly,78.043044,1.394125e+03,1.777112e+01,84.010110,1484.00,14.5625
8013,SW 08,Family Group 4,2023;April,Real_Monthly,78.222631,1.093625e+03,1.479875e+01,70.866845,797.50,11.6295
8014,SW 08,Family Group 4,2023;June,Real_Monthly,70.536834,1.060500e+03,1.479875e+01,67.612599,735.50,11.6295


#### Sum Num/Den - Family Groups

In [153]:
merged_data_numdenaveragecleaned = merged_data[[
                                                    'Code',
                                                    'LocalAuthority',
                                                    'Period',
                                                    'Numerator',
                                                    'Denominator',
                                                    'FG_Type',
                                                    'Family_Group',
                                                    'Data Type'
                                                ]]
FGAverages_NumDenSums = merged_data_numdenaveragecleaned.groupby(['Code', 'Family_Group', 'Period', 'Data Type'], as_index=False).sum()
FGAverages_NumDenSums['FG_NumDenAv'] = FGAverages_NumDenSums['Numerator'] / FGAverages_NumDenSums['Denominator']
FGAverages_NumDenSums = FGAverages_NumDenSums[['Code', 'Family_Group', 'Period','Data Type', 'FG_NumDenAv']]
FGAverages_NumDenSums = FGAverages_NumDenSums.replace([np.inf, -np.inf], np.nan)

FGAverages = FGAverages.merge(FGAverages_NumDenSums, how='left', on=['Code', 'Family_Group', 'Period', 'Data Type'])
FGAverages

Unnamed: 0,Code,Family_Group,Period,Data Type,Value_Mean,Numerator_Mean,Denominator_Mean,Value_Median,Numerator_Median,Denominator_Median,FG_NumDenAv
0,C&L 01,Family Group 1,2010-11,Cash_Annual,3.011250,3.076125e+06,9.231241e+05,3.245000,1955000.00,695444.0000,3.332298
1,C&L 01,Family Group 1,2010-11,Real_Annual,3.710975,3.790920e+06,9.231241e+05,3.999050,2409280.65,695444.0000,4.106620
2,C&L 01,Family Group 1,2011-12,Cash_Annual,3.038750,3.213375e+06,1.036476e+06,2.865000,1825500.00,707515.5000,3.100289
3,C&L 01,Family Group 1,2011-12,Real_Annual,3.689150,3.901135e+06,1.036476e+06,3.478200,2216212.70,707515.5000,3.763845
4,C&L 01,Family Group 1,2012-13,Cash_Annual,2.771250,3.172625e+06,1.065939e+06,2.605000,1873000.00,714443.0000,2.976366
...,...,...,...,...,...,...,...,...,...,...,...
8011,SW 08,Family Group 3,2023;June,Real_Monthly,80.714824,1.373250e+03,1.777112e+01,73.809688,1439.00,14.5625,77.274230
8012,SW 08,Family Group 3,2023;May,Real_Monthly,78.043044,1.394125e+03,1.777112e+01,84.010110,1484.00,14.5625,78.448888
8013,SW 08,Family Group 4,2023;April,Real_Monthly,78.222631,1.093625e+03,1.479875e+01,70.866845,797.50,11.6295,73.899823
8014,SW 08,Family Group 4,2023;June,Real_Monthly,70.536834,1.060500e+03,1.479875e+01,67.612599,735.50,11.6295,71.661458


#### Add Keys & Sort Columns

In [154]:
FGAverages['Key_CodePeriodDType'] = FGAverages['Code'] + FGAverages['Period']
FGAverages = FGAverages[['Key_CodePeriodDType', 'Code', 'Period', 'Data Type', 'Family_Group', 'Value_Mean', 'Numerator_Mean', 'Denominator_Mean', 'Value_Median', 'Numerator_Median', 'Denominator_Median', 'FG_NumDenAv']]
FGAverages

Unnamed: 0,Key_CodePeriodDType,Code,Period,Data Type,Family_Group,Value_Mean,Numerator_Mean,Denominator_Mean,Value_Median,Numerator_Median,Denominator_Median,FG_NumDenAv
0,C&L 012010-11,C&L 01,2010-11,Cash_Annual,Family Group 1,3.011250,3.076125e+06,9.231241e+05,3.245000,1955000.00,695444.0000,3.332298
1,C&L 012010-11,C&L 01,2010-11,Real_Annual,Family Group 1,3.710975,3.790920e+06,9.231241e+05,3.999050,2409280.65,695444.0000,4.106620
2,C&L 012011-12,C&L 01,2011-12,Cash_Annual,Family Group 1,3.038750,3.213375e+06,1.036476e+06,2.865000,1825500.00,707515.5000,3.100289
3,C&L 012011-12,C&L 01,2011-12,Real_Annual,Family Group 1,3.689150,3.901135e+06,1.036476e+06,3.478200,2216212.70,707515.5000,3.763845
4,C&L 012012-13,C&L 01,2012-13,Cash_Annual,Family Group 1,2.771250,3.172625e+06,1.065939e+06,2.605000,1873000.00,714443.0000,2.976366
...,...,...,...,...,...,...,...,...,...,...,...,...
8011,SW 082023;June,SW 08,2023;June,Real_Monthly,Family Group 3,80.714824,1.373250e+03,1.777112e+01,73.809688,1439.00,14.5625,77.274230
8012,SW 082023;May,SW 08,2023;May,Real_Monthly,Family Group 3,78.043044,1.394125e+03,1.777112e+01,84.010110,1484.00,14.5625,78.448888
8013,SW 082023;April,SW 08,2023;April,Real_Monthly,Family Group 4,78.222631,1.093625e+03,1.479875e+01,70.866845,797.50,11.6295,73.899823
8014,SW 082023;June,SW 08,2023;June,Real_Monthly,Family Group 4,70.536834,1.060500e+03,1.479875e+01,67.612599,735.50,11.6295,71.661458


#### Rename Columns

In [155]:
FGAverages = FGAverages.rename(columns = {
    'Value_Mean': 'Value_FG_Mean',
    'Numerator_Mean': 'Numerator_FG_Mean',
    'Denominator_Mean': 'Denominator_FG_Mean',
    'Value_Median': 'Value_FG_Median',
    'Numerator_Median': 'Numerator_FG_Median',
    'Denominator_Median': 'Denominator_FG_Median'
})

#### Output File

In [156]:
FGAverages.to_csv('Data Files\\Family Averages.csv',index=False, encoding='utf-8-sig')

### Separate Scottish Values

In [157]:
ScotValues = merged_data.copy(deep=True)
ScotValues = ScotValues[['Code', 'Period', 'Scotland Value', 'Scotland Numerator', 'Scotland Denominator', 'Data Type']]
ScotValues = ScotValues.groupby(['Code', 'Period', 'Data Type'], as_index=False).first()
ScotValues.to_csv('Data Files\\Scottish Values.csv',index=False, encoding='utf-8-sig')
ScotValues

Unnamed: 0,Code,Period,Data Type,Scotland Value,Scotland Numerator,Scotland Denominator
0,C&L 01,2010-11,Cash_Annual,3.550000,161442000.0,4.545982e+07
1,C&L 01,2010-11,Real_Annual,4.374900,198956051.7,4.545982e+07
2,C&L 01,2011-12,Cash_Annual,3.230000,155520000.0,4.820234e+07
3,C&L 01,2011-12,Real_Annual,3.921300,188806026.5,4.820234e+07
4,C&L 01,2012-13,Cash_Annual,3.140000,162008000.0,5.162470e+07
...,...,...,...,...,...,...
1999,SW 07,2022-23,Cash_Annual,0.750000,,
2000,SW 07,2022-23,Real_Annual,0.750000,,
2001,SW 08,2023;April,Real_Monthly,68.969627,32987.0,4.782830e+02
2002,SW 08,2023;June,Real_Monthly,69.350155,33169.0,4.782830e+02


### Ranking Calculations

In [158]:
FamilyRanks = merged_data[[
    'Code',
    'LocalAuthority',
    'Period',
    'Value',
    'Family_Group',
    'Ranking_GoldilocksMidpoint',
    'Data Type'
]]
FamilyRanks = FamilyRanks.copy(deep=True)

#### Family Group Ranks

In [159]:
FamilyRanks = merged_data[[
                'Code',
                'LocalAuthority',
                'Period',
                'Value',
                'Family_Group',
                'Ranking_GoldilocksMidpoint',
                'Data Type'
            ]]
FamilyRanks = FamilyRanks.copy(deep=True)
FamilyRanksGoldi = FamilyRanks.copy(deep=True)
FamilyRanks['FamilyRank_Desc'] = FamilyRanks.groupby(['Code', 'Period', 'Family_Group', 'Data Type'])['Value'].rank('min', ascending=False).astype(int)
FamilyRanks['FamilyRank_Asc'] = FamilyRanks.groupby(['Code', 'Period', 'Family_Group', 'Data Type'])['Value'].rank('min', ascending=True).astype(int)
FamilyRanks['FamilyRank_Desc_Pct'] = FamilyRanks.groupby(['Code', 'Period', 'Family_Group', 'Data Type'])['Value'].rank('min', ascending=False, pct=True).astype(float)
FamilyRanks['FamilyRank_Asc_Pct'] = FamilyRanks.groupby(['Code', 'Period', 'Family_Group', 'Data Type'])['Value'].rank('min', ascending=True, pct=True).astype(float)


def distance(Current, Previous):
    return (max(Previous, Current) - min(Previous, Current)) * (-1 if Previous > Current else 1)

def DifferenceFromGoldilocksMidPoint(df):
    if df['Ranking_GoldilocksMidpoint'] == None:
        return None
    else:
        return abs(distance(df['Value'], df['Ranking_GoldilocksMidpoint']))


FamilyRanksGoldi['AbsoluteDifferenceFromGoldilocksMidPoint'] = FamilyRanksGoldi.apply(DifferenceFromGoldilocksMidPoint, axis=1)
FamilyRanksGoldi = FamilyRanksGoldi[pd.notnull(FamilyRanksGoldi['AbsoluteDifferenceFromGoldilocksMidPoint'])]
FamilyRanksGoldi['FamilyRank_Goldi'] = FamilyRanksGoldi.groupby(['Code', 'Period', 'Family_Group', 'Data Type'])['AbsoluteDifferenceFromGoldilocksMidPoint'].rank('min', ascending=True).astype(int)
FamilyRanksGoldi['FamilyRank_Goldi_Pct'] = FamilyRanksGoldi.groupby(['Code', 'Period', 'Family_Group', 'Data Type'])['AbsoluteDifferenceFromGoldilocksMidPoint'].rank('min', ascending=True, pct=True).astype(float)

FamilyRanks = FamilyRanks.merge(FamilyRanksGoldi[['Code', 'Period', 'LocalAuthority', 'FamilyRank_Goldi', 'FamilyRank_Goldi_Pct', 'Data Type']], how='left', on=['Code', 'Period', 'LocalAuthority', 'Data Type'], suffixes=('_FamilyRank', '_Goldi'))
FamilyRanks = FamilyRanks.merge(info, how = 'left', left_on= 'Code', right_on = 'Code_Sortable')

# Define functions needed to select correct ranking type and percentile type
def FamilyRank_select(df):
    if df['Ranking_Type'] == "Ascending":
        return df['FamilyRank_Asc']
    elif df['Ranking_Type'] == "Descending":
        return df['FamilyRank_Desc']
    elif df['Ranking_Type'] == "Goldilocks":
        return df['FamilyRank_Goldi']
    else:
        return None


def FamilyRank_Pct_select(df):
    if df['Ranking_Type'] == "Ascending":
        return df['FamilyRank_Asc_Pct']
    elif df['Ranking_Type'] == "Descending":
        return df['FamilyRank_Desc_Pct']
    elif df['Ranking_Type'] == "Goldilocks":
        return df['FamilyRank_Goldi_Pct']
    else:
        return None


# Apply functions above to create two new columns that contain the correct rank and percentile for each row
FamilyRanks['FamilyRank'] = FamilyRanks.apply(FamilyRank_select, axis=1)
FamilyRanks['FamilyPct'] = FamilyRanks.apply(FamilyRank_Pct_select, axis=1)

FamilyRanks = FamilyRanks[[
                'Code_x',
                'LocalAuthority',
                'Period',
                'FamilyRank',
                'FamilyPct',
                'Data Type'
            ]]
FamilyRanks = FamilyRanks.rename(columns = {'Code_x': 'Code'})


FamilyRanks

Unnamed: 0,Code,LocalAuthority,Period,FamilyRank,FamilyPct,Data Type
0,C&L 01,Aberdeen City,2010-11,1.0,0.125,Real_Annual
1,C&L 01,Aberdeen City,2011-12,1.0,0.125,Real_Annual
2,C&L 01,Aberdeen City,2012-13,5.0,0.625,Real_Annual
3,C&L 01,Aberdeen City,2013-14,5.0,0.625,Real_Annual
4,C&L 01,Aberdeen City,2014-15,5.0,0.625,Real_Annual
...,...,...,...,...,...,...
62871,CORP 10,West Lothian,2021-22;Q4,8.0,1.000,Real_Quarterly
62872,CORP 10,West Lothian,2022-23;Q1,8.0,1.000,Real_Quarterly
62873,CORP 10,West Lothian,2022-23;Q2,7.0,1.000,Real_Quarterly
62874,CORP 10,West Lothian,2022-23;Q3,7.0,1.000,Real_Quarterly


#### Scottish Ranks

In [160]:
ScotRanks = merged_data[[
                'Code',
                'LocalAuthority',
                'Period',
                'Value',
                'Ranking_GoldilocksMidpoint',
                'Data Type'
            ]]
ScotRanks = ScotRanks.copy(deep=True)
ScotRanksGoldi = ScotRanks.copy(deep=True)

ScotRanks['ScotRank_Desc'] = ScotRanks.groupby(['Code', 'Period', 'Data Type'])['Value'].rank('min', ascending=False).astype(int)
ScotRanks['ScotRank_Asc'] = ScotRanks.groupby(['Code', 'Period', 'Data Type'])['Value'].rank('min', ascending=True).astype(int)
ScotRanks['ScotRank_Desc_Pct'] = ScotRanks.groupby(['Code', 'Period','Data Type'])['Value'].rank('min', ascending=False, pct=True).astype(float)
ScotRanks['ScotRank_Asc_Pct'] = ScotRanks.groupby(['Code', 'Period', 'Data Type'])['Value'].rank('min', ascending=True, pct=True).astype(float)


def distance(Current, Previous):
    return (max(Previous, Current) - min(Previous, Current)) * (-1 if Previous > Current else 1)

def DifferenceFromGoldilocksMidPoint(df):
    if df['Ranking_GoldilocksMidpoint'] == None:
        return None
    else:
        return abs(distance(df['Value'], df['Ranking_GoldilocksMidpoint']))


ScotRanksGoldi['AbsoluteDifferenceFromGoldilocksMidPoint'] = ScotRanks.apply(DifferenceFromGoldilocksMidPoint, axis=1)
ScotRanksGoldi = ScotRanksGoldi[pd.notnull(ScotRanksGoldi['AbsoluteDifferenceFromGoldilocksMidPoint'])]
ScotRanksGoldi['ScotRank_Goldi'] = ScotRanksGoldi.groupby(['Code', 'Period', 'Data Type'])['AbsoluteDifferenceFromGoldilocksMidPoint'].rank('min', ascending=True).astype(int)
ScotRanksGoldi['ScotRank_Goldi_Pct'] = ScotRanksGoldi.groupby(['Code', 'Period', 'Data Type'])['AbsoluteDifferenceFromGoldilocksMidPoint'].rank('min', ascending=True, pct=True).astype(float)

ScotRanks = ScotRanks.merge(ScotRanksGoldi[['Code', 'Period', 'LocalAuthority', 'ScotRank_Goldi', 'ScotRank_Goldi_Pct', 'Data Type']], how='left', on=['Code', 'Period', 'LocalAuthority', 'Data Type'], suffixes=('_ScotRank', '_Goldi'))

ScotRanks = ScotRanks.merge(info, how='left', left_on='Code', right_on='Code_Sortable')

# Define functions needed to select correct ranking type and percentile type


def ScotRank_select(df):
    if df['Ranking_Type'] == "Ascending":
        return df['ScotRank_Asc']
    elif df['Ranking_Type'] == "Descending":
        return df['ScotRank_Desc']
    elif df['Ranking_Type'] == "Goldilocks":
        return df['ScotRank_Goldi']
    else:
        return None


def ScotRank_Pct_select(df):
    if df['Ranking_Type'] == "Ascending":
        return df['ScotRank_Asc_Pct']
    elif df['Ranking_Type'] == "Descending":
        return df['ScotRank_Desc_Pct']
    elif df['Ranking_Type'] == "Goldilocks":
        return df['ScotRank_Goldi_Pct']
    else:
        return None


# Apply functions above to create two new columns that contain the correct rank and percentile for each row
ScotRanks['ScotRank'] = ScotRanks.apply(ScotRank_select, axis=1)
ScotRanks['ScotPct'] = ScotRanks.apply(ScotRank_Pct_select, axis=1)

ScotRanks = ScotRanks[[
    'Code_x',
    'LocalAuthority',
    'Period',
    'ScotRank',
    'ScotPct',
    'Data Type'
]]

ScotRanks = ScotRanks.rename(columns={'Code_x': 'Code'})

ScotRanks


Unnamed: 0,Code,LocalAuthority,Period,ScotRank,ScotPct,Data Type
0,C&L 01,Aberdeen City,2010-11,1.0,0.031250,Real_Annual
1,C&L 01,Aberdeen City,2011-12,1.0,0.031250,Real_Annual
2,C&L 01,Aberdeen City,2012-13,24.0,0.750000,Real_Annual
3,C&L 01,Aberdeen City,2013-14,23.0,0.718750,Real_Annual
4,C&L 01,Aberdeen City,2014-15,20.0,0.625000,Real_Annual
...,...,...,...,...,...,...
62871,CORP 10,West Lothian,2021-22;Q4,27.0,0.843750,Real_Quarterly
62872,CORP 10,West Lothian,2022-23;Q1,29.0,0.906250,Real_Quarterly
62873,CORP 10,West Lothian,2022-23;Q2,24.0,0.774194,Real_Quarterly
62874,CORP 10,West Lothian,2022-23;Q3,24.0,0.774194,Real_Quarterly


#### Merge Rankings into Main Data

In [161]:
IndicatorDataMain = merged_data.merge(FamilyRanks, how='left', on=['Code', 'LocalAuthority', 'Period', 'Data Type'])
IndicatorDataMain = IndicatorDataMain.merge(ScotRanks, how='left', on=['Code', 'LocalAuthority', 'Period', 'Data Type'])
IndicatorDataMain.head(5)

Unnamed: 0,Code,LocalAuthority,Period,Value,Numerator,Denominator,Scotland Value,Scotland Numerator,Scotland Denominator,FG_Type,Ranking_Type,Ranking_GoldilocksMidpoint,Family_Group,Numerator_Multipier,Denominator_Multiplier,Data Type,FamilyRank,FamilyPct,ScotRank,ScotPct
0,C&L 01,Aberdeen City,2010-11,0.4067,785018.8,1922292.0,4.3749,198956051.7,45459818.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0,Real_Annual,1.0,0.125,1.0,0.03125
1,C&L 01,Aberdeen City,2011-12,0.9227,1886603.4,2045051.0,3.9213,188806026.5,48202343.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0,Real_Annual,1.0,0.125,1.0,0.03125
2,C&L 01,Aberdeen City,2012-13,4.3317,9381043.2,2163756.0,3.7367,192795134.1,51624697.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0,Real_Annual,5.0,0.625,24.0,0.75
3,C&L 01,Aberdeen City,2013-14,4.0486,8988323.8,2222588.0,3.7112,197855971.2,53320837.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0,Real_Annual,5.0,0.625,23.0,0.71875
4,C&L 01,Aberdeen City,2014-15,3.4734,8645573.1,2487138.0,3.4044,179623656.7,52705262.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0,Real_Annual,5.0,0.625,20.0,0.625


### Main Data File Transformations

#### Add Keys

In [162]:
IndicatorDataMain['Key_CodePeriodDType'] = IndicatorDataMain['Code'] + IndicatorDataMain['Period'] + IndicatorDataMain['Data Type']
IndicatorDataMain['Key_CodePeriodFamilyGroupDType'] = IndicatorDataMain['Code'] + IndicatorDataMain['Period'] + IndicatorDataMain['Family_Group'] + IndicatorDataMain['Data Type']
IndicatorDataMain['Key_CodePeriodLADType'] = IndicatorDataMain['Code'] + IndicatorDataMain['Period'] + IndicatorDataMain['LocalAuthority'] + IndicatorDataMain['Data Type']
IndicatorDataMain.head(5)

Unnamed: 0,Code,LocalAuthority,Period,Value,Numerator,Denominator,Scotland Value,Scotland Numerator,Scotland Denominator,FG_Type,...,Numerator_Multipier,Denominator_Multiplier,Data Type,FamilyRank,FamilyPct,ScotRank,ScotPct,Key_CodePeriodDType,Key_CodePeriodFamilyGroupDType,Key_CodePeriodLADType
0,C&L 01,Aberdeen City,2010-11,0.4067,785018.8,1922292.0,4.3749,198956051.7,45459818.0,"Environmental, Culture & Leisure, Economic Dev...",...,1000.0,1.0,Real_Annual,1.0,0.125,1.0,0.03125,C&L 012010-11Real_Annual,C&L 012010-11Family Group 4Real_Annual,C&L 012010-11Aberdeen CityReal_Annual
1,C&L 01,Aberdeen City,2011-12,0.9227,1886603.4,2045051.0,3.9213,188806026.5,48202343.0,"Environmental, Culture & Leisure, Economic Dev...",...,1000.0,1.0,Real_Annual,1.0,0.125,1.0,0.03125,C&L 012011-12Real_Annual,C&L 012011-12Family Group 4Real_Annual,C&L 012011-12Aberdeen CityReal_Annual
2,C&L 01,Aberdeen City,2012-13,4.3317,9381043.2,2163756.0,3.7367,192795134.1,51624697.0,"Environmental, Culture & Leisure, Economic Dev...",...,1000.0,1.0,Real_Annual,5.0,0.625,24.0,0.75,C&L 012012-13Real_Annual,C&L 012012-13Family Group 4Real_Annual,C&L 012012-13Aberdeen CityReal_Annual
3,C&L 01,Aberdeen City,2013-14,4.0486,8988323.8,2222588.0,3.7112,197855971.2,53320837.0,"Environmental, Culture & Leisure, Economic Dev...",...,1000.0,1.0,Real_Annual,5.0,0.625,23.0,0.71875,C&L 012013-14Real_Annual,C&L 012013-14Family Group 4Real_Annual,C&L 012013-14Aberdeen CityReal_Annual
4,C&L 01,Aberdeen City,2014-15,3.4734,8645573.1,2487138.0,3.4044,179623656.7,52705262.0,"Environmental, Culture & Leisure, Economic Dev...",...,1000.0,1.0,Real_Annual,5.0,0.625,20.0,0.625,C&L 012014-15Real_Annual,C&L 012014-15Family Group 4Real_Annual,C&L 012014-15Aberdeen CityReal_Annual


#### Select Only Required Columns

In [163]:
IndicatorDataMain = IndicatorDataMain[
                                        [
                                            'Key_CodePeriodDType',
                                            'Key_CodePeriodFamilyGroupDType',
                                            'Key_CodePeriodLADType',
                                            'Code',
                                            'LocalAuthority',
                                            'Period',
                                            'Value',
                                            'Numerator',
                                            'Denominator',
                                            'FamilyRank',
                                            'FamilyPct',
                                            'ScotRank',
                                            'ScotPct',
                                            'Data Type'
                                        ]
                                    ]
IndicatorDataMain

Unnamed: 0,Key_CodePeriodDType,Key_CodePeriodFamilyGroupDType,Key_CodePeriodLADType,Code,LocalAuthority,Period,Value,Numerator,Denominator,FamilyRank,FamilyPct,ScotRank,ScotPct,Data Type
0,C&L 012010-11Real_Annual,C&L 012010-11Family Group 4Real_Annual,C&L 012010-11Aberdeen CityReal_Annual,C&L 01,Aberdeen City,2010-11,0.4067,785018.8,1922292.0,1.0,0.125,1.0,0.031250,Real_Annual
1,C&L 012011-12Real_Annual,C&L 012011-12Family Group 4Real_Annual,C&L 012011-12Aberdeen CityReal_Annual,C&L 01,Aberdeen City,2011-12,0.9227,1886603.4,2045051.0,1.0,0.125,1.0,0.031250,Real_Annual
2,C&L 012012-13Real_Annual,C&L 012012-13Family Group 4Real_Annual,C&L 012012-13Aberdeen CityReal_Annual,C&L 01,Aberdeen City,2012-13,4.3317,9381043.2,2163756.0,5.0,0.625,24.0,0.750000,Real_Annual
3,C&L 012013-14Real_Annual,C&L 012013-14Family Group 4Real_Annual,C&L 012013-14Aberdeen CityReal_Annual,C&L 01,Aberdeen City,2013-14,4.0486,8988323.8,2222588.0,5.0,0.625,23.0,0.718750,Real_Annual
4,C&L 012014-15Real_Annual,C&L 012014-15Family Group 4Real_Annual,C&L 012014-15Aberdeen CityReal_Annual,C&L 01,Aberdeen City,2014-15,3.4734,8645573.1,2487138.0,5.0,0.625,20.0,0.625000,Real_Annual
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62871,CORP 102021-22;Q4Real_Quarterly,CORP 102021-22;Q4Family Group 3Real_Quarterly,CORP 102021-22;Q4West LothianReal_Quarterly,CORP 10,West Lothian,2021-22;Q4,0.6900,685.0,990.0,8.0,1.000,27.0,0.843750,Real_Quarterly
62872,CORP 102022-23;Q1Real_Quarterly,CORP 102022-23;Q1Family Group 3Real_Quarterly,CORP 102022-23;Q1West LothianReal_Quarterly,CORP 10,West Lothian,2022-23;Q1,0.5700,480.0,835.0,8.0,1.000,29.0,0.906250,Real_Quarterly
62873,CORP 102022-23;Q2Real_Quarterly,CORP 102022-23;Q2Family Group 3Real_Quarterly,CORP 102022-23;Q2West LothianReal_Quarterly,CORP 10,West Lothian,2022-23;Q2,0.7900,650.0,830.0,7.0,1.000,24.0,0.774194,Real_Quarterly
62874,CORP 102022-23;Q3Real_Quarterly,CORP 102022-23;Q3Family Group 3Real_Quarterly,CORP 102022-23;Q3West LothianReal_Quarterly,CORP 10,West Lothian,2022-23;Q3,0.8400,705.0,840.0,7.0,1.000,24.0,0.774194,Real_Quarterly


#### Previous Row & First Row Dictionaries

In [164]:
#Sort rows of the all_LGBFData dataframe to ensure that we are getting the correct previous and first rows in the subsequent steps. This may not be strictly necessary as the data should already be in the correct sort order. It is here as a safeguard in case any sorting needs to be done in previous steps at a later date.
IndicatorDataMain = IndicatorDataMain.copy(deep=True)
IndicatorDataMain = IndicatorDataMain.rename(columns={'Data Type': 'Data_Type'})
IndicatorDataMain.sort_values(by=['Data_Type', 'LocalAuthority', 'Code', 'Period'], inplace=True)


#Define all variables that will be used to record the changes from previous/first.
Previouss = []
Previous = None
Firsts = []
First = None
First_Save = None
Local_Authority = ""
Data_Type = ""
Code = ""
Period = ""
Value = ""
Numerator = ""
Denominator = ""
ScotRank = ""
ScotPct = ""
FamilyRank = ""
FamilyPct = ""

# Loop over the all_LGBFDataframe and record previous and first into a python dictionary object. Save the objects for each row into the two list variables (Previouss and Firsts)
for row in IndicatorDataMain.itertuples():
    # If the curently stored Local_Authority and Code are both equal to the current row then this is not the first row for this indicator and local authority combination. As such Previous is calculated using all of the currently stored values in the variables (these are written to at the end of each loop) and First is populated using the stored dictionary in First_Save
    if Local_Authority == row.LocalAuthority and Code == row.Code and Data_Type == row.Data_Type :
        Previous = {
            'Value': Value,
            'Numerator': Numerator,
            'Denominator': Denominator,
            'ScotRank': ScotRank,
            'ScotPct': ScotPct,
            'FamilyRank': FamilyRank,
            'FamilyPct': FamilyPct
        }
        First = First_Save

    # If the curently stored Local_Authority and Code are both not equal to the current row then this is the first row for this indicator and local authority combination. as such the Previous object is set to None and the First object is populated using this rows values.
    elif Local_Authority != row.LocalAuthority or Code != row.Code or Data_Type != row.Data_Type :
        First_Save = {
            'Value': row.Value,
            'Numerator': row.Numerator,
            'Denominator': row.Denominator,
            'ScotRank': row.ScotRank,
            'ScotPct': row.ScotPct,
            'FamilyRank': row.FamilyRank,
            'FamilyPct': row.FamilyPct
        }
        First = None
        Previous = None

    # Append the First and Previous into their respective list variables.
    Previouss.append(Previous)
    Firsts.append(First)

    # Set all other variables to their respective columns values in the current row. This is used to both evaluate the if criteria above and to populate the next previous object.
    Data_Type = row.Data_Type
    Local_Authority = row.LocalAuthority
    Code = row.Code
    Period = row.Period
    Value = row.Value
    Numerator = row.Numerator
    Denominator = row.Denominator
    ScotRank = row.ScotRank
    ScotPct = row.ScotPct
    FamilyRank = row.FamilyRank
    FamilyPct = row.FamilyPct

# Assign the two list variables into appropriately titled columns within our all_LGBFData dataframe
IndicatorDataMain['Previous_Row'] = Previouss
IndicatorDataMain['First_Row'] = Firsts

IndicatorDataMain = IndicatorDataMain.rename(columns={'Data_Type': 'Data Type'})

IndicatorDataMain


Unnamed: 0,Key_CodePeriodDType,Key_CodePeriodFamilyGroupDType,Key_CodePeriodLADType,Code,LocalAuthority,Period,Value,Numerator,Denominator,FamilyRank,FamilyPct,ScotRank,ScotPct,Data Type,Previous_Row,First_Row
30609,C&L 012010-11Cash_Annual,C&L 012010-11Family Group 4Cash_Annual,C&L 012010-11Aberdeen CityCash_Annual,C&L 01,Aberdeen City,2010-11,0.33,637000.0,1922292.0,1.0,0.125,1.0,0.031250,Cash_Annual,,
30610,C&L 012011-12Cash_Annual,C&L 012011-12Family Group 4Cash_Annual,C&L 012011-12Aberdeen CityCash_Annual,C&L 01,Aberdeen City,2011-12,0.76,1554000.0,2045051.0,1.0,0.125,1.0,0.031250,Cash_Annual,"{'Value': 0.33, 'Numerator': 637000.0, 'Denomi...","{'Value': 0.33, 'Numerator': 637000.0, 'Denomi..."
30611,C&L 012012-13Cash_Annual,C&L 012012-13Family Group 4Cash_Annual,C&L 012012-13Aberdeen CityCash_Annual,C&L 01,Aberdeen City,2012-13,3.64,7883000.0,2163756.0,5.0,0.625,24.0,0.750000,Cash_Annual,"{'Value': 0.76, 'Numerator': 1554000.0, 'Denom...","{'Value': 0.33, 'Numerator': 637000.0, 'Denomi..."
30612,C&L 012013-14Cash_Annual,C&L 012013-14Family Group 4Cash_Annual,C&L 012013-14Aberdeen CityCash_Annual,C&L 01,Aberdeen City,2013-14,3.48,7726000.0,2222588.0,5.0,0.625,23.0,0.718750,Cash_Annual,"{'Value': 3.64, 'Numerator': 7883000.0, 'Denom...","{'Value': 0.33, 'Numerator': 637000.0, 'Denomi..."
30613,C&L 012014-15Cash_Annual,C&L 012014-15Family Group 4Cash_Annual,C&L 012014-15Aberdeen CityCash_Annual,C&L 01,Aberdeen City,2014-15,3.02,7517000.0,2487138.0,5.0,0.625,20.0,0.625000,Cash_Annual,"{'Value': 3.48, 'Numerator': 7726000.0, 'Denom...","{'Value': 0.33, 'Numerator': 637000.0, 'Denomi..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62871,CORP 102021-22;Q4Real_Quarterly,CORP 102021-22;Q4Family Group 3Real_Quarterly,CORP 102021-22;Q4West LothianReal_Quarterly,CORP 10,West Lothian,2021-22;Q4,0.69,685.0,990.0,8.0,1.000,27.0,0.843750,Real_Quarterly,"{'Value': 0.68, 'Numerator': 340.0, 'Denominat...","{'Value': 0.98, 'Numerator': 540.0, 'Denominat..."
62872,CORP 102022-23;Q1Real_Quarterly,CORP 102022-23;Q1Family Group 3Real_Quarterly,CORP 102022-23;Q1West LothianReal_Quarterly,CORP 10,West Lothian,2022-23;Q1,0.57,480.0,835.0,8.0,1.000,29.0,0.906250,Real_Quarterly,"{'Value': 0.69, 'Numerator': 685.0, 'Denominat...","{'Value': 0.98, 'Numerator': 540.0, 'Denominat..."
62873,CORP 102022-23;Q2Real_Quarterly,CORP 102022-23;Q2Family Group 3Real_Quarterly,CORP 102022-23;Q2West LothianReal_Quarterly,CORP 10,West Lothian,2022-23;Q2,0.79,650.0,830.0,7.0,1.000,24.0,0.774194,Real_Quarterly,"{'Value': 0.57, 'Numerator': 480.0, 'Denominat...","{'Value': 0.98, 'Numerator': 540.0, 'Denominat..."
62874,CORP 102022-23;Q3Real_Quarterly,CORP 102022-23;Q3Family Group 3Real_Quarterly,CORP 102022-23;Q3West LothianReal_Quarterly,CORP 10,West Lothian,2022-23;Q3,0.84,705.0,840.0,7.0,1.000,24.0,0.774194,Real_Quarterly,"{'Value': 0.79, 'Numerator': 650.0, 'Denominat...","{'Value': 0.98, 'Numerator': 540.0, 'Denominat..."


#### Comparisons to First and Previous Rows

In [165]:
indicatorInfo_dict = info.set_index('Code_Sortable').to_dict('index')
indicatorInfo_dict = NocaseDict(indicatorInfo_dict)
# Define a function that will return an aim adjusted percentage change between two indicator values. There are two niche cases here. One where previous and current values are both 0 resulting in 0% in all cases. Another where only the previous value is 0 resulting in None being returned as it is not possible to calculate % change from 0. Having looked at the dataset this has only occured 3 times and only affects Orkney and Eilean Siar for CHN20b. Further to this changes in percentage indicators are calculated using 100 as a denominator rather than previous. This is to avoid situations where very small percentages return 1000% or more change (which for our purposes seemed unreasonable to report).
def PercentChange_AimAdjusted(Previous, Current, Code):
    Aim = None
    SignedChange = None
    PercentChange = None
    GoldiMid = None
    IsPercentage = False

    if Previous == 0 and Current == 0:
        PercentChange = 0

    indicatorInfo = indicatorInfo_dict[Code]
    Aim = indicatorInfo['Ranking_Type']
    GoldiMid = indicatorInfo['Ranking_GoldilocksMidpoint']
    IsPercentage = indicatorInfo['MeasureType'] == 'Percentage'

    if IsPercentage != True and Previous != 0:
        if Aim == "Descending":
            SignedChange = distance(Current, Previous)
            PercentChange = SignedChange/Previous
        if Aim == "Ascending":
            SignedChange = -distance(Current, Previous)
            PercentChange = SignedChange/Previous
        if Aim == "Goldilocks":
            Current_DistGoldi = abs(distance(Current, GoldiMid))
            Previous_DistGoldi = abs(distance(Previous, GoldiMid))
            SignedChange = distance(Previous_DistGoldi, Current_DistGoldi)
            PercentChange = SignedChange/Previous_DistGoldi
            
    elif IsPercentage == True and Previous != 0:
        if Aim == "Descending":
            PercentChange = distance(Current, Previous)
        if Aim == "Ascending":
            PercentChange = -distance(Current, Previous)
        if Aim == "Goldilocks":
            Current_DistGoldi = abs(distance(Current, GoldiMid))
            Previous_DistGoldi = abs(distance(Previous, GoldiMid))
            PercentChange = distance(Previous_DistGoldi, Current_DistGoldi)

    return PercentChange

# Define a function that calculates the changes in the current row by accessing values in the Previous_Row and First_Row python dictionaries.
def Changes(df):

    # Set the intial value of the return variable to none. This allows us to test to see if there were any changes present for a row and then return None instead of a dictionary of None values if not.
    Changes = None

    # Define all variables that will contain all of the relevant changes for a row.
    ScotRank_ChangeSincePrevious = None
    ScotPct_ChangeSincePrevious = None
    FamilyRank_ChangeSincePrevious = None
    FamilyPct_ChangeSincePrevious = None
    Value_ChangeSincePrevious = None
    Numerator_ChangeSincePrevious = None
    Denominator_ChangeSincePrevious = None
    ScotRank_ChangeSinceFirst = None
    ScotPct_ChangeSinceFirst = None
    FamilyRank_ChangeSinceFirst = None
    FamilyPct_ChangeSinceFirst = None
    Value_ChangeSinceFirst = None
    Numerator_ChangeSinceFirst = None
    Denominator_ChangeSinceFirst = None
    PercentChange_AimAdjusted_SincePrevious = None
    PercentChange_AimAdjusted_SinceFirst = None

    # If the value currently in Previous_Row is not None then there exists a previous object to calculate the changes using.
    if df['Previous_Row'] != None:
        # Calculate all differences by comparing the current rows value to the same columns value in the Previous_Row dictionary. Ranks and Percentiles are always positive so the calculations are more simple. The other values use the distance function defined at the start of the notebook to determine the signed difference between the values (comparing a current value of -1 to a previous value of 2 will result in -3 difference.)
        ScotRank_ChangeSincePrevious = - (df['ScotRank'] - df['Previous_Row'].get('ScotRank'))
        ScotPct_ChangeSincePrevious = - (df['ScotPct'] - df['Previous_Row'].get('ScotPct'))
        FamilyRank_ChangeSincePrevious = - (df['FamilyRank'] - df['Previous_Row'].get('FamilyRank'))
        FamilyPct_ChangeSincePrevious = - (df['FamilyPct'] - df['Previous_Row'].get('FamilyPct'))
        Real_Value_ChangeSincePrevious = distance(df['Value'], df['Previous_Row'].get('Value'))
        Real_Numerator_ChangeSincePrevious = distance(df['Numerator'], df['Previous_Row'].get('Numerator'))
        Real_Denominator_ChangeSincePrevious = distance(df['Denominator'], df['Previous_Row'].get('Denominator'))
        PercentChange_AimAdjusted_SincePrevious = PercentChange_AimAdjusted(df['Previous_Row'].get('Value'), df['Value'], df['Code'])
        #Set Changes to true to avoid creating a dictionary of None values
        Changes = True

    # If the value currently in First_Row is not None then there exists a previous object to calculate the changes using.
    if df['First_Row'] != None:
        # Calculate all differences by comparing the current rows value to the same columns value in the First_Row dictionary. Ranks and Percentiles are always positive so the calculations are more simple. The other values use the distance function defined at the start of the notebook to determine the signed difference between the values (comparing a current value of -1 to a previous value of 2 will result in -3 difference.)
        ScotRank_ChangeSinceFirst = - (df['ScotRank'] - df['First_Row'].get('ScotRank'))
        ScotPct_ChangeSinceFirst = - (df['ScotPct'] - df['First_Row'].get('ScotPct'))
        FamilyRank_ChangeSinceFirst = - (df['FamilyRank'] - df['First_Row'].get('FamilyRank'))
        FamilyPct_ChangeSinceFirst = - (df['FamilyPct'] - df['First_Row'].get('FamilyPct'))
        Real_Value_ChangeSinceFirst = distance(df['Value'], df['First_Row'].get('Value'))
        Real_Numerator_ChangeSinceFirst = distance(df['Numerator'], df['First_Row'].get('Numerator'))
        Real_Denominator_ChangeSinceFirst = distance(df['Denominator'], df['First_Row'].get('Denominator'))
        PercentChange_AimAdjusted_SinceFirst = PercentChange_AimAdjusted(df['First_Row'].get('Value'), df['Value'], df['Code'])
        #Set Changes to true to avoid creating a dictionary of None values
        Changes = True

    # If there were changes recorded in the previous steps then write these changes into a python dictionary and assign this to Changes
    if Changes != None:
        Changes = {
            "ScotRank_ChangeSincePrevious": ScotRank_ChangeSincePrevious,
            "ScotPct_ChangeSincePrevious": ScotPct_ChangeSincePrevious,
            "FamilyRank_ChangeSincePrevious": FamilyRank_ChangeSincePrevious,
            "FamilyPct_ChangeSincePrevious": FamilyPct_ChangeSincePrevious,
            "ScotRank_ChangeSinceFirst": ScotRank_ChangeSinceFirst,
            "ScotPct_ChangeSinceFirst": ScotPct_ChangeSinceFirst,
            "FamilyRank_ChangeSinceFirst": FamilyRank_ChangeSinceFirst,
            "FamilyPct_ChangeSinceFirst": FamilyPct_ChangeSinceFirst,
            "Value_ChangeSincePrevious": Value_ChangeSincePrevious,
            "Numerator_ChangeSincePrevious": Numerator_ChangeSincePrevious,
            "Denominator_ChangeSincePrevious": Denominator_ChangeSincePrevious,
            "Value_ChangeSinceFirst": Value_ChangeSinceFirst,
            "Numerator_ChangeSinceFirst": Numerator_ChangeSinceFirst,
            "Denominator_ChangeSinceFirst": Denominator_ChangeSinceFirst,
            "PercentChange_AimAdjusted_SincePrevious": PercentChange_AimAdjusted_SincePrevious,
            "PercentChange_AimAdjusted_SinceFirst": PercentChange_AimAdjusted_SinceFirst
        }

    return Changes

#Apply the above function and output it to a new column named Changes.
IndicatorDataMain['Changes'] = IndicatorDataMain.apply(Changes, axis=1)
IndicatorDataMain.head(5)

Unnamed: 0,Key_CodePeriodDType,Key_CodePeriodFamilyGroupDType,Key_CodePeriodLADType,Code,LocalAuthority,Period,Value,Numerator,Denominator,FamilyRank,FamilyPct,ScotRank,ScotPct,Data Type,Previous_Row,First_Row,Changes
30609,C&L 012010-11Cash_Annual,C&L 012010-11Family Group 4Cash_Annual,C&L 012010-11Aberdeen CityCash_Annual,C&L 01,Aberdeen City,2010-11,0.33,637000.0,1922292.0,1.0,0.125,1.0,0.03125,Cash_Annual,,,
30610,C&L 012011-12Cash_Annual,C&L 012011-12Family Group 4Cash_Annual,C&L 012011-12Aberdeen CityCash_Annual,C&L 01,Aberdeen City,2011-12,0.76,1554000.0,2045051.0,1.0,0.125,1.0,0.03125,Cash_Annual,"{'Value': 0.33, 'Numerator': 637000.0, 'Denomi...","{'Value': 0.33, 'Numerator': 637000.0, 'Denomi...","{'ScotRank_ChangeSincePrevious': -0.0, 'ScotPc..."
30611,C&L 012012-13Cash_Annual,C&L 012012-13Family Group 4Cash_Annual,C&L 012012-13Aberdeen CityCash_Annual,C&L 01,Aberdeen City,2012-13,3.64,7883000.0,2163756.0,5.0,0.625,24.0,0.75,Cash_Annual,"{'Value': 0.76, 'Numerator': 1554000.0, 'Denom...","{'Value': 0.33, 'Numerator': 637000.0, 'Denomi...","{'ScotRank_ChangeSincePrevious': -23.0, 'ScotP..."
30612,C&L 012013-14Cash_Annual,C&L 012013-14Family Group 4Cash_Annual,C&L 012013-14Aberdeen CityCash_Annual,C&L 01,Aberdeen City,2013-14,3.48,7726000.0,2222588.0,5.0,0.625,23.0,0.71875,Cash_Annual,"{'Value': 3.64, 'Numerator': 7883000.0, 'Denom...","{'Value': 0.33, 'Numerator': 637000.0, 'Denomi...","{'ScotRank_ChangeSincePrevious': 1.0, 'ScotPct..."
30613,C&L 012014-15Cash_Annual,C&L 012014-15Family Group 4Cash_Annual,C&L 012014-15Aberdeen CityCash_Annual,C&L 01,Aberdeen City,2014-15,3.02,7517000.0,2487138.0,5.0,0.625,20.0,0.625,Cash_Annual,"{'Value': 3.48, 'Numerator': 7726000.0, 'Denom...","{'Value': 0.33, 'Numerator': 637000.0, 'Denomi...","{'ScotRank_ChangeSincePrevious': 3.0, 'ScotPct..."


#### Convert Python Dictionaries to JSON

In [166]:
def PreviousConvertToJson(df):
    Previous_Row = simplejson.dumps(df['Previous_Row'], ignore_nan=True)
    return Previous_Row


def FirstConvertToJson(df):
    First_Row = simplejson.dumps(df['First_Row'], ignore_nan=True)
    return First_Row


def ChangesConvertToJson(df):
    Changes = simplejson.dumps(df['Changes'], ignore_nan=True)
    return Changes

IndicatorDataMain['Previous_Row'] = IndicatorDataMain.apply(PreviousConvertToJson, axis=1)
IndicatorDataMain['First_Row'] = IndicatorDataMain.apply(FirstConvertToJson, axis=1)
IndicatorDataMain['Changes'] = IndicatorDataMain.apply(ChangesConvertToJson, axis=1)

IndicatorDataMain.head(5)

Unnamed: 0,Key_CodePeriodDType,Key_CodePeriodFamilyGroupDType,Key_CodePeriodLADType,Code,LocalAuthority,Period,Value,Numerator,Denominator,FamilyRank,FamilyPct,ScotRank,ScotPct,Data Type,Previous_Row,First_Row,Changes
30609,C&L 012010-11Cash_Annual,C&L 012010-11Family Group 4Cash_Annual,C&L 012010-11Aberdeen CityCash_Annual,C&L 01,Aberdeen City,2010-11,0.33,637000.0,1922292.0,1.0,0.125,1.0,0.03125,Cash_Annual,,,
30610,C&L 012011-12Cash_Annual,C&L 012011-12Family Group 4Cash_Annual,C&L 012011-12Aberdeen CityCash_Annual,C&L 01,Aberdeen City,2011-12,0.76,1554000.0,2045051.0,1.0,0.125,1.0,0.03125,Cash_Annual,"{""Value"": 0.33, ""Numerator"": 637000.0, ""Denomi...","{""Value"": 0.33, ""Numerator"": 637000.0, ""Denomi...","{""ScotRank_ChangeSincePrevious"": -0.0, ""ScotPc..."
30611,C&L 012012-13Cash_Annual,C&L 012012-13Family Group 4Cash_Annual,C&L 012012-13Aberdeen CityCash_Annual,C&L 01,Aberdeen City,2012-13,3.64,7883000.0,2163756.0,5.0,0.625,24.0,0.75,Cash_Annual,"{""Value"": 0.76, ""Numerator"": 1554000.0, ""Denom...","{""Value"": 0.33, ""Numerator"": 637000.0, ""Denomi...","{""ScotRank_ChangeSincePrevious"": -23.0, ""ScotP..."
30612,C&L 012013-14Cash_Annual,C&L 012013-14Family Group 4Cash_Annual,C&L 012013-14Aberdeen CityCash_Annual,C&L 01,Aberdeen City,2013-14,3.48,7726000.0,2222588.0,5.0,0.625,23.0,0.71875,Cash_Annual,"{""Value"": 3.64, ""Numerator"": 7883000.0, ""Denom...","{""Value"": 0.33, ""Numerator"": 637000.0, ""Denomi...","{""ScotRank_ChangeSincePrevious"": 1.0, ""ScotPct..."
30613,C&L 012014-15Cash_Annual,C&L 012014-15Family Group 4Cash_Annual,C&L 012014-15Aberdeen CityCash_Annual,C&L 01,Aberdeen City,2014-15,3.02,7517000.0,2487138.0,5.0,0.625,20.0,0.625,Cash_Annual,"{""Value"": 3.48, ""Numerator"": 7726000.0, ""Denom...","{""Value"": 0.33, ""Numerator"": 637000.0, ""Denomi...","{""ScotRank_ChangeSincePrevious"": 3.0, ""ScotPct..."


### Latest Values Extraction

In [167]:
LatestValues = IndicatorDataMain.copy(deep=True)
LatestValues['Period'] = LatestValues.Period.replace({'January': '01', 'February': '02', 'March': '03', 'April': '04', 'May': '05','June': '06', 'July': '07', 'August': '08', 'September': '09', 'October': '10', 'November': '11', 'December': '12'}, regex=True)
LatestValues.sort_values(by=['Data Type','LocalAuthority', 'Code', 'Period'], inplace=True)
LatestValues = LatestValues.groupby(['Data Type', 'LocalAuthority', 'Code']).tail(1)
LatestValues = LatestValues[['Key_CodePeriodDType','Key_CodePeriodLADType','Key_CodePeriodFamilyGroupDType','Data Type', 'LocalAuthority', 'Code', 'Period', 'Value', 'Numerator', 'Denominator', 'ScotRank', 'ScotPct', 'FamilyRank', 'FamilyPct', 'Previous_Row', 'First_Row', 'Changes']]

LatestValues.head(5)

Unnamed: 0,Key_CodePeriodDType,Key_CodePeriodLADType,Key_CodePeriodFamilyGroupDType,Data Type,LocalAuthority,Code,Period,Value,Numerator,Denominator,ScotRank,ScotPct,FamilyRank,FamilyPct,Previous_Row,First_Row,Changes
30620,C&L 012021-22Cash_Annual,C&L 012021-22Aberdeen CityCash_Annual,C&L 012021-22Family Group 4Cash_Annual,Cash_Annual,Aberdeen City,C&L 01,2021-22,4.8,6888000.0,1434492.0,11.0,0.34375,3.0,0.375,"{""Value"": 27.68, ""Numerator"": 7032000.0, ""Deno...","{""Value"": 0.33, ""Numerator"": 637000.0, ""Denomi...","{""ScotRank_ChangeSincePrevious"": 2.0, ""ScotPct..."
30632,C&L 022021-22Cash_Annual,C&L 022021-22Aberdeen CityCash_Annual,C&L 022021-22Family Group 4Cash_Annual,Cash_Annual,Aberdeen City,C&L 02,2021-22,2.45,3300000.0,1347180.0,9.0,0.28125,2.0,0.25,"{""Value"": 10.74, ""Numerator"": 3225000.0, ""Deno...","{""Value"": 3.6, ""Numerator"": 5695000.0, ""Denomi...","{""ScotRank_ChangeSincePrevious"": 15.0, ""ScotPc..."
30644,C&L 032021-22Cash_Annual,C&L 032021-22Aberdeen CityCash_Annual,C&L 032021-22Family Group 4Cash_Annual,Cash_Annual,Aberdeen City,C&L 03,2021-22,3.13,3722000.0,1190854.0,8.0,0.307692,3.0,0.428571,"{""Value"": 16.43, ""Numerator"": 3111000.0, ""Deno...","{""Value"": 5.02, ""Numerator"": 3528000.0, ""Denom...","{""ScotRank_ChangeSincePrevious"": 7.0, ""ScotPct..."
30656,C&L 042021-22Cash_Annual,C&L 042021-22Aberdeen CityCash_Annual,C&L 042021-22Family Group 4Cash_Annual,Cash_Annual,Aberdeen City,C&L 04,2021-22,14281.32,3248000.0,227.43,7.0,0.21875,2.0,0.25,"{""Value"": 12795.77, ""Numerator"": 2931000.0, ""D...","{""Value"": 21412.12, ""Numerator"": 4649000.0, ""D...","{""ScotRank_ChangeSincePrevious"": -0.0, ""ScotPc..."
30664,C&L 05a2018-21Cash_Annual,C&L 05a2018-21Aberdeen CityCash_Annual,C&L 05a2018-21Family Group 4Cash_Annual,Cash_Annual,Aberdeen City,C&L 05a,2018-21,0.724,,,20.0,0.625,6.0,0.75,"{""Value"": 0.727, ""Numerator"": null, ""Denominat...","{""Value"": 0.734, ""Numerator"": null, ""Denominat...","{""ScotRank_ChangeSincePrevious"": -1.0, ""ScotPc..."


### Error Checks

In [168]:
# Default position is to assume checks are passed. If any of the checks (excluding numerator denominator checks) are failed below this will be changed and the final csv's will not be output. The numerator denominator errors should be checked at each refresh. The known errors (which exist within the LGBF raw data file) will be identified in the readme in the Error Outputs folder.
ChecksFailed = False

# ScotRank should be between 32 and 1 and should not contain any NaN values
if not IndicatorDataMain['ScotRank'].between(1, 32).any() or IndicatorDataMain['ScotRank'].isnull().values.any():

    ChecksFailed = True
    maxrank = str(max(IndicatorDataMain['ScotRank']))
    minrank = str(min(IndicatorDataMain['ScotRank']))
    countnull = str(IndicatorDataMain['ScotRank'].isna().sum())

    print("ScotRank checks failed : Max - " + maxrank + ", Min - " + minrank + ", Count of Null - " + countnull)

# FamilyRank should be between 8 and 1 and should not contain any NaN values
if not IndicatorDataMain['FamilyRank'].between(1, 8).any() or IndicatorDataMain['FamilyRank'].isnull().values.any():

    ChecksFailed = True
    maxrank = str(max(IndicatorDataMain['FamilyRank']))
    minrank = str(min(IndicatorDataMain['FamilyRank']))
    countnull = str(IndicatorDataMain['FamilyRank'].isna().sum())

    print("FamilyRank checks failed : Max - " + maxrank + ", Min - " + minrank + ", Count of Null - " + countnull)

# Code, Local_Authority, Period, Value should not contain any null values
if IndicatorDataMain[['Code', 'LocalAuthority', 'Period', 'Value']].isnull().values.any():

    ChecksFailed = True
    countnullCode = str(IndicatorDataMain['Code'].isna().sum())
    countnullLocal_Authority = str(IndicatorDataMain['LocalAuthority'].isna().sum())
    countnullPeriod = str(IndicatorDataMain['Period'].isna().sum())
    countnullReal_Value = str(IndicatorDataMain['Value'].isna().sum())

    print("Null values found : Code - " + countnullCode + ", LocalAuthority - " + countnullLocal_Authority + ", Period - " + countnullPeriod + ", Value - " + countnullReal_Value)

# Value should equal numerator/denominator for both cash and real - These errors have been checked and exist in the original raw data file.
NumDenCheck = IndicatorDataMain.copy(deep=True)
NumDenCheck = IndicatorDataMain.rename(columns = {"Data Type": "DataType"})
NumDenCheck = NumDenCheck[pd.notnull(NumDenCheck['Numerator'])]

NumDenDivide_Checks = []
NumDenDivide_Check = None
FailReferences = []

for row in NumDenCheck.itertuples():
    if row.Value == 0 or math.isnan(row.Denominator) or math.isnan(row.Numerator):
        NumDenDivide_Check = None
    else:
        if math.isclose(row.Numerator/row.Denominator, row.Value, rel_tol=0.02):
            NumDenDivide_Check = True
        else:
            NumDenDivide_Check = False
            FailReferences.append(row.DataType + ":" + row.Code + ":" + row.Period + ":" + row.LocalAuthority + ":" + str(row.Value) + ":" + str(row.Numerator) + ":" + str(row.Denominator))

    NumDenDivide_Checks.append(NumDenDivide_Check)

if False in NumDenDivide_Checks :
    print("Numerator/Denominator values check failed : See Error Outputs for csv of failures")
    FailReferences = sorted(list(set(FailReferences)))
    FailReferences = pd.DataFrame([sub.split(":") for sub in FailReferences])
    FailReferences = FailReferences.rename(columns={0: 'Type', 1: 'Code', 2: 'Period', 3: 'Local Authority', 4: 'Value', 5: 'Numerator', 6: 'Denominator'})
    FailReferences.to_csv("Error Outputs//Numerator Denominator Fail References.csv", index=False, encoding='utf-8-sig')


Numerator/Denominator values check failed : See Error Outputs for csv of failures


### Export Main Data and Latest to CSV

In [169]:
if ChecksFailed == False :
    IndicatorDataMain.to_csv("Data Files//Indicator Data.csv", index=False, encoding='utf-8-sig')
    LatestValues.to_csv("Data Files//Latest Values.csv", index = False, encoding='utf-8-sig')
else :
    print("Checks failed! Check output from section 11 for detail")