### Import Libraries

In [215]:
import pandas as pd
import numpy as np
from nocasedict import NocaseDict
import simplejson
import math

### Load data from CSVs

In [216]:
# Raw LGBF data table; read as tab-separated even though the file carries a .csv extension.
# NOTE(review): this is an absolute, user-specific Windows path, so it will not resolve on
# another machine — consider keeping the file alongside the other inputs in 'Data Files'.
data = pd.read_csv('C:\\Users\\lowsona\\Desktop\\LGBF_Data_Table.csv',sep='\t')
# Lookup tables, read relative to the current working directory. Both are left-merged onto
# `data` in the next cell (info on the indicator code, fg on local authority + grouping type).
info = pd.read_csv('Data Files\\Indicator Information.csv')
fg = pd.read_csv('Data Files\\Family Groups.csv')

### Clean and Merge Dataframes

In [217]:
# CHN14a and CHN14b are not supplied in decimal percentage format, so both the
# local-authority value and the Scotland value of those indicators are divided by 100.
chn14_rows = data['Indicators_Information_Code'].isin(['CHN14a', 'CHN14b'])
for chn14_column in ('LA_Data_LA_IndicatorReal', 'Scotland_Data_Scotland_Indicator_Real'):
    data[chn14_column] = np.where(chn14_rows, data[chn14_column] / 100, data[chn14_column])

# Main file transformations: left-join the indicator information and the family-group
# lookup onto every data row, so rows without a match are kept with missing values.
merged_data = (
    data
    .merge(info, how='left', left_on='Indicators_Information_Code', right_on='Code')
    .merge(
        fg,
        how='left',
        left_on=['LA_Information_LocalAuthority', 'FamilyGrouping'],
        right_on=['Local_Authority', 'Type'],
    )
)

# Source column -> output column. The key order is also the output column order.
column_renames = {
    'Code_Sortable': 'Code',
    'LA_Information_LocalAuthority': 'LocalAuthority',
    'LA_Data_LGBF_Year': 'Period',
    'LA_Data_LA_IndicatorReal': 'Real_Value',
    'LA_Data_LA_Numerator_real': 'Real_Numerator',
    'LA_Data_LA_Den_Real': 'Real_Denominator',
    'Scotland_Data_Scotland_Indicator_Real': 'Scot_Real_Value',
    'Scotland_Data_Scotland_Num_Real': 'Scot_Real_Numerator',
    'Scotland_Data_Scotland_Den_Real': 'Scot_Real_Denominator',
    'FamilyGrouping': 'FG_Type',
}
# Columns that are kept under their existing names, placed after the renamed ones.
passthrough_columns = [
    'Ranking_Type',
    'Ranking_GoldilocksMidpoint',
    'Family_Group',
    'Numerator_Multipier',
    'Denominator_Multiplier',
]
merged_data = merged_data[list(column_renames) + passthrough_columns].rename(columns=column_renames)

# Scale each numerator/denominator by the multiplier column that came from the indicator information.
for scaled_column, multiplier_column in (
    ('Real_Numerator', 'Numerator_Multipier'),
    ('Scot_Real_Numerator', 'Numerator_Multipier'),
    ('Real_Denominator', 'Denominator_Multiplier'),
    ('Scot_Real_Denominator', 'Denominator_Multiplier'),
):
    merged_data[scaled_column] = merged_data[scaled_column] * merged_data[multiplier_column]
merged_data.head(5)


Unnamed: 0,Code,LocalAuthority,Period,Real_Value,Real_Numerator,Real_Denominator,Scot_Real_Value,Scot_Real_Numerator,Scot_Real_Denominator,FG_Type,Ranking_Type,Ranking_GoldilocksMidpoint,Family_Group,Numerator_Multipier,Denominator_Multiplier
0,C&L 01,Aberdeen City,2010-11,0.406679,785018.8,1922292.0,4.374884,198956051.7,45459818.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0
1,C&L 01,Aberdeen City,2011-12,0.922524,1886603.0,2045051.0,3.916959,188806026.5,48202343.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0
2,C&L 01,Aberdeen City,2012-13,4.335545,9381043.0,2163756.0,3.73456,192795134.1,51624697.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0
3,C&L 01,Aberdeen City,2013-14,4.044078,8988324.0,2222588.0,3.710667,197855971.2,53320837.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0
4,C&L 01,Aberdeen City,2014-15,3.476122,8645573.0,2487138.0,3.408087,179623656.7,52705262.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0


### Average Calculations

#### Mean and Medians - Scottish

In [218]:
# Columns carried into the average calculations. This frame is reused by the
# family-group averages cell, so Family_Group has to stay in the selection.
merged_data_averagecleaned = merged_data[[
                                'Code',
                                'LocalAuthority',
                                'Period',
                                'Real_Value',
                                'Real_Numerator',
                                'Real_Denominator',
                                'FG_Type',
                                'Family_Group'
                            ]]
# numeric_only=True restricts the aggregation to the numeric columns. Without it,
# recent pandas versions raise a TypeError on the text columns (LocalAuthority,
# FG_Type, Family_Group) instead of silently dropping them as older versions did.
scot_average_groups = merged_data_averagecleaned.groupby(['Code', 'Period'], as_index=False)
ScotAverages_Mean = scot_average_groups.mean(numeric_only=True)
ScotAverages_Median = scot_average_groups.median(numeric_only=True)
# One row per Code/Period, with every aggregated column suffixed _Mean or _Median.
ScotAverages = ScotAverages_Mean.merge(ScotAverages_Median, how='left', on=['Code', 'Period'], suffixes=('_Mean', '_Median'))
ScotAverages

Unnamed: 0,Code,Period,Real_Value_Mean,Real_Numerator_Mean,Real_Denominator_Mean,Real_Value_Median,Real_Numerator_Median,Real_Denominator_Median
0,C&L 01,2010-11,4.064098,6.217377e+06,1.420619e+06,4.239325,4.469185e+06,1080558.5
1,C&L 01,2011-12,3.806004,5.900188e+06,1.506323e+06,3.513760,4.074893e+06,1099975.5
2,C&L 01,2012-13,3.651857,6.024848e+06,1.613272e+06,3.200095,4.491786e+06,1180585.0
3,C&L 01,2013-14,3.579265,6.182999e+06,1.666276e+06,3.183841,4.398764e+06,1187107.5
4,C&L 01,2014-15,3.461686,5.613239e+06,1.647039e+06,2.905894,4.543038e+06,1104013.0
...,...,...,...,...,...,...,...,...
964,SW 07,2017-18,0.867717,,,0.870980,,
965,SW 07,2018-19,0.843080,,,0.848260,,
966,SW 07,2019-20,0.844733,,,0.845795,,
967,SW 07,2020-21,0.853906,,,0.851000,,


#### Sum Num/Den - Scottish

In [219]:
merged_data_numdenaveragecleaned = merged_data[[
                                'Code',
                                'LocalAuthority',
                                'Period',
                                'Real_Numerator',
                                'Real_Denominator',
                                'FG_Type',
                                'Family_Group'
                            ]]
# Sum numerators and denominators per Code/Period. numeric_only=True keeps the
# aggregation off the text columns, which are not used after this step.
ScotAverages_NumDenSums = merged_data_numdenaveragecleaned.groupby(['Code', 'Period'], as_index=False).sum(numeric_only=True)
# Ratio of the summed numerator to the summed denominator.
ScotAverages_NumDenSums['Real_Scot_NumDenAv'] = ScotAverages_NumDenSums['Real_Numerator'] / ScotAverages_NumDenSums['Real_Denominator']
ScotAverages_NumDenSums = ScotAverages_NumDenSums[['Code', 'Period', 'Real_Scot_NumDenAv']]
# A zero denominator produces +/-inf; store those as missing instead.
ScotAverages_NumDenSums = ScotAverages_NumDenSums.replace([np.inf, -np.inf], np.nan)

ScotAverages = ScotAverages.merge(ScotAverages_NumDenSums, how = 'left', on = ['Code', 'Period'])
ScotAverages


Unnamed: 0,Code,Period,Real_Value_Mean,Real_Numerator_Mean,Real_Denominator_Mean,Real_Value_Median,Real_Numerator_Median,Real_Denominator_Median,Real_Scot_NumDenAv
0,C&L 01,2010-11,4.064098,6.217377e+06,1.420619e+06,4.239325,4.469185e+06,1080558.5,4.376525
1,C&L 01,2011-12,3.806004,5.900188e+06,1.506323e+06,3.513760,4.074893e+06,1099975.5,3.916947
2,C&L 01,2012-13,3.651857,6.024848e+06,1.613272e+06,3.200095,4.491786e+06,1180585.0,3.734552
3,C&L 01,2013-14,3.579265,6.182999e+06,1.666276e+06,3.183841,4.398764e+06,1187107.5,3.710669
4,C&L 01,2014-15,3.461686,5.613239e+06,1.647039e+06,2.905894,4.543038e+06,1104013.0,3.408078
...,...,...,...,...,...,...,...,...,...
964,SW 07,2017-18,0.867717,,,0.870980,,,
965,SW 07,2018-19,0.843080,,,0.848260,,,
966,SW 07,2019-20,0.844733,,,0.845795,,,
967,SW 07,2020-21,0.853906,,,0.851000,,,


#### Add Keys & Sort Columns

In [220]:
# Build the Code+Period lookup key (plain concatenation, no separator) and put it first.
scot_average_columns = [
    'Key_CodePeriod',
    'Code',
    'Period',
    'Real_Value_Mean',
    'Real_Numerator_Mean',
    'Real_Denominator_Mean',
    'Real_Value_Median',
    'Real_Numerator_Median',
    'Real_Denominator_Median',
    'Real_Scot_NumDenAv',
]
ScotAverages = ScotAverages.assign(
    Key_CodePeriod=ScotAverages['Code'] + ScotAverages['Period']
)[scot_average_columns]
ScotAverages

Unnamed: 0,Key_CodePeriod,Code,Period,Real_Value_Mean,Real_Numerator_Mean,Real_Denominator_Mean,Real_Value_Median,Real_Numerator_Median,Real_Denominator_Median,Real_Scot_NumDenAv
0,C&L 012010-11,C&L 01,2010-11,4.064098,6.217377e+06,1.420619e+06,4.239325,4.469185e+06,1080558.5,4.376525
1,C&L 012011-12,C&L 01,2011-12,3.806004,5.900188e+06,1.506323e+06,3.513760,4.074893e+06,1099975.5,3.916947
2,C&L 012012-13,C&L 01,2012-13,3.651857,6.024848e+06,1.613272e+06,3.200095,4.491786e+06,1180585.0,3.734552
3,C&L 012013-14,C&L 01,2013-14,3.579265,6.182999e+06,1.666276e+06,3.183841,4.398764e+06,1187107.5,3.710669
4,C&L 012014-15,C&L 01,2014-15,3.461686,5.613239e+06,1.647039e+06,2.905894,4.543038e+06,1104013.0,3.408078
...,...,...,...,...,...,...,...,...,...,...
964,SW 072017-18,SW 07,2017-18,0.867717,,,0.870980,,,
965,SW 072018-19,SW 07,2018-19,0.843080,,,0.848260,,,
966,SW 072019-20,SW 07,2019-20,0.844733,,,0.845795,,,
967,SW 072020-21,SW 07,2020-21,0.853906,,,0.851000,,,


#### Output File

In [221]:
# Write the Scottish averages table without the row index. 'utf-8-sig' prefixes the
# file with a byte-order mark (presumably so Excel detects the encoding — confirm).
ScotAverages.to_csv('Data Files\\Scottish Averages.csv', index=False, encoding='utf-8-sig')

#### Mean and Medians - Family Groups

In [222]:
# Mean and median per Code / Family_Group / Period. numeric_only=True restricts the
# aggregation to the numeric columns; without it, recent pandas versions raise a
# TypeError on the remaining text columns (LocalAuthority, FG_Type).
fg_average_groups = merged_data_averagecleaned.groupby(['Code', 'Family_Group', 'Period'], as_index=False)
FGAverages_Mean = fg_average_groups.mean(numeric_only=True)
FGAverages_Median = fg_average_groups.median(numeric_only=True)
# One row per group, with every aggregated column suffixed _Mean or _Median.
FGAverages = FGAverages_Mean.merge(FGAverages_Median, how='left', on=['Code','Family_Group','Period'], suffixes=('_Mean', '_Median'))
FGAverages

Unnamed: 0,Code,Family_Group,Period,Real_Value_Mean,Real_Numerator_Mean,Real_Denominator_Mean,Real_Value_Median,Real_Numerator_Median,Real_Denominator_Median
0,C&L 01,Family Group 1,2010-11,3.710950,3.790920e+06,923124.125,3.999014,2.409281e+06,695444.0
1,C&L 01,Family Group 1,2011-12,3.689641,3.901135e+06,1036476.000,3.478073,2.216213e+06,707515.5
2,C&L 01,Family Group 1,2012-13,3.297381,3.775534e+06,1065939.000,3.100354,2.228935e+06,714443.0
3,C&L 01,Family Group 1,2013-14,3.179201,3.436062e+06,1040247.500,3.079052,2.497791e+06,687793.5
4,C&L 01,Family Group 1,2014-15,3.403019,3.508778e+06,993567.375,3.340128,2.320975e+06,689040.0
...,...,...,...,...,...,...,...,...,...
3871,SW 07,Family Group 4,2017-18,0.865630,,,0.873106,,
3872,SW 07,Family Group 4,2018-19,0.824202,,,0.862044,,
3873,SW 07,Family Group 4,2019-20,0.848583,,,0.849656,,
3874,SW 07,Family Group 4,2020-21,0.842375,,,0.842500,,


#### Sum Num/Den - Family Groups

In [223]:
merged_data_numdenaveragecleaned = merged_data[[
                                                    'Code',
                                                    'LocalAuthority',
                                                    'Period',
                                                    'Real_Numerator',
                                                    'Real_Denominator',
                                                    'FG_Type',
                                                    'Family_Group'
                                                ]]
# Sum numerators and denominators per Code / Family_Group / Period. numeric_only=True
# keeps the aggregation off the text columns, which are not used after this step.
FGAverages_NumDenSums = merged_data_numdenaveragecleaned.groupby(['Code', 'Family_Group', 'Period'], as_index=False).sum(numeric_only=True)
# Ratio of the summed numerator to the summed denominator within each family group.
FGAverages_NumDenSums['Real_FG_NumDenAv'] = FGAverages_NumDenSums['Real_Numerator'] / FGAverages_NumDenSums['Real_Denominator']
FGAverages_NumDenSums = FGAverages_NumDenSums[['Code', 'Family_Group', 'Period', 'Real_FG_NumDenAv']]
# A zero denominator produces +/-inf; store those as missing instead.
FGAverages_NumDenSums = FGAverages_NumDenSums.replace([np.inf, -np.inf], np.nan)

FGAverages = FGAverages.merge(FGAverages_NumDenSums, how='left', on=['Code', 'Family_Group', 'Period'])
FGAverages


Unnamed: 0,Code,Family_Group,Period,Real_Value_Mean,Real_Numerator_Mean,Real_Denominator_Mean,Real_Value_Median,Real_Numerator_Median,Real_Denominator_Median,Real_FG_NumDenAv
0,C&L 01,Family Group 1,2010-11,3.710950,3.790920e+06,923124.125,3.999014,2.409281e+06,695444.0,4.106620
1,C&L 01,Family Group 1,2011-12,3.689641,3.901135e+06,1036476.000,3.478073,2.216213e+06,707515.5,3.763845
2,C&L 01,Family Group 1,2012-13,3.297381,3.775534e+06,1065939.000,3.100354,2.228935e+06,714443.0,3.541979
3,C&L 01,Family Group 1,2013-14,3.179201,3.436062e+06,1040247.500,3.079052,2.497791e+06,687793.5,3.303120
4,C&L 01,Family Group 1,2014-15,3.403019,3.508778e+06,993567.375,3.340128,2.320975e+06,689040.0,3.531495
...,...,...,...,...,...,...,...,...,...,...
3871,SW 07,Family Group 4,2017-18,0.865630,,,0.873106,,,
3872,SW 07,Family Group 4,2018-19,0.824202,,,0.862044,,,
3873,SW 07,Family Group 4,2019-20,0.848583,,,0.849656,,,
3874,SW 07,Family Group 4,2020-21,0.842375,,,0.842500,,,


#### Add Keys & Sort Columns

In [224]:
# Build the Code+Period lookup key (plain concatenation, no separator) and fix the column order.
fg_average_columns = [
    'Key_CodePeriod',
    'Code',
    'Period',
    'Family_Group',
    'Real_Value_Mean',
    'Real_Numerator_Mean',
    'Real_Denominator_Mean',
    'Real_Value_Median',
    'Real_Numerator_Median',
    'Real_Denominator_Median',
    'Real_FG_NumDenAv',
]
FGAverages = FGAverages.assign(
    Key_CodePeriod=FGAverages['Code'] + FGAverages['Period']
)[fg_average_columns]
FGAverages

Unnamed: 0,Key_CodePeriod,Code,Period,Family_Group,Real_Value_Mean,Real_Numerator_Mean,Real_Denominator_Mean,Real_Value_Median,Real_Numerator_Median,Real_Denominator_Median,Real_FG_NumDenAv
0,C&L 012010-11,C&L 01,2010-11,Family Group 1,3.710950,3.790920e+06,923124.125,3.999014,2.409281e+06,695444.0,4.106620
1,C&L 012011-12,C&L 01,2011-12,Family Group 1,3.689641,3.901135e+06,1036476.000,3.478073,2.216213e+06,707515.5,3.763845
2,C&L 012012-13,C&L 01,2012-13,Family Group 1,3.297381,3.775534e+06,1065939.000,3.100354,2.228935e+06,714443.0,3.541979
3,C&L 012013-14,C&L 01,2013-14,Family Group 1,3.179201,3.436062e+06,1040247.500,3.079052,2.497791e+06,687793.5,3.303120
4,C&L 012014-15,C&L 01,2014-15,Family Group 1,3.403019,3.508778e+06,993567.375,3.340128,2.320975e+06,689040.0,3.531495
...,...,...,...,...,...,...,...,...,...,...,...
3871,SW 072017-18,SW 07,2017-18,Family Group 4,0.865630,,,0.873106,,,
3872,SW 072018-19,SW 07,2018-19,Family Group 4,0.824202,,,0.862044,,,
3873,SW 072019-20,SW 07,2019-20,Family Group 4,0.848583,,,0.849656,,,
3874,SW 072020-21,SW 07,2020-21,Family Group 4,0.842375,,,0.842500,,,


#### Output File

In [225]:
# Write the family-group averages table without the row index. 'utf-8-sig' prefixes the
# file with a byte-order mark (presumably so Excel detects the encoding — confirm).
FGAverages.to_csv('Data Files\\Family Averages.csv',index=False, encoding='utf-8-sig')

### Separate Scottish Values

In [226]:
# The Scotland-level figures are repeated on every local-authority row, so collapse them
# to one row per Code/Period, taking the first non-missing entry of each column.
scot_value_columns = ['Code', 'Period', 'Scot_Real_Value', 'Scot_Real_Numerator', 'Scot_Real_Denominator']
ScotValues = (
    merged_data[scot_value_columns]
    .groupby(['Code', 'Period'], as_index=False)
    .first()
)
ScotValues.to_csv('Data Files\\Scottish Values.csv',index=False, encoding='utf-8-sig')
ScotValues

Unnamed: 0,Code,Period,Scot_Real_Value,Scot_Real_Numerator,Scot_Real_Denominator
0,C&L 01,2010-11,4.374884,198956051.7,45459818.0
1,C&L 01,2011-12,3.916959,188806026.5,48202343.0
2,C&L 01,2012-13,3.734560,192795134.1,51624697.0
3,C&L 01,2013-14,3.710667,197855971.2,53320837.0
4,C&L 01,2014-15,3.408087,179623656.7,52705262.0
...,...,...,...,...,...
964,SW 07,2017-18,0.853846,,
965,SW 07,2018-19,0.821737,,
966,SW 07,2019-20,0.817997,,
967,SW 07,2020-21,0.825000,,


In [227]:
# Read the file written by the previous cell back in and list its columns,
# as a quick check of what was exported.
df = pd.read_csv('Data Files\\Scottish Values.csv')
df.columns

Index(['Code', 'Period', 'Scot_Real_Value', 'Scot_Real_Numerator',
       'Scot_Real_Denominator'],
      dtype='object')

### Ranking Calculations

#### Family Group Ranks

In [228]:
# Independent working copies: FamilyRanks receives the ascending/descending ranks,
# FamilyRanksGoldi is kept aside for the Goldilocks (distance-from-midpoint) ranking.
FamilyRanks = merged_data[[
    'Code',
    'LocalAuthority',
    'Period',
    'Real_Value',
    'Family_Group',
    'Ranking_GoldilocksMidpoint',
]].copy(deep=True)
FamilyRanksGoldi = FamilyRanks.copy(deep=True)

# Rank Real_Value within each indicator / period / family group; ties share the lowest rank.
family_value_groups = FamilyRanks.groupby(['Code', 'Period', 'Family_Group'])['Real_Value']
for family_rank_column, rank_ascending in (('FamilyRank_Desc', False), ('FamilyRank_Asc', True)):
    FamilyRanks[family_rank_column] = family_value_groups.rank(method='min', ascending=rank_ascending).astype(int)
# Same rankings expressed as a fraction of the group size.
for family_pct_column, rank_ascending in (('FamilyRank_Desc_Pct', False), ('FamilyRank_Asc_Pct', True)):
    FamilyRanks[family_pct_column] = family_value_groups.rank(method='min', ascending=rank_ascending, pct=True).astype(float)


def distance(Current, Previous):
    """Return the signed change from ``Previous`` to ``Current``.

    The result is positive when ``Current`` is larger and negative when it is
    smaller. For ordinary numbers this equals the earlier max/min formulation.
    A missing (NaN) operand now always yields NaN: the max/min form returned 0
    when ``Current`` was NaN, because ordering comparisons against NaN are
    always False.
    """
    return Current - Previous

def DifferenceFromGoldilocksMidPoint(df):
    """Absolute gap between a row's value and its Goldilocks midpoint.

    Parameters
    ----------
    df : row-like object (e.g. the Series passed by ``DataFrame.apply(axis=1)``)
        Must provide ``'Real_Value'`` and ``'Ranking_GoldilocksMidpoint'``.

    Returns
    -------
    ``None`` when the midpoint is ``None``; NaN when either the midpoint or the
    value is missing; otherwise ``abs(Real_Value - midpoint)``.

    Missing values are checked explicitly. pandas stores them as NaN, which an
    ``== None`` comparison never matches, and a NaN ``Real_Value`` used to
    produce a distance of 0 (i.e. the best possible Goldilocks rank).
    """
    midpoint = df['Ranking_GoldilocksMidpoint']
    if midpoint is None:
        return None
    value = df['Real_Value']
    if pd.isna(midpoint) or pd.isna(value):
        # Propagate "missing" so the row is dropped by the later notnull filter.
        return float('nan')
    return abs(value - midpoint)


# Distance of every row from its indicator's Goldilocks midpoint (missing where no midpoint applies).
FamilyRanksGoldi['AbsoluteDifferenceFromGoldilocksMidPoint'] = FamilyRanksGoldi.apply(DifferenceFromGoldilocksMidPoint, axis=1)
# Keep only rows that have a distance. The explicit .copy() makes the filtered frame
# independent, so adding the rank columns below does not trigger SettingWithCopyWarning.
FamilyRanksGoldi = FamilyRanksGoldi[pd.notnull(FamilyRanksGoldi['AbsoluteDifferenceFromGoldilocksMidPoint'])].copy()
# The smallest distance ranks first within each indicator / period / family group.
FamilyRanksGoldi['FamilyRank_Goldi'] = FamilyRanksGoldi.groupby(['Code', 'Period', 'Family_Group'])['AbsoluteDifferenceFromGoldilocksMidPoint'].rank('min', ascending=True).astype(int)
FamilyRanksGoldi['FamilyRank_Goldi_Pct'] = FamilyRanksGoldi.groupby(['Code', 'Period', 'Family_Group'])['AbsoluteDifferenceFromGoldilocksMidPoint'].rank('min', ascending=True, pct=True).astype(float)

# Attach the Goldilocks ranks (missing for rows filtered out above), then the
# indicator information, which supplies Ranking_Type for the selection step.
FamilyRanks = FamilyRanks.merge(FamilyRanksGoldi[['Code', 'Period', 'LocalAuthority', 'FamilyRank_Goldi', 'FamilyRank_Goldi_Pct']], how='left', on=['Code', 'Period', 'LocalAuthority'], suffixes=('_FamilyRank', '_Goldi'))
FamilyRanks = FamilyRanks.merge(info, how = 'left', left_on= 'Code', right_on = 'Code_Sortable')

# Define functions needed to select correct ranking type and percentile type
def FamilyRank_select(df):
    """Return the family-group rank that matches the row's ``Ranking_Type``.

    "Ascending", "Descending" and "Goldilocks" select ``FamilyRank_Asc``,
    ``FamilyRank_Desc`` and ``FamilyRank_Goldi`` respectively; any other
    ranking type gives ``None``.
    """
    rank_column_by_type = {
        "Ascending": 'FamilyRank_Asc',
        "Descending": 'FamilyRank_Desc',
        "Goldilocks": 'FamilyRank_Goldi',
    }
    rank_column = rank_column_by_type.get(df['Ranking_Type'])
    return None if rank_column is None else df[rank_column]


def FamilyRank_Pct_select(df):
    """Return the family-group percentile rank that matches the row's ``Ranking_Type``.

    "Ascending", "Descending" and "Goldilocks" select ``FamilyRank_Asc_Pct``,
    ``FamilyRank_Desc_Pct`` and ``FamilyRank_Goldi_Pct`` respectively; any
    other ranking type gives ``None``.
    """
    pct_column_by_type = {
        "Ascending": 'FamilyRank_Asc_Pct',
        "Descending": 'FamilyRank_Desc_Pct',
        "Goldilocks": 'FamilyRank_Goldi_Pct',
    }
    pct_column = pct_column_by_type.get(df['Ranking_Type'])
    return None if pct_column is None else df[pct_column]


# Pick, row by row, the rank and percentile that match each indicator's ranking type,
# then reduce the frame to the identifying columns plus those two results.
FamilyRanks = (
    FamilyRanks
    .assign(
        FamilyRank=lambda ranks: ranks.apply(FamilyRank_select, axis=1),
        FamilyPct=lambda ranks: ranks.apply(FamilyRank_Pct_select, axis=1),
    )
    .loc[:, ['Code_x', 'LocalAuthority', 'Period', 'FamilyRank', 'FamilyPct']]
    .rename(columns={'Code_x': 'Code'})
)

FamilyRanks


Unnamed: 0,Code,LocalAuthority,Period,FamilyRank,FamilyPct
0,C&L 01,Aberdeen City,2010-11,1.0,0.125
1,C&L 01,Aberdeen City,2011-12,1.0,0.125
2,C&L 01,Aberdeen City,2012-13,5.0,0.625
3,C&L 01,Aberdeen City,2013-14,5.0,0.625
4,C&L 01,Aberdeen City,2014-15,5.0,0.625
...,...,...,...,...,...
30384,SW 07,West Lothian,2017-18,5.0,0.625
30385,SW 07,West Lothian,2018-19,4.0,0.500
30386,SW 07,West Lothian,2019-20,5.0,0.625
30387,SW 07,West Lothian,2020-21,6.0,0.750


#### Scottish Ranks

In [229]:
# Independent working copies: ScotRanks receives the ascending/descending ranks,
# ScotRanksGoldi is kept aside for the Goldilocks (distance-from-midpoint) ranking.
ScotRanks = merged_data[[
    'Code',
    'LocalAuthority',
    'Period',
    'Real_Value',
    'Ranking_GoldilocksMidpoint',
]].copy(deep=True)
ScotRanksGoldi = ScotRanks.copy(deep=True)

# Rank Real_Value across all local authorities for each indicator / period; ties share the lowest rank.
scot_value_groups = ScotRanks.groupby(['Code', 'Period'])['Real_Value']
for scot_rank_column, rank_ascending in (('ScotRank_Desc', False), ('ScotRank_Asc', True)):
    ScotRanks[scot_rank_column] = scot_value_groups.rank(method='min', ascending=rank_ascending).astype(int)
# Same rankings expressed as a fraction of the group size.
for scot_pct_column, rank_ascending in (('ScotRank_Desc_Pct', False), ('ScotRank_Asc_Pct', True)):
    ScotRanks[scot_pct_column] = scot_value_groups.rank(method='min', ascending=rank_ascending, pct=True).astype(float)


def distance(Current, Previous):
    """Return the signed change from ``Previous`` to ``Current``.

    The result is positive when ``Current`` is larger and negative when it is
    smaller. For ordinary numbers this equals the earlier max/min formulation.
    A missing (NaN) operand now always yields NaN: the max/min form returned 0
    when ``Current`` was NaN, because ordering comparisons against NaN are
    always False.
    """
    return Current - Previous

def DifferenceFromGoldilocksMidPoint(df):
    """Absolute gap between a row's value and its Goldilocks midpoint.

    Parameters
    ----------
    df : row-like object (e.g. the Series passed by ``DataFrame.apply(axis=1)``)
        Must provide ``'Real_Value'`` and ``'Ranking_GoldilocksMidpoint'``.

    Returns
    -------
    ``None`` when the midpoint is ``None``; NaN when either the midpoint or the
    value is missing; otherwise ``abs(Real_Value - midpoint)``.

    Missing values are checked explicitly. pandas stores them as NaN, which an
    ``== None`` comparison never matches, and a NaN ``Real_Value`` used to
    produce a distance of 0 (i.e. the best possible Goldilocks rank).
    """
    midpoint = df['Ranking_GoldilocksMidpoint']
    if midpoint is None:
        return None
    value = df['Real_Value']
    if pd.isna(midpoint) or pd.isna(value):
        # Propagate "missing" so the row is dropped by the later notnull filter.
        return float('nan')
    return abs(value - midpoint)


# Distance of every row from its indicator's Goldilocks midpoint (missing where no midpoint applies).
# Applied over ScotRanksGoldi itself, matching the family-group cell; it holds the
# same rows, Real_Value and midpoint as ScotRanks, so the result is unchanged.
ScotRanksGoldi['AbsoluteDifferenceFromGoldilocksMidPoint'] = ScotRanksGoldi.apply(DifferenceFromGoldilocksMidPoint, axis=1)
# Keep only rows that have a distance. The explicit .copy() makes the filtered frame
# independent, so adding the rank columns below does not trigger SettingWithCopyWarning.
ScotRanksGoldi = ScotRanksGoldi[pd.notnull(ScotRanksGoldi['AbsoluteDifferenceFromGoldilocksMidPoint'])].copy()
# The smallest distance ranks first within each indicator / period.
ScotRanksGoldi['ScotRank_Goldi'] = ScotRanksGoldi.groupby(['Code', 'Period'])['AbsoluteDifferenceFromGoldilocksMidPoint'].rank('min', ascending=True).astype(int)
ScotRanksGoldi['ScotRank_Goldi_Pct'] = ScotRanksGoldi.groupby(['Code', 'Period'])['AbsoluteDifferenceFromGoldilocksMidPoint'].rank('min', ascending=True, pct=True).astype(float)

# Attach the Goldilocks ranks (missing for rows filtered out above).
ScotRanks = ScotRanks.merge(ScotRanksGoldi[['Code', 'Period', 'LocalAuthority', 'ScotRank_Goldi', 'ScotRank_Goldi_Pct']], how='left', on=['Code', 'Period', 'LocalAuthority'], suffixes=('_ScotRank', '_Goldi'))

# Attach the indicator information, which supplies Ranking_Type for the selection step.
ScotRanks = ScotRanks.merge(info, how='left', left_on='Code', right_on='Code_Sortable')

# Define functions needed to select correct ranking type and percentile type


def ScotRank_select(df):
    """Return the Scotland-wide rank that matches the row's ``Ranking_Type``.

    "Ascending", "Descending" and "Goldilocks" select ``ScotRank_Asc``,
    ``ScotRank_Desc`` and ``ScotRank_Goldi`` respectively; any other ranking
    type gives ``None``.
    """
    rank_column_by_type = {
        "Ascending": 'ScotRank_Asc',
        "Descending": 'ScotRank_Desc',
        "Goldilocks": 'ScotRank_Goldi',
    }
    rank_column = rank_column_by_type.get(df['Ranking_Type'])
    return None if rank_column is None else df[rank_column]


def ScotRank_Pct_select(df):
    """Return the Scotland-wide percentile rank that matches the row's ``Ranking_Type``.

    "Ascending", "Descending" and "Goldilocks" select ``ScotRank_Asc_Pct``,
    ``ScotRank_Desc_Pct`` and ``ScotRank_Goldi_Pct`` respectively; any other
    ranking type gives ``None``.
    """
    pct_column_by_type = {
        "Ascending": 'ScotRank_Asc_Pct',
        "Descending": 'ScotRank_Desc_Pct',
        "Goldilocks": 'ScotRank_Goldi_Pct',
    }
    pct_column = pct_column_by_type.get(df['Ranking_Type'])
    return None if pct_column is None else df[pct_column]


# Pick, row by row, the rank and percentile that match each indicator's ranking type,
# then reduce the frame to the identifying columns plus those two results.
ScotRanks = (
    ScotRanks
    .assign(
        ScotRank=lambda ranks: ranks.apply(ScotRank_select, axis=1),
        ScotPct=lambda ranks: ranks.apply(ScotRank_Pct_select, axis=1),
    )
    .loc[:, ['Code_x', 'LocalAuthority', 'Period', 'ScotRank', 'ScotPct']]
    .rename(columns={'Code_x': 'Code'})
)

ScotRanks

Unnamed: 0,Code,LocalAuthority,Period,ScotRank,ScotPct
0,C&L 01,Aberdeen City,2010-11,1.0,0.03125
1,C&L 01,Aberdeen City,2011-12,1.0,0.03125
2,C&L 01,Aberdeen City,2012-13,24.0,0.75000
3,C&L 01,Aberdeen City,2013-14,23.0,0.71875
4,C&L 01,Aberdeen City,2014-15,20.0,0.62500
...,...,...,...,...,...
30384,SW 07,West Lothian,2017-18,17.0,0.53125
30385,SW 07,West Lothian,2018-19,16.0,0.50000
30386,SW 07,West Lothian,2019-20,24.0,0.75000
30387,SW 07,West Lothian,2020-21,19.0,0.59375


#### Merge Rankings into Main Data

In [230]:
# Attach the family-group and Scotland-wide rankings to the main data; both joins use
# the same indicator / local authority / period key and keep every main-data row.
ranking_join_keys = ['Code', 'LocalAuthority', 'Period']
IndicatorDataMain = (
    merged_data
    .merge(FamilyRanks, how='left', on=ranking_join_keys)
    .merge(ScotRanks, how='left', on=ranking_join_keys)
)
IndicatorDataMain

Unnamed: 0,Code,LocalAuthority,Period,Real_Value,Real_Numerator,Real_Denominator,Scot_Real_Value,Scot_Real_Numerator,Scot_Real_Denominator,FG_Type,Ranking_Type,Ranking_GoldilocksMidpoint,Family_Group,Numerator_Multipier,Denominator_Multiplier,FamilyRank,FamilyPct,ScotRank,ScotPct
0,C&L 01,Aberdeen City,2010-11,0.406679,7.850188e+05,1922292.0,4.374884,198956051.7,45459818.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0,1.0,0.125,1.0,0.03125
1,C&L 01,Aberdeen City,2011-12,0.922524,1.886603e+06,2045051.0,3.916959,188806026.5,48202343.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0,1.0,0.125,1.0,0.03125
2,C&L 01,Aberdeen City,2012-13,4.335545,9.381043e+06,2163756.0,3.734560,192795134.1,51624697.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0,5.0,0.625,24.0,0.75000
3,C&L 01,Aberdeen City,2013-14,4.044078,8.988324e+06,2222588.0,3.710667,197855971.2,53320837.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0,5.0,0.625,23.0,0.71875
4,C&L 01,Aberdeen City,2014-15,3.476122,8.645573e+06,2487138.0,3.408087,179623656.7,52705262.0,"Environmental, Culture & Leisure, Economic Dev...",Ascending,,Family Group 4,1000.0,1.0,5.0,0.625,20.0,0.62500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30384,SW 07,West Lothian,2017-18,0.870748,,,0.853846,,,"Children, Social Work and Housing indicators",Descending,,Family Group 3,,,5.0,0.625,17.0,0.53125
30385,SW 07,West Lothian,2018-19,0.854167,,,0.821737,,,"Children, Social Work and Housing indicators",Descending,,Family Group 3,,,4.0,0.500,16.0,0.50000
30386,SW 07,West Lothian,2019-20,0.812500,,,0.817997,,,"Children, Social Work and Housing indicators",Descending,,Family Group 3,,,5.0,0.625,24.0,0.75000
30387,SW 07,West Lothian,2020-21,0.843000,,,0.825000,,,"Children, Social Work and Housing indicators",Descending,,Family Group 3,,,6.0,0.750,19.0,0.59375


### Main Data File Transformations

#### Add Keys

In [231]:
# All three lookup keys start with Code+Period (plain concatenation, no separator);
# two of them append the family group or the local authority.
code_period_key = IndicatorDataMain['Code'] + IndicatorDataMain['Period']
IndicatorDataMain['Key_CodePeriod'] = code_period_key
IndicatorDataMain['Key_CodePeriodFamilyGroup'] = code_period_key + IndicatorDataMain['Family_Group']
IndicatorDataMain['Key_CodePeriodLA'] = code_period_key + IndicatorDataMain['LocalAuthority']
IndicatorDataMain

Unnamed: 0,Code,LocalAuthority,Period,Real_Value,Real_Numerator,Real_Denominator,Scot_Real_Value,Scot_Real_Numerator,Scot_Real_Denominator,FG_Type,...,Family_Group,Numerator_Multipier,Denominator_Multiplier,FamilyRank,FamilyPct,ScotRank,ScotPct,Key_CodePeriod,Key_CodePeriodFamilyGroup,Key_CodePeriodLA
0,C&L 01,Aberdeen City,2010-11,0.406679,7.850188e+05,1922292.0,4.374884,198956051.7,45459818.0,"Environmental, Culture & Leisure, Economic Dev...",...,Family Group 4,1000.0,1.0,1.0,0.125,1.0,0.03125,C&L 012010-11,C&L 012010-11Family Group 4,C&L 012010-11Aberdeen City
1,C&L 01,Aberdeen City,2011-12,0.922524,1.886603e+06,2045051.0,3.916959,188806026.5,48202343.0,"Environmental, Culture & Leisure, Economic Dev...",...,Family Group 4,1000.0,1.0,1.0,0.125,1.0,0.03125,C&L 012011-12,C&L 012011-12Family Group 4,C&L 012011-12Aberdeen City
2,C&L 01,Aberdeen City,2012-13,4.335545,9.381043e+06,2163756.0,3.734560,192795134.1,51624697.0,"Environmental, Culture & Leisure, Economic Dev...",...,Family Group 4,1000.0,1.0,5.0,0.625,24.0,0.75000,C&L 012012-13,C&L 012012-13Family Group 4,C&L 012012-13Aberdeen City
3,C&L 01,Aberdeen City,2013-14,4.044078,8.988324e+06,2222588.0,3.710667,197855971.2,53320837.0,"Environmental, Culture & Leisure, Economic Dev...",...,Family Group 4,1000.0,1.0,5.0,0.625,23.0,0.71875,C&L 012013-14,C&L 012013-14Family Group 4,C&L 012013-14Aberdeen City
4,C&L 01,Aberdeen City,2014-15,3.476122,8.645573e+06,2487138.0,3.408087,179623656.7,52705262.0,"Environmental, Culture & Leisure, Economic Dev...",...,Family Group 4,1000.0,1.0,5.0,0.625,20.0,0.62500,C&L 012014-15,C&L 012014-15Family Group 4,C&L 012014-15Aberdeen City
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30384,SW 07,West Lothian,2017-18,0.870748,,,0.853846,,,"Children, Social Work and Housing indicators",...,Family Group 3,,,5.0,0.625,17.0,0.53125,SW 072017-18,SW 072017-18Family Group 3,SW 072017-18West Lothian
30385,SW 07,West Lothian,2018-19,0.854167,,,0.821737,,,"Children, Social Work and Housing indicators",...,Family Group 3,,,4.0,0.500,16.0,0.50000,SW 072018-19,SW 072018-19Family Group 3,SW 072018-19West Lothian
30386,SW 07,West Lothian,2019-20,0.812500,,,0.817997,,,"Children, Social Work and Housing indicators",...,Family Group 3,,,5.0,0.625,24.0,0.75000,SW 072019-20,SW 072019-20Family Group 3,SW 072019-20West Lothian
30387,SW 07,West Lothian,2020-21,0.843000,,,0.825000,,,"Children, Social Work and Housing indicators",...,Family Group 3,,,6.0,0.750,19.0,0.59375,SW 072020-21,SW 072020-21Family Group 3,SW 072020-21West Lothian


#### Select Only Required Columns

In [232]:
# Keys first, then identifiers, values and rankings; every other column is dropped.
main_output_columns = [
    'Key_CodePeriod',
    'Key_CodePeriodFamilyGroup',
    'Key_CodePeriodLA',
    'Code',
    'LocalAuthority',
    'Period',
    'Real_Value',
    'Real_Numerator',
    'Real_Denominator',
    'FamilyRank',
    'FamilyPct',
    'ScotRank',
    'ScotPct',
]
IndicatorDataMain = IndicatorDataMain[main_output_columns]
IndicatorDataMain

Unnamed: 0,Key_CodePeriod,Key_CodePeriodFamilyGroup,Key_CodePeriodLA,Code,LocalAuthority,Period,Real_Value,Real_Numerator,Real_Denominator,FamilyRank,FamilyPct,ScotRank,ScotPct
0,C&L 012010-11,C&L 012010-11Family Group 4,C&L 012010-11Aberdeen City,C&L 01,Aberdeen City,2010-11,0.406679,7.850188e+05,1922292.0,1.0,0.125,1.0,0.03125
1,C&L 012011-12,C&L 012011-12Family Group 4,C&L 012011-12Aberdeen City,C&L 01,Aberdeen City,2011-12,0.922524,1.886603e+06,2045051.0,1.0,0.125,1.0,0.03125
2,C&L 012012-13,C&L 012012-13Family Group 4,C&L 012012-13Aberdeen City,C&L 01,Aberdeen City,2012-13,4.335545,9.381043e+06,2163756.0,5.0,0.625,24.0,0.75000
3,C&L 012013-14,C&L 012013-14Family Group 4,C&L 012013-14Aberdeen City,C&L 01,Aberdeen City,2013-14,4.044078,8.988324e+06,2222588.0,5.0,0.625,23.0,0.71875
4,C&L 012014-15,C&L 012014-15Family Group 4,C&L 012014-15Aberdeen City,C&L 01,Aberdeen City,2014-15,3.476122,8.645573e+06,2487138.0,5.0,0.625,20.0,0.62500
...,...,...,...,...,...,...,...,...,...,...,...,...,...
30384,SW 072017-18,SW 072017-18Family Group 3,SW 072017-18West Lothian,SW 07,West Lothian,2017-18,0.870748,,,5.0,0.625,17.0,0.53125
30385,SW 072018-19,SW 072018-19Family Group 3,SW 072018-19West Lothian,SW 07,West Lothian,2018-19,0.854167,,,4.0,0.500,16.0,0.50000
30386,SW 072019-20,SW 072019-20Family Group 3,SW 072019-20West Lothian,SW 07,West Lothian,2019-20,0.812500,,,5.0,0.625,24.0,0.75000
30387,SW 072020-21,SW 072020-21Family Group 3,SW 072020-21West Lothian,SW 07,West Lothian,2020-21,0.843000,,,6.0,0.750,19.0,0.59375


#### Previous Row & First Row Dictionaries

In [233]:
# Sort the rows of the IndicatorDataMain dataframe so that the subsequent steps pick up the correct previous and first rows. This may not be strictly necessary as the data should already be in the correct sort order; it is here as a safeguard in case any sorting needs to be done in previous steps at a later date.
# The deep copy is taken first so the in-place sort operates on an independent frame.
IndicatorDataMain = IndicatorDataMain.copy(deep=True)
IndicatorDataMain.sort_values(by=['LocalAuthority', 'Code', 'Period'], inplace=True)

# Define all variables that will be used to record the changes from previous/first.
# Previouss / Firsts collect one entry per row; Previous / First hold the entry for the current row.
Previouss = []
Previous = None
Firsts = []
First = None
# Values of the first row of the current local authority + indicator run.
First_Save = None
# Local authority and indicator code of the last processed row; the loop compares
# them with the current row to detect the start of a new run.
Local_Authority = ""
Code = ""
Period = ""
# Values carried over from the last processed row; the loop copies them into the
# Previous dictionary (presumably refreshed at the end of each iteration — not visible here, confirm).
Real_Value = ""
Real_Numerator = ""
Real_Denominator = ""
ScotRank = ""
ScotPct = ""
FamilyRank = ""
FamilyPct = ""
# Loop over the all_LGBFDataframe and record previous and first into a python dictionary object. Save the objects for each row into the two list variables (Previouss and Firsts)
for row in IndicatorDataMain.itertuples():
    # If the curently stored Local_Authority and Code are both equal to the current row then this is not the first row for this indicator and local authority combination. As such Previous is calculated using all of the currently stored values in the variables (these are written to at the end of each loop) and First is populated using the stored dictionary in First_Save
    if Local_Authority == row.LocalAuthority and Code == row.Code:
        Previous = {
            'Real_Value': Real_Value,
            'Real_Numerator': Real_Numerator,
            'Real_Denominator': Real_Denominator,
            'ScotRank': ScotRank,
            'ScotPct': ScotPct,
            'FamilyRank': FamilyRank,
            'FamilyPct': FamilyPct
        }
        First = First_Save

    # If the curently stored Local_Authority and Code are both not equal to the current row then this is the first row for this indicator and local authority combination. as such the Previous object is set to None and the First object is populated using this rows values.
    elif Local_Authority != row.LocalAuthority or Code != row.Code:
        First_Save = {
            'Real_Value': row.Real_Value,
            'Real_Numerator': row.Real_Numerator,
            'Real_Denominator': row.Real_Denominator,
            'ScotRank': row.ScotRank,
            'ScotPct': row.ScotPct,
            'FamilyRank': row.FamilyRank,
            'FamilyPct': row.FamilyPct
        }
        First = None
        Previous = None

    # Append the First and Previous into their respective list variables.
    Previouss.append(Previous)
    Firsts.append(First)

    # Set all other variables to their respective columns values in the current row. This is used to both evaluate the if criteria above and to populate the next previous object.
    Local_Authority = row.LocalAuthority
    Code = row.Code
    Period = row.Period
    Real_Value = row.Real_Value
    Real_Numerator = row.Real_Numerator
    Real_Denominator = row.Real_Denominator
    ScotRank = row.ScotRank
    ScotPct = row.ScotPct
    FamilyRank = row.FamilyRank
    FamilyPct = row.FamilyPct

# Assign the two list variables into appropriately titled columns within our all_LGBFData dataframe
IndicatorDataMain['Previous_Row'] = Previouss
IndicatorDataMain['First_Row'] = Firsts

IndicatorDataMain


Unnamed: 0,Key_CodePeriod,Key_CodePeriodFamilyGroup,Key_CodePeriodLA,Code,LocalAuthority,Period,Real_Value,Real_Numerator,Real_Denominator,FamilyRank,FamilyPct,ScotRank,ScotPct,Previous_Row,First_Row
0,C&L 012010-11,C&L 012010-11Family Group 4,C&L 012010-11Aberdeen City,C&L 01,Aberdeen City,2010-11,0.406679,7.850188e+05,1922292.0,1.0,0.125,1.0,0.03125,,
1,C&L 012011-12,C&L 012011-12Family Group 4,C&L 012011-12Aberdeen City,C&L 01,Aberdeen City,2011-12,0.922524,1.886603e+06,2045051.0,1.0,0.125,1.0,0.03125,"{'Real_Value': 0.406679401, 'Real_Numerator': ...","{'Real_Value': 0.406679401, 'Real_Numerator': ..."
2,C&L 012012-13,C&L 012012-13Family Group 4,C&L 012012-13Aberdeen City,C&L 01,Aberdeen City,2012-13,4.335545,9.381043e+06,2163756.0,5.0,0.625,24.0,0.75000,"{'Real_Value': 0.922524281, 'Real_Numerator': ...","{'Real_Value': 0.406679401, 'Real_Numerator': ..."
3,C&L 012013-14,C&L 012013-14Family Group 4,C&L 012013-14Aberdeen City,C&L 01,Aberdeen City,2013-14,4.044078,8.988324e+06,2222588.0,5.0,0.625,23.0,0.71875,"{'Real_Value': 4.335545113, 'Real_Numerator': ...","{'Real_Value': 0.406679401, 'Real_Numerator': ..."
4,C&L 012014-15,C&L 012014-15Family Group 4,C&L 012014-15Aberdeen City,C&L 01,Aberdeen City,2014-15,3.476122,8.645573e+06,2487138.0,5.0,0.625,20.0,0.62500,"{'Real_Value': 4.044078167, 'Real_Numerator': ...","{'Real_Value': 0.406679401, 'Real_Numerator': ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30384,SW 072017-18,SW 072017-18Family Group 3,SW 072017-18West Lothian,SW 07,West Lothian,2017-18,0.870748,,,5.0,0.625,17.0,0.53125,"{'Real_Value': 0.849315068, 'Real_Numerator': ...","{'Real_Value': 0.848101266, 'Real_Numerator': ..."
30385,SW 072018-19,SW 072018-19Family Group 3,SW 072018-19West Lothian,SW 07,West Lothian,2018-19,0.854167,,,4.0,0.500,16.0,0.50000,"{'Real_Value': 0.870748299, 'Real_Numerator': ...","{'Real_Value': 0.848101266, 'Real_Numerator': ..."
30386,SW 072019-20,SW 072019-20Family Group 3,SW 072019-20West Lothian,SW 07,West Lothian,2019-20,0.812500,,,5.0,0.625,24.0,0.75000,"{'Real_Value': 0.854166667, 'Real_Numerator': ...","{'Real_Value': 0.848101266, 'Real_Numerator': ..."
30387,SW 072020-21,SW 072020-21Family Group 3,SW 072020-21West Lothian,SW 07,West Lothian,2020-21,0.843000,,,6.0,0.750,19.0,0.59375,"{'Real_Value': 0.8125, 'Real_Numerator': nan, ...","{'Real_Value': 0.848101266, 'Real_Numerator': ..."


#### Comparisons to First and Previous Rows

In [234]:
# Case-insensitive lookup of indicator metadata: keys are the Code_Sortable values of info, and each value is that
# indicator's remaining columns as a {column name: value} dictionary.
indicatorInfo_dict = NocaseDict(info.set_index('Code_Sortable').to_dict('index'))
def PercentChange_AimAdjusted(Previous, Current, Code):
    """Return the change from Previous to Current, signed so that positive means a move in the indicator's aim direction.

    The indicator's metadata is read from indicatorInfo_dict[Code]:
      * Ranking_Type "Descending": the sign of (Current - Previous) is kept.
      * Ranking_Type "Ascending": the sign of (Current - Previous) is reversed.
      * Ranking_Type "Goldilocks": the change is how much closer Current is to Ranking_GoldilocksMidpoint than
        Previous was (positive = closer).
      * MeasureType "Percentage": the aim-adjusted difference itself is returned (no division by the previous
        value), which avoids very small previous percentages producing changes of 1000% or more.
      * any other MeasureType: the aim-adjusted difference is divided by the size of the previous value (or, for
        Goldilocks, by the previous distance from the midpoint).

    Special cases:
      * Previous and Current both 0 -> 0.
      * Previous is 0 but Current is not -> None, because a relative change from 0 cannot be calculated. In the
        dataset this was only seen 3 times (Orkney and Eilean Siar for CHN20b).
      * Goldilocks, non-percentage, Previous exactly on the midpoint -> 0 if Current is also on the midpoint,
        otherwise None (same reasoning as above; previously this divided by zero).
      * Ranking_Type not one of the three values above -> None.

    Parameters:
        Previous: the earlier indicator value.
        Current: the later indicator value.
        Code: key into indicatorInfo_dict for the indicator.

    Returns:
        A number, or None when no change can be calculated.

    Raises:
        KeyError: if Code is not present in indicatorInfo_dict.
    """
    # The lookup is done before any early return so an unknown Code always raises, whatever the values are.
    indicatorInfo = indicatorInfo_dict[Code]
    Aim = indicatorInfo['Ranking_Type']
    GoldiMid = indicatorInfo['Ranking_GoldilocksMidpoint']
    IsPercentage = indicatorInfo['MeasureType'] == 'Percentage'

    if Previous == 0:
        return 0 if Current == 0 else None

    # distance(a, b) is the signed difference helper defined at the start of the notebook.
    if Aim == "Descending":
        SignedChange = distance(Current, Previous)
        Baseline = abs(Previous)
    elif Aim == "Ascending":
        SignedChange = -distance(Current, Previous)
        Baseline = abs(Previous)
    elif Aim == "Goldilocks":
        Current_DistGoldi = abs(distance(Current, GoldiMid))
        Previous_DistGoldi = abs(distance(Previous, GoldiMid))
        SignedChange = distance(Previous_DistGoldi, Current_DistGoldi)
        Baseline = Previous_DistGoldi
    else:
        return None

    if IsPercentage:
        return SignedChange

    # Baseline is abs(Previous) rather than Previous so that a negative previous value cannot flip the sign of an
    # improvement into a decline. It can only be 0 here for a Goldilocks indicator that started on its midpoint.
    if Baseline == 0:
        return 0 if SignedChange == 0 else None

    return SignedChange / Baseline

def _ChangesAgainst(df, Baseline):
    """Return the changes between the current row (df) and one baseline snapshot, keyed by metric name."""
    Differences = {}

    # Ranks and percentiles: baseline minus current, so a positive result means the rank / percentile number has
    # fallen since the baseline. Written as (baseline - current) rather than -(current - baseline) so that an
    # unchanged value gives 0.0 instead of -0.0.
    for Metric in ('ScotRank', 'ScotPct', 'FamilyRank', 'FamilyPct'):
        Differences[Metric] = Baseline.get(Metric) - df[Metric]

    # Values can be negative, so these use the signed difference helper defined at the start of the notebook
    # (a current value of -1 against a baseline of 2 gives -3).
    for Metric in ('Real_Value', 'Real_Numerator', 'Real_Denominator'):
        Differences[Metric] = distance(df[Metric], Baseline.get(Metric))

    Differences['PercentChange_AimAdjusted'] = PercentChange_AimAdjusted(Baseline.get('Real_Value'), df['Real_Value'], df['Code'])
    return Differences


def Changes(df):
    """Calculate how the current row differs from its Previous_Row and First_Row snapshots.

    Parameters:
        df: one row of the data (used with DataFrame.apply(..., axis=1)). It must provide 'Previous_Row' and
            'First_Row' (each a dictionary or None) and, when either is present, 'Code', 'ScotRank', 'ScotPct',
            'FamilyRank', 'FamilyPct', 'Real_Value', 'Real_Numerator' and 'Real_Denominator'.

    Returns:
        None when the row has neither a previous nor a first snapshot. Otherwise a dictionary with one
        '<metric>_ChangeSincePrevious' and one '<metric>_ChangeSinceFirst' entry per metric plus the two
        'PercentChange_AimAdjusted_Since...' entries; entries for a missing snapshot are None.
    """
    PreviousRow = df['Previous_Row']
    FirstRow = df['First_Row']

    # Return None rather than a dictionary full of None values when there is nothing to compare against.
    if PreviousRow is None and FirstRow is None:
        return None

    # An empty dictionary stands in for a missing snapshot so every lookup below falls back to None.
    SincePrevious = _ChangesAgainst(df, PreviousRow) if PreviousRow is not None else {}
    SinceFirst = _ChangesAgainst(df, FirstRow) if FirstRow is not None else {}

    # The key order is kept exactly as before because the dictionary is later serialised to JSON.
    return {
        "ScotRank_ChangeSincePrevious": SincePrevious.get('ScotRank'),
        "ScotPct_ChangeSincePrevious": SincePrevious.get('ScotPct'),
        "FamilyRank_ChangeSincePrevious": SincePrevious.get('FamilyRank'),
        "FamilyPct_ChangeSincePrevious": SincePrevious.get('FamilyPct'),
        "ScotRank_ChangeSinceFirst": SinceFirst.get('ScotRank'),
        "ScotPct_ChangeSinceFirst": SinceFirst.get('ScotPct'),
        "FamilyRank_ChangeSinceFirst": SinceFirst.get('FamilyRank'),
        "FamilyPct_ChangeSinceFirst": SinceFirst.get('FamilyPct'),
        "Real_Value_ChangeSincePrevious": SincePrevious.get('Real_Value'),
        "Real_Numerator_ChangeSincePrevious": SincePrevious.get('Real_Numerator'),
        "Real_Denominator_ChangeSincePrevious": SincePrevious.get('Real_Denominator'),
        "Real_Value_ChangeSinceFirst": SinceFirst.get('Real_Value'),
        "Real_Numerator_ChangeSinceFirst": SinceFirst.get('Real_Numerator'),
        "Real_Denominator_ChangeSinceFirst": SinceFirst.get('Real_Denominator'),
        "PercentChange_AimAdjusted_SincePrevious": SincePrevious.get('PercentChange_AimAdjusted'),
        "PercentChange_AimAdjusted_SinceFirst": SinceFirst.get('PercentChange_AimAdjusted')
    }


# Run Changes over the frame one row at a time (axis='columns' hands each row to the function) and store what it
# returns - a dictionary of changes, or None - in a new column named Changes.
IndicatorDataMain['Changes'] = IndicatorDataMain.apply(Changes, axis='columns')
IndicatorDataMain


Unnamed: 0,Key_CodePeriod,Key_CodePeriodFamilyGroup,Key_CodePeriodLA,Code,LocalAuthority,Period,Real_Value,Real_Numerator,Real_Denominator,FamilyRank,FamilyPct,ScotRank,ScotPct,Previous_Row,First_Row,Changes
0,C&L 012010-11,C&L 012010-11Family Group 4,C&L 012010-11Aberdeen City,C&L 01,Aberdeen City,2010-11,0.406679,7.850188e+05,1922292.0,1.0,0.125,1.0,0.03125,,,
1,C&L 012011-12,C&L 012011-12Family Group 4,C&L 012011-12Aberdeen City,C&L 01,Aberdeen City,2011-12,0.922524,1.886603e+06,2045051.0,1.0,0.125,1.0,0.03125,"{'Real_Value': 0.406679401, 'Real_Numerator': ...","{'Real_Value': 0.406679401, 'Real_Numerator': ...","{'ScotRank_ChangeSincePrevious': -0.0, 'ScotPc..."
2,C&L 012012-13,C&L 012012-13Family Group 4,C&L 012012-13Aberdeen City,C&L 01,Aberdeen City,2012-13,4.335545,9.381043e+06,2163756.0,5.0,0.625,24.0,0.75000,"{'Real_Value': 0.922524281, 'Real_Numerator': ...","{'Real_Value': 0.406679401, 'Real_Numerator': ...","{'ScotRank_ChangeSincePrevious': -23.0, 'ScotP..."
3,C&L 012013-14,C&L 012013-14Family Group 4,C&L 012013-14Aberdeen City,C&L 01,Aberdeen City,2013-14,4.044078,8.988324e+06,2222588.0,5.0,0.625,23.0,0.71875,"{'Real_Value': 4.335545113, 'Real_Numerator': ...","{'Real_Value': 0.406679401, 'Real_Numerator': ...","{'ScotRank_ChangeSincePrevious': 1.0, 'ScotPct..."
4,C&L 012014-15,C&L 012014-15Family Group 4,C&L 012014-15Aberdeen City,C&L 01,Aberdeen City,2014-15,3.476122,8.645573e+06,2487138.0,5.0,0.625,20.0,0.62500,"{'Real_Value': 4.044078167, 'Real_Numerator': ...","{'Real_Value': 0.406679401, 'Real_Numerator': ...","{'ScotRank_ChangeSincePrevious': 3.0, 'ScotPct..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30384,SW 072017-18,SW 072017-18Family Group 3,SW 072017-18West Lothian,SW 07,West Lothian,2017-18,0.870748,,,5.0,0.625,17.0,0.53125,"{'Real_Value': 0.849315068, 'Real_Numerator': ...","{'Real_Value': 0.848101266, 'Real_Numerator': ...","{'ScotRank_ChangeSincePrevious': -3.0, 'ScotPc..."
30385,SW 072018-19,SW 072018-19Family Group 3,SW 072018-19West Lothian,SW 07,West Lothian,2018-19,0.854167,,,4.0,0.500,16.0,0.50000,"{'Real_Value': 0.870748299, 'Real_Numerator': ...","{'Real_Value': 0.848101266, 'Real_Numerator': ...","{'ScotRank_ChangeSincePrevious': 1.0, 'ScotPct..."
30386,SW 072019-20,SW 072019-20Family Group 3,SW 072019-20West Lothian,SW 07,West Lothian,2019-20,0.812500,,,5.0,0.625,24.0,0.75000,"{'Real_Value': 0.854166667, 'Real_Numerator': ...","{'Real_Value': 0.848101266, 'Real_Numerator': ...","{'ScotRank_ChangeSincePrevious': -8.0, 'ScotPc..."
30387,SW 072020-21,SW 072020-21Family Group 3,SW 072020-21West Lothian,SW 07,West Lothian,2020-21,0.843000,,,6.0,0.750,19.0,0.59375,"{'Real_Value': 0.8125, 'Real_Numerator': nan, ...","{'Real_Value': 0.848101266, 'Real_Numerator': ...","{'ScotRank_ChangeSincePrevious': 5.0, 'ScotPct..."


#### Convert Python Dictionaries to JSON

In [235]:
def PreviousConvertToJson(df):
    """Return the row's 'Previous_Row' entry serialised to a JSON string.

    df is a single row (anything indexable by column name). ignore_nan=True is passed to simplejson so NaN values
    are written as null, as seen in the rendered output of this cell.
    """
    return simplejson.dumps(df['Previous_Row'], ignore_nan=True)


def FirstConvertToJson(df):
    """Return the row's 'First_Row' entry serialised to a JSON string.

    df is a single row (anything indexable by column name). ignore_nan=True is passed to simplejson so NaN values
    are written as null, as seen in the rendered output of this cell.
    """
    return simplejson.dumps(df['First_Row'], ignore_nan=True)


def ChangesConvertToJson(df):
    """Return the row's 'Changes' entry serialised to a JSON string.

    df is a single row (anything indexable by column name). ignore_nan=True is passed to simplejson so NaN values
    are written as null rather than as the non-standard NaN token.
    """
    return simplejson.dumps(df['Changes'], ignore_nan=True)


# Overwrite each dictionary column with its JSON text, converting one row at a time. The columns are handled in
# the same order as before: Previous_Row, First_Row, then Changes.
for json_column, to_json in (
    ('Previous_Row', PreviousConvertToJson),
    ('First_Row', FirstConvertToJson),
    ('Changes', ChangesConvertToJson),
):
    IndicatorDataMain[json_column] = IndicatorDataMain.apply(to_json, axis=1)

IndicatorDataMain

Unnamed: 0,Key_CodePeriod,Key_CodePeriodFamilyGroup,Key_CodePeriodLA,Code,LocalAuthority,Period,Real_Value,Real_Numerator,Real_Denominator,FamilyRank,FamilyPct,ScotRank,ScotPct,Previous_Row,First_Row,Changes
0,C&L 012010-11,C&L 012010-11Family Group 4,C&L 012010-11Aberdeen City,C&L 01,Aberdeen City,2010-11,0.406679,7.850188e+05,1922292.0,1.0,0.125,1.0,0.03125,,,
1,C&L 012011-12,C&L 012011-12Family Group 4,C&L 012011-12Aberdeen City,C&L 01,Aberdeen City,2011-12,0.922524,1.886603e+06,2045051.0,1.0,0.125,1.0,0.03125,"{""Real_Value"": 0.406679401, ""Real_Numerator"": ...","{""Real_Value"": 0.406679401, ""Real_Numerator"": ...","{""ScotRank_ChangeSincePrevious"": -0.0, ""ScotPc..."
2,C&L 012012-13,C&L 012012-13Family Group 4,C&L 012012-13Aberdeen City,C&L 01,Aberdeen City,2012-13,4.335545,9.381043e+06,2163756.0,5.0,0.625,24.0,0.75000,"{""Real_Value"": 0.922524281, ""Real_Numerator"": ...","{""Real_Value"": 0.406679401, ""Real_Numerator"": ...","{""ScotRank_ChangeSincePrevious"": -23.0, ""ScotP..."
3,C&L 012013-14,C&L 012013-14Family Group 4,C&L 012013-14Aberdeen City,C&L 01,Aberdeen City,2013-14,4.044078,8.988324e+06,2222588.0,5.0,0.625,23.0,0.71875,"{""Real_Value"": 4.335545113, ""Real_Numerator"": ...","{""Real_Value"": 0.406679401, ""Real_Numerator"": ...","{""ScotRank_ChangeSincePrevious"": 1.0, ""ScotPct..."
4,C&L 012014-15,C&L 012014-15Family Group 4,C&L 012014-15Aberdeen City,C&L 01,Aberdeen City,2014-15,3.476122,8.645573e+06,2487138.0,5.0,0.625,20.0,0.62500,"{""Real_Value"": 4.044078167, ""Real_Numerator"": ...","{""Real_Value"": 0.406679401, ""Real_Numerator"": ...","{""ScotRank_ChangeSincePrevious"": 3.0, ""ScotPct..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30384,SW 072017-18,SW 072017-18Family Group 3,SW 072017-18West Lothian,SW 07,West Lothian,2017-18,0.870748,,,5.0,0.625,17.0,0.53125,"{""Real_Value"": 0.849315068, ""Real_Numerator"": ...","{""Real_Value"": 0.848101266, ""Real_Numerator"": ...","{""ScotRank_ChangeSincePrevious"": -3.0, ""ScotPc..."
30385,SW 072018-19,SW 072018-19Family Group 3,SW 072018-19West Lothian,SW 07,West Lothian,2018-19,0.854167,,,4.0,0.500,16.0,0.50000,"{""Real_Value"": 0.870748299, ""Real_Numerator"": ...","{""Real_Value"": 0.848101266, ""Real_Numerator"": ...","{""ScotRank_ChangeSincePrevious"": 1.0, ""ScotPct..."
30386,SW 072019-20,SW 072019-20Family Group 3,SW 072019-20West Lothian,SW 07,West Lothian,2019-20,0.812500,,,5.0,0.625,24.0,0.75000,"{""Real_Value"": 0.854166667, ""Real_Numerator"": ...","{""Real_Value"": 0.848101266, ""Real_Numerator"": ...","{""ScotRank_ChangeSincePrevious"": -8.0, ""ScotPc..."
30387,SW 072020-21,SW 072020-21Family Group 3,SW 072020-21West Lothian,SW 07,West Lothian,2020-21,0.843000,,,6.0,0.750,19.0,0.59375,"{""Real_Value"": 0.8125, ""Real_Numerator"": null,...","{""Real_Value"": 0.848101266, ""Real_Numerator"": ...","{""ScotRank_ChangeSincePrevious"": 5.0, ""ScotPct..."


### Latest Values Extraction

In [236]:
# Columns carried into the Latest Values output, in output order.
latest_value_columns = [
    'Key_CodePeriod', 'Key_CodePeriodLA', 'Key_CodePeriodFamilyGroup',
    'LocalAuthority', 'Code', 'Period',
    'Real_Value', 'Real_Numerator', 'Real_Denominator',
    'ScotRank', 'ScotPct', 'FamilyRank', 'FamilyPct',
    'Previous_Row', 'First_Row', 'Changes',
]

# Sort a copy by Code, then Period, then LocalAuthority, and keep the last row of every (Code, LocalAuthority)
# group. Because of the sort, that last row is the one with the highest Period for the pair.
LatestValues = (
    IndicatorDataMain
    .copy(deep=True)
    .sort_values(by=['Code', 'Period', 'LocalAuthority'])
    .groupby(['Code', 'LocalAuthority'])
    .tail(1)
)[latest_value_columns]
LatestValues

Unnamed: 0,Key_CodePeriod,Key_CodePeriodLA,Key_CodePeriodFamilyGroup,LocalAuthority,Code,Period,Real_Value,Real_Numerator,Real_Denominator,ScotRank,ScotPct,FamilyRank,FamilyPct,Previous_Row,First_Row,Changes
11,C&L 012021-22,C&L 012021-22Aberdeen City,C&L 012021-22Family Group 4,Aberdeen City,C&L 01,2021-22,4.797766,6.882382e+06,1434492.0,11.0,0.34375,3.0,0.375,"{""Real_Value"": 28.40821458, ""Real_Numerator"": ...","{""Real_Value"": 0.406679401, ""Real_Numerator"": ...","{""ScotRank_ChangeSincePrevious"": 2.0, ""ScotPct..."
23,C&L 012021-22,C&L 012021-22Aberdeenshire,C&L 012021-22Family Group 1,Aberdeenshire,C&L 01,2021-22,5.968627,7.080220e+06,1186235.0,19.0,0.59375,5.0,0.625,"{""Real_Value"": 30.81283181, ""Real_Numerator"": ...","{""Real_Value"": 5.52098096, ""Real_Numerator"": 6...","{""ScotRank_ChangeSincePrevious"": -2.0, ""ScotPc..."
35,C&L 012021-22,C&L 012021-22Angus,C&L 012021-22Family Group 3,Angus,C&L 01,2021-22,6.298500,3.562092e+06,565544.0,21.0,0.65625,6.0,0.750,"{""Real_Value"": 116.0810204, ""Real_Numerator"": ...","{""Real_Value"": 2.41542917, ""Real_Numerator"": 3...","{""ScotRank_ChangeSincePrevious"": 9.0, ""ScotPct..."
47,C&L 012021-22,C&L 012021-22Argyll & Bute,C&L 012021-22Family Group 1,Argyll & Bute,C&L 01,2021-22,5.823446,1.594698e+06,273840.0,17.0,0.53125,4.0,0.500,"{""Real_Value"": 19.68002168, ""Real_Numerator"": ...","{""Real_Value"": 2.982315608, ""Real_Numerator"": ...","{""ScotRank_ChangeSincePrevious"": -10.0, ""ScotP..."
59,C&L 012021-22,C&L 012021-22Clackmannanshire,C&L 012021-22Family Group 3,Clackmannanshire,C&L 01,2021-22,4.846526,5.595432e+05,115452.0,12.0,0.37500,3.0,0.375,"{""Real_Value"": 17.83068199, ""Real_Numerator"": ...","{""Real_Value"": 3.697085464, ""Real_Numerator"": ...","{""ScotRank_ChangeSincePrevious"": -7.0, ""ScotPc..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30344,SW 072021-22,SW 072021-22South Ayrshire,SW 072021-22Family Group 3,South Ayrshire,SW 07,2021-22,0.734513,,,29.0,0.90625,7.0,0.875,"{""Real_Value"": 0.785, ""Real_Numerator"": null, ...","{""Real_Value"": 0.852631579, ""Real_Numerator"": ...","{""ScotRank_ChangeSincePrevious"": 1.0, ""ScotPct..."
30355,SW 072021-22,SW 072021-22South Lanarkshire,SW 072021-22Family Group 3,South Lanarkshire,SW 07,2021-22,0.760784,,,26.0,0.81250,6.0,0.750,"{""Real_Value"": 0.801, ""Real_Numerator"": null, ...","{""Real_Value"": 0.77027027, ""Real_Numerator"": n...","{""ScotRank_ChangeSincePrevious"": -0.0, ""ScotPc..."
30366,SW 072021-22,SW 072021-22Stirling,SW 072021-22Family Group 2,Stirling,SW 07,2021-22,0.880952,,,3.0,0.09375,1.0,0.125,"{""Real_Value"": 0.895, ""Real_Numerator"": null, ...","{""Real_Value"": 0.8, ""Real_Numerator"": null, ""R...","{""ScotRank_ChangeSincePrevious"": 6.0, ""ScotPct..."
30377,SW 072021-22,SW 072021-22West Dunbartonshire,SW 072021-22Family Group 4,West Dunbartonshire,SW 07,2021-22,0.877049,,,4.0,0.12500,1.0,0.125,"{""Real_Value"": 0.926, ""Real_Numerator"": null, ...","{""Real_Value"": 0.755102041, ""Real_Numerator"": ...","{""ScotRank_ChangeSincePrevious"": -2.0, ""ScotPc..."


### Error Checks

In [237]:
# Default position is to assume checks are passed. If any of the checks below fail, ChecksFailed is set and the final
# csv's will not be output. (The numerator / denominator check further down is reported separately and does not set
# ChecksFailed; those errors should be reviewed at each refresh. The known errors, which exist within the LGBF raw
# data file, are identified in the readme in the Error Outputs folder.)
ChecksFailed = False

# ScotRank should be between 1 and 32, FamilyRank between 1 and 8, and neither should contain any NaN values.
for rank_column, lowest_rank, highest_rank in (('ScotRank', 1, 32), ('FamilyRank', 1, 8)):
    rank_values = IndicatorDataMain[rank_column]

    # .all() is required here: with .any() the check only failed when no rank at all was in range, so a single
    # out-of-range rank slipped through. between() is already False for NaN; the explicit isna() test keeps the
    # "no nulls" requirement visible.
    if rank_values.isna().any() or not rank_values.between(lowest_rank, highest_rank).all():

        ChecksFailed = True
        # Series.max() / Series.min() skip NaN, so the reported range stays meaningful when nulls are present
        # (the built-in max() / min() give order-dependent results once a NaN is involved).
        maxrank = str(rank_values.max())
        minrank = str(rank_values.min())
        countnull = str(rank_values.isna().sum())

        print(rank_column + " checks failed : Max - " + maxrank + ", Min - " + minrank + ", Count of Null - " + countnull)

# Code, LocalAuthority, Period and Real_Value should not contain any null values
null_counts = IndicatorDataMain[['Code', 'LocalAuthority', 'Period', 'Real_Value']].isna().sum()
if null_counts.any():

    ChecksFailed = True
    countnullCode = str(null_counts['Code'])
    countnullLocal_Authority = str(null_counts['LocalAuthority'])
    countnullPeriod = str(null_counts['Period'])
    countnullReal_Value = str(null_counts['Real_Value'])

    print("Null values found : Code - " + countnullCode + ", LocalAuthority - " + countnullLocal_Authority + ", Period - " + countnullPeriod + ", Real_Value - " + countnullReal_Value)

# Real_Value should equal Real_Numerator / Real_Denominator. The failures found so far have been checked and exist
# in the original raw data file. This check reports failures to a csv but does not set ChecksFailed.
NumDenCheck = IndicatorDataMain.copy(deep=True)
NumDenCheck = NumDenCheck[pd.notnull(NumDenCheck['Real_Numerator'])]

# Largest relative difference between numerator / denominator and the value that still counts as a match.
NUMDEN_REL_TOL = 0.02

Real_NumDenDivide_Checks = []   # one entry per checked row: True (match), False (mismatch) or None (not checkable)
Real_NumDenDivide_Check = None
FailReferences = []             # one ";"-separated description per failing row

for row in NumDenCheck.itertuples():
    if row.Real_Value == 0 or math.isnan(row.Real_Denominator) or math.isnan(row.Real_Numerator):
        # Nothing to compare: a zero value, or a missing numerator / denominator.
        Real_NumDenDivide_Check = None
    elif row.Real_Denominator == 0:
        # A non-zero value cannot come from a zero denominator. Report it as a failure instead of letting the
        # division below raise ZeroDivisionError and abort the whole check.
        Real_NumDenDivide_Check = False
    else:
        Real_NumDenDivide_Check = math.isclose(row.Real_Numerator/row.Real_Denominator, row.Real_Value, rel_tol=NUMDEN_REL_TOL)

    if Real_NumDenDivide_Check is False:
        FailReferences.append("Real;" + row.Code + ";" + row.Period + ";" + row.LocalAuthority + ";" + str(row.Real_Value) + ";" + str(row.Real_Numerator) + ";" + str(row.Real_Denominator))

    Real_NumDenDivide_Checks.append(Real_NumDenDivide_Check)

if False in Real_NumDenDivide_Checks :
    print("Numerator/Denominator values check failed : See Error Outputs for csv of failures")
    # De-duplicate and sort the failure descriptions, then split each one back into its seven fields.
    FailReferences = sorted(list(set(FailReferences)))
    FailReferences = pd.DataFrame([sub.split(";") for sub in FailReferences])
    FailReferences = FailReferences.rename(columns={0: 'Type', 1: 'Code', 2: 'Period', 3: 'Local Authority', 4: 'Value', 5: 'Numerator', 6: 'Denominator'})
    FailReferences.to_csv("Error Outputs//Numerator Denominator Fail References.csv", index=False, encoding='utf-8-sig')


Numerator/Denominator values check failed : See Error Outputs for csv of failures


### Export Main Data and Latest to CSV

In [238]:
# Only write the two output files when no blocking check failed; otherwise point the reader at the check output.
if ChecksFailed:
    print("Checks failed! Check output from section 11 for detail")
else:
    IndicatorDataMain.to_csv("Data Files//Indicator Data.csv", index=False, encoding='utf-8-sig')
    LatestValues.to_csv("Data Files//Latest Values.csv", index=False, encoding='utf-8-sig')
