In [1]:
import pandas as pd
import glob

# Find the two input files (rates and levels) among the CSVs in the working directory.
location = "*.csv"
files = glob.glob(location)

# The first matching file name is used; an IndexError here means no file matched.
ratesFile = [name for name in files if "rates" in name][0]
levelsFile = [name for name in files if "levels" in name][0]

# Existing V4 file that this script reads and rewrites at the end.
originalFile = 'v4-lms.csv'

# Every column is read as text (dtype=str).
dfRates, dfLevels = (
    pd.read_csv(path, dtype=str) for path in (ratesFile, levelsFile)
)

# Source column headers -> labels used in the output.
renameCols = dict([
    ('obs', 'V4_0'),
    ('Time dim it id', 'Time'),
    ('dim1itid', 'EconomicActivity'),
    ('dim2itid', 'AgeGroups'),
    ('dim3itid', 'Sex'),
    ('dim4itid', 'SeasonalAdjustment'),
])

dfRates = dfRates.rename(columns=renameCols)
dfLevels = dfLevels.rename(columns=renameCols)


def _addCodelistColumns(frame, unitCode, unitLabel):
    """Return frame with the codelist columns added, in output column order.

    The codelist columns start out as copies of their label columns
    (or as constants for geography and unit of measure); unitCode and
    unitLabel fill 'unit-of-measure' and 'UnitOfMeasure' respectively.
    Only the listed output columns are kept.
    """
    enriched = frame.assign(**{
        'mmm-mmm-yyyy': frame['Time'],
        'uk-only': 'K02000001',
        'Geography': 'United Kingdom',
        'unit-of-measure': unitCode,
        'UnitOfMeasure': unitLabel,
        'economic-activity': frame['EconomicActivity'],
        'age-groups': frame['AgeGroups'],
        'seasonal-adjustment': frame['SeasonalAdjustment'],
    })
    return enriched[[
        'V4_0', 'mmm-mmm-yyyy', 'Time', 'uk-only', 'Geography',
        'unit-of-measure', 'UnitOfMeasure', 'economic-activity', 'EconomicActivity',
        'age-groups', 'AgeGroups', 'Sex', 'seasonal-adjustment', 'SeasonalAdjustment',
    ]]


# Both inputs get the same treatment; only the unit-of-measure values differ.
dfRates = _addCodelistColumns(dfRates, 'rates', 'Rates')
dfLevels = _addCodelistColumns(dfLevels, 'levels', 'Levels')

'''Functions'''

def SeasonalValues(value):
    """Reduce a seasonal-adjustment label to one of two standard labels.

    Returns 'Non Seasonal Adjustment' when value starts with 'Non'
    (case-sensitive), and 'Seasonal Adjustment' for anything else.
    """
    return 'Non Seasonal Adjustment' if value.startswith('Non') else 'Seasonal Adjustment'
    
def SeasonalValueChanges(value):
    """Translate a standard seasonal-adjustment label into its output wording.

    'Non Seasonal Adjustment' becomes 'Not Seasonally Adjusted' and
    'Seasonal Adjustment' becomes 'Seasonally Adjusted'. Any other value
    raises KeyError.
    """
    standardLabels = ('Non Seasonal Adjustment', 'Seasonal Adjustment')
    outputLabels = ('Not Seasonally Adjusted', 'Seasonally Adjusted')
    return dict(zip(standardLabels, outputLabels))[value]

'''Post Processing'''

def _slugify(label):
    """Return label with spaces replaced by hyphens, in lower case."""
    return label.replace(' ', '-').lower()


# Time and economic-activity codes: hyphenated, lower-case versions of the copied labels.
dfRates['mmm-mmm-yyyy'] = dfRates['mmm-mmm-yyyy'].apply(_slugify)
dfLevels['mmm-mmm-yyyy'] = dfLevels['mmm-mmm-yyyy'].apply(_slugify)

dfRates['economic-activity'] = dfRates['economic-activity'].apply(_slugify)
dfLevels['economic-activity'] = dfLevels['economic-activity'].apply(_slugify)

# The sex code is simply the label in lower case.
dfRates['sex'] = dfRates['Sex'].apply(lambda label: label.lower())
dfLevels['sex'] = dfLevels['Sex'].apply(lambda label: label.lower())

# Seasonal adjustment: standardise the label once, then derive the code
# (slug of the standard label) and the final output wording from it.
ratesSeasonal = dfRates['SeasonalAdjustment'].apply(SeasonalValues)
dfRates['seasonal-adjustment'] = ratesSeasonal.apply(_slugify)
dfRates['SeasonalAdjustment'] = ratesSeasonal.apply(SeasonalValueChanges)

levelsSeasonal = dfLevels['SeasonalAdjustment'].apply(SeasonalValues)
dfLevels['seasonal-adjustment'] = levelsSeasonal.apply(_slugify)
dfLevels['SeasonalAdjustment'] = levelsSeasonal.apply(SeasonalValueChanges)

# Stack rates on top of levels.
df = pd.concat([dfRates, dfLevels])

# Rows whose observation is '*' get '*' in a new 'Data Marking' column;
# all other rows are left empty (NaN) in that column.
# NOTE(review): the '*' is copied, not moved - it also stays in the
# observation column. Confirm whether the observation should be blanked.
isMarked = df['V4_0'] == '*'
df.loc[isMarked, 'Data Marking'] = '*'

# The observation column is renamed to V4_1 and the columns are put in output order.
v4Columns = [
    'V4_1', 'Data Marking', 'mmm-mmm-yyyy', 'Time', 'uk-only', 'Geography',
    'unit-of-measure', 'UnitOfMeasure', 'economic-activity', 'EconomicActivity',
    'age-groups', 'AgeGroups', 'sex', 'Sex', 'seasonal-adjustment', 'SeasonalAdjustment',
]
df = df.rename(columns={'V4_0': 'V4_1'})[v4Columns]

# Append the new rows to the existing file's rows, drop rows that are exact
# duplicates, and overwrite the existing file with the result.
originalDF = pd.concat([pd.read_csv(originalFile, dtype=str), df]).drop_duplicates()

originalDF.to_csv(originalFile, index=False)
print('Transform complete!')



Transform complete!
