In [None]:
import pandas as pd
from databakerUtils.sparsityFunctions import SparsityFiller

def Time_Values(value):
    """Reformat a date from MMMYYYY to mmm-yy, e.g. 'Jan2019' -> 'Jan-19'.

    The first three characters are kept as the month and the last two of
    the seven characters are kept as the two-digit year.
    """
    assert len(value) == 7, 'Date from source data has a different format to MMMYYYY'
    month = value[:3]
    short_year = value[5:]
    return '-'.join((month, short_year))

def Geography_Codes(value):
    """Return the geography code to use in place of *value*.

    The N.I. code in the source data is incorrect: 'N92000001' is replaced
    with 'N92000002'. Every other code is returned unchanged.
    """
    corrections = {'N92000001': 'N92000002'}
    if value in corrections:
        return corrections[value]
    return value

def Index_And_Year_Change(value):
    """Translate a value of the index and year change dimension.

    A source column name ('12m growth', 'Index value') is translated to its
    dimension code, and a dimension code ('index', 'year-on-year-change') is
    translated to its label. Any other value raises KeyError.
    """
    # source column name -> dimension code
    column_to_code = {
        '12m growth': 'year-on-year-change',
        'Index value': 'index',
    }
    # dimension code -> dimension label
    code_to_label = {
        'index': 'Index',
        'year-on-year-change': 'Year-on-year change',
    }
    if value in column_to_code:
        return column_to_code[value]
    return code_to_label[value]

def Transform(location=''):
    """Transform the IPHRP source CSV into a v4 output file.

    Reads 'IPHRP INDEX CHART DATA.csv' with every column as a string, stacks
    the '12m growth' and 'Index value' columns into a single 'v4_1'
    observation column alongside time, geography and index/year-change
    columns, writes the result to 'v4-iphrp.csv' and then runs
    SparsityFiller on that output file.

    Observations equal to '-' are blanked and given the data marking '.'.

    Parameters:
        location: directory containing the source file; the output file is
            written to the same directory. The default '' means the current
            working directory. A trailing path separator is optional.
    """
    # Imported locally so the function carries its own dependency.
    import os

    # os.path.join copes with `location` with or without a trailing
    # separator, and returns just the file name when `location` is ''.
    file = os.path.join(location, 'IPHRP INDEX CHART DATA.csv')
    output_file = os.path.join(location, 'v4-iphrp.csv')

    source = pd.read_csv(file, dtype=str)

    # These columns are the same for every observation column, so they are
    # computed once rather than on each pass of the loop.
    time_values = source['Date'].apply(Time_Values)
    geography_codes = source['RegionCode'].apply(Geography_Codes)

    df_list = []
    for col in ['12m growth', 'Index value']:
        # source column name -> dimension code -> dimension label
        code = Index_And_Year_Change(col)
        label = Index_And_Year_Change(code)

        df_list.append(pd.DataFrame({
            'v4_1': source[col],
            'Data Marking': '',
            'mmm-yy': time_values,
            'Time': time_values,
            'administrative-geography': geography_codes,
            'Geography': source['Geography'],
            'index-and-year-change': code,
            'IndexAndYearChange': label,
        }))

    # A fresh index keeps row labels unique after stacking; the index is not
    # written to the CSV, so the output file is unaffected.
    df = pd.concat(df_list, ignore_index=True)

    # Corrects the data markings: '-' is a marker, not an observation.
    suppressed = df['v4_1'] == '-'
    df.loc[suppressed, 'Data Marking'] = '.'
    df.loc[suppressed, 'v4_1'] = ''

    df.to_csv(output_file, index=False)
    SparsityFiller(output_file)

    print('Transform Complete')
    
if __name__ == '__main__':
    # Run the transform only when this file is executed as a script,
    # not when it is imported as a module.
    Transform()