## How to use locally

1. Download this repo locally.
1. Run this sample notebook, which shows how to:
    1. Generate a mapping config file from a given conceptual mapping spreadsheet.
    1. Read raw data from a given input directory, and use the mapping config file to generate transformed data in the given output directory.


## Imports

In [1]:
import os
import transform
import create_config
import pandas as pd

## Parameters

In [2]:
# Folder from which to read the raw data.
INPUT_DATA_FOLDER = "../data/raw"

# Folder from which the entity metadata is read; it is passed to
# transform.cache_entity_metadata further down in this notebook.
ENTITY_PATH = "../../appendix/Entities/"

# Folder holding the mapping configuration: the conceptual-mapping spreadsheet
# is read from here and the generated config.json is written here.
CONFIG_FOLDER = "../config"

# Folder into which the transformed data is written; a "mapping" subfolder is
# created beneath it by a later cell.
OUTPUT_DATA_FOLDER = "../data/transformed"

In [3]:
# Paths derived from the folders configured in the parameters cell.
# EXCEL_MAPPING_FILE is the conceptual mapping spreadsheet used as input when
# building the config; CONFIG_FILE is the JSON file that step writes.
EXCEL_MAPPING_FILE = f"{CONFIG_FOLDER}/conceptual-mapping.xlsx"
OUTPUT_MAPPING_FOLDER = f"{OUTPUT_DATA_FOLDER}/mapping"
CONFIG_FILE = f"{CONFIG_FOLDER}/config.json"

# Make sure the output folders exist. exist_ok=True turns the call into a
# no-op when the folder is already present, so the cell can be re-run safely
# and there is no window between "check" and "create" in which another
# process could create the folder and make makedirs fail.
# Creating OUTPUT_MAPPING_FOLDER also creates OUTPUT_DATA_FOLDER, its parent.
os.makedirs(OUTPUT_MAPPING_FOLDER, exist_ok=True)
os.makedirs(CONFIG_FOLDER, exist_ok=True)

## Creating Configuration File

In [None]:
# Build the mapping configuration in two stages from the same spreadsheet:
# first the entity mappings, then the relationship mappings, which receive the
# entity-stage result as their second argument.
entity_config = create_config.create_entity_mappings_config(EXCEL_MAPPING_FILE)
config_dictionary = create_config.create_relationship_mappings_config(
    EXCEL_MAPPING_FILE,
    entity_config,
)

# Persist the combined configuration so the following cells can load it.
create_config.write_config_json(config_dictionary, CONFIG_FILE)

## Extracting Variables from Config

In [5]:
# Load the generated config file as a pandas Series (typ="series"), so each
# top-level JSON key becomes an index label, then pull out the two entries
# that the transform step needs.
config_series = pd.read_json(CONFIG_FILE, typ="series")
ENTITY_LIST = config_series.loc['entity_list']
RELATIONSHIP_LIST = config_series.loc['relationship_list']

## Transform the raw data

In [None]:
# Step 1: hand the entity list and the entity metadata folder to the transform
# module. NOTE(review): presumably this must run before write_entity_data;
# the original call order is kept.
transform.cache_entity_metadata(ENTITY_LIST, ENTITY_PATH)

# Step 2: read from the raw input folder and write the transformed data,
# passing the output folder, its mapping subfolder, and both config lists.
transform.write_entity_data(
    INPUT_DATA_FOLDER,
    OUTPUT_DATA_FOLDER,
    OUTPUT_MAPPING_FOLDER,
    ENTITY_LIST,
    RELATIONSHIP_LIST,
)