# In [None]:
# -*- coding: utf-8 -*-

# imports
import pandas as pd
import requests
import os
import yaml

"""
sdg-csv-data-filler is the first module in a data pipeline to take
data from the SDG data repo and make it exportable as CSVW.
"""

# setting paths to directories and files
# All paths are resolved against the working directory the process was
# started in, at the moment this module is imported.
cwd = os.getcwd()
data_path = os.path.join(cwd, 'data')
out_path = os.path.join(cwd, 'out')
# os.path.join is the path-joining function; the os module itself has no
# join() attribute.
gap_filler_yam_path = os.path.join(data_path, "gap_filler.yaml")
header_mapping_yam_path = os.path.join(data_path, "header_mapping.yaml")

def get_mapping_dicts(gap_filler_yaml, header_mapping_yaml):
    """
    Loads dictionaries for the gap filling and mapping of column names
    from locally stored .yaml files

        Parameters:
            gap_filler_yaml (string): Path to the yaml file storing the values
                to fill gaps with in each column
            header_mapping_yaml (string): Path to the yaml file storing the
                names to change headers to for each column
        Returns:
            dict: gap filler dict
            dict: header mapping dict
        Raises:
            OSError: if either file cannot be opened
            yaml.YAMLError: if either file is not valid YAML

    """
    # safe_load only builds plain Python objects, so a malformed or
    # malicious file cannot trigger arbitrary object construction.
    with open(gap_filler_yaml, encoding="utf-8") as gap_filler_file:
        gap_filler_dict = yaml.safe_load(gap_filler_file)
    with open(header_mapping_yaml, encoding="utf-8") as header_mapping_file:
        header_mapping_dict = yaml.safe_load(header_mapping_file)
    # An empty yaml document parses to None; hand back an empty dict
    # instead so callers always receive the documented type.
    return gap_filler_dict or {}, header_mapping_dict or {}

def find_csv_urls(url):
    """
    Placeholder for locating the CSV files inside a data folder.

    NOTE: not implemented yet. The body is empty, so a call currently
    returns None rather than a generator. The intended behaviour is to
    yield the link of each CSV file found in the folder at the given URL.
        Parameters:
            url (string): the URL of the repo/folder which contains
                the CSV files to be captured
        Yields (intended):
            string: the URL of the next CSV file in the folder
    """
    
def csvs_to_pandas(url):
    """
    Placeholder for loading a single CSV file into a Pandas dataframe.

    NOTE: not implemented yet. The body is empty, so a call currently
    returns None for every input. The intended behaviour is to check
    whether the CSV at the given URL is populated and, if it is not
    empty, return its contents as a dataframe.
        Parameters:
            url (string): the URL of a CSV file to be captured
        Returns (intended):
            pd.DataFrame: a Pandas dataframe of the CSV
    """
       
def fill_gaps(pd_df, gap_filler_dict):
    """
    Given a Pandas dataframe and a dictionary containing the column names
    and the correct 'fillers' for each column, this function will fill
    each listed column with its filler value wherever empty cells are found.

    Columns that are not named in the dictionary are left untouched, and
    dictionary entries for columns the dataframe does not have are ignored.
    The input dataframe is not modified.
        Parameters:
            pd_df (pd.Dataframe): the variable to which the dataframe
                containing the csv data is assigned
            gap_filler_dict (dict): a dictionary with column name and value
                to fill gaps as key value pairs, e.g.
                {"Age":"All","Sex":"T"}
        Returns:
            pd.Dataframe: A new dataframe in which the empty cells of
                every listed column hold that column's filler value
    """
    # Keep only the fillers whose column exists in this particular CSV;
    # the same dictionary is shared across files with differing columns.
    fillers = {
        column: filler
        for column, filler in gap_filler_dict.items()
        if column in pd_df.columns
    }
    if not fillers:
        # Nothing to fill: still return a fresh object, so the result is
        # always safe to modify without touching the caller's dataframe.
        return pd_df.copy()
    return pd_df.fillna(value=fillers)

def standardise_cell_values(pd_df, dict_of_nonstandard_standard):
    """
    Placeholder for replacing non-standard cell values with standard ones,
    e.g. "Males" becoming "M", with the replacement applied separately
    for each column.

    NOTE: not implemented yet. The body is empty, so a call currently
    returns None and leaves the dataframe as it was.
    """
                       
def standardise_headers(pd_df, dict_of_nonstandard_standard):
    """
    Changes the non-standard CSV column headers to harmonised-data
    column headers.
    e.g. 'Age Group' to 'Age'

    Headers that have no entry in the dictionary are kept as they are, and
    entries for headers the dataframe does not have are ignored. The input
    dataframe is not modified.
        Parameters:
            pd_df (pd.Dataframe): the dataframe whose headers are renamed
            dict_of_nonstandard_standard (dict): a dictionary with the
                non-standard header and its standard replacement as key
                value pairs, e.g. {"Age Group":"Age"}
        Returns:
            pd.Dataframe: A new dataframe with the renamed headers
    """
    # Do whitespaces need to be replaced with underscores?
    return pd_df.rename(columns=dict_of_nonstandard_standard)

def write_csv(pd_df, out_path):
    """
    Placeholder for saving a Pandas dataframe as a CSV file in a local
    folder.

    NOTE: not implemented yet. The body is empty, so a call currently
    writes nothing and returns None.
        Parameters:
            pd_df (pd.Dataframe): the dataframe to be written out
            out_path (string): the path of the local "out" folder
        Returns:
            None: the intended result is the CSV file on disk, not a
                return value
    """

def entry_point(data_url):
    """
    Runs the pipeline for every CSV file found under a data folder URL.

    Each CSV is loaded into a dataframe, has its gaps filled, has its
    headers standardised and is then passed on to be written to the
    local "out" folder.
        Parameters:
            data_url (string): the URL of the repo/folder which contains
                the CSV files to be processed
        Returns:
            None
    """
    urls_gen = find_csv_urls(data_url)
    # The yaml locations live in the module-level *_yam_path variables;
    # gap_filler_yaml and header_mapping_yaml exist only as parameter
    # names inside get_mapping_dicts.
    gap_filler_dict, header_mapping_dict = get_mapping_dicts(
        gap_filler_yam_path, header_mapping_yam_path)
    for _url in urls_gen:
        df = csvs_to_pandas(_url)
        if df is None:
            # csvs_to_pandas only promises a dataframe for populated
            # CSVs, so there is nothing to process for this file.
            continue
        df = fill_gaps(df, gap_filler_dict)
        # standardise_headers requires the mapping as its second argument
        df = standardise_headers(df, header_mapping_dict)
        write_csv(df, out_path)
