In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
def clean(location):
    """
    Load a CSV file and reduce it to a 'Value' column indexed by 'Date'.

    The file is read with ``pd.read_csv``, the 'PX_VOLUME' column is
    dropped, the remaining columns are renamed to 'Date' and 'Value'
    (in that order), and 'Date' is set as the index.

    Parameters
    ----------
    location : path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame with a single 'Value' column, indexed by 'Date'.

    Raises
    ------
    FileNotFoundError
        If there is no file at ``location`` (a message is printed first).
    KeyError
        If the 'PX_VOLUME' column is absent (raised by ``DataFrame.drop``).
    ValueError
        If the frame does not have exactly two columns left to rename.
    """
    def read(location):
        """ read the csv; report and re-raise if the file is missing """
        try:
            return pd.read_csv(location)
        except FileNotFoundError:
            # Keep the original diagnostic, but propagate the error instead
            # of returning None, which would only fail later with an
            # unrelated AttributeError on `.drop`.
            print(f'file not found at {location}')
            raise

    def subset_columns(dataframe, remove=('PX_VOLUME',)):
        """ removes irrelevant columns imported from bloomberg """
        # list(...) because pandas treats a tuple as one (MultiIndex) label.
        return dataframe.drop(columns=list(remove))

    def initialize_columns(dataframe):
        """ renames columns and sets time as index """
        df = dataframe.copy()  # never mutate the caller's frame
        df.columns = ['Date', 'Value']
        return df.set_index('Date')

    return initialize_columns(subset_columns(read(location)))

In [4]:
def get_price_data(directory):
    """
    Read and clean every CSV file in a directory and concatenate the results.

    Regular files in ``directory`` whose name ends in '.csv' (case
    insensitive) are passed to ``clean`` in sorted name order, and the
    cleaned frames are stacked row-wise with ``pd.concat(..., axis=0)``.

    NOTE(review): the earlier docstring described an outer join along the
    dates, which would correspond to ``axis=1``. Row-wise stacking is kept
    here so the shape of the returned frame does not change -- confirm
    which behaviour is intended.

    Parameters
    ----------
    directory : path of the directory to scan (not searched recursively).

    Returns
    -------
    pandas.DataFrame containing the rows of every cleaned file.

    Raises
    ------
    ValueError
        If the directory holds no CSV files (``pd.concat`` rejects an
        empty list).
    """
    # os.listdir yields bare names in arbitrary order: sort for a
    # deterministic result and join with `directory` so that files are
    # found regardless of the current working directory.
    candidates = (
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if name.lower().endswith('.csv')
    )
    paths = [path for path in candidates if os.path.isfile(path)]
    dfs = [clean(path) for path in paths]
    return pd.concat(dfs, axis=0)