In [1]:
import numpy as np
import pandas as pd
import benedict

import config
import pinky

client = config.client
db = client[config.database]
col = db[config.weathers_collection]

In [2]:
pd.set_option('display.max_colwidth', None)
pd.options.display.float_format = '{:,.2f}'.format

In [3]:
def get_tp_list(cursor=None, collection=None, file=None):
    ''' Get the timeplaces into a list.
    Either read them from a file or collect them from the database or a
    collection cursor. If more than one parameter is given, the conents
    of the file will be returned, then collection, then cursor. ONLY ONE
    VARIABLE WILL BE RETURNED.
    '''
    
    if file:
        timeplaces = []
        with open('timeplaces.txt', 'r') as tps:
            for row in tps:
                timeplaces.append(row)
        return timeplaces
    if collection:
        raw_recs = col.find({})
        return set([doc['timeplace'] for doc in raw_recs])
    if cursor:
        return set([doc['timeplace'] for doc in cursor])
    return

def tups_to_dict(tups):
    ''' Convert a list of 2-tuples to a dictionary. '''
    dicti = {}
    for a, b in tups:
        dicti.setdefault(a, b)
    return dicti 


In [11]:
def records_to_rows(col, filters={}, limit=100):
    ''' Request records from the database collection and convert it to a
    pandas.DataFrame. All records are set with keys as column names and
    '_id' as the index.
    '''
    
    docs = col.find(filters, batch_size=100)[:limit]
    weathers = pd.DataFrame()
    temp = []
    for row in docs:
        if isinstance(row, dict):
            # Lookout for the occurance of a list and handle appropriately.
            for v in row.values():
                if isinstance(v, list):
                    row['weather'] = row['weather'][0]
            # These next lines convert the dicts to benedicts before
            # flattening, sorting by keys, and then converting back to dicts.
            bene = benedict.benedict.flatten(row)
            flat_bene = benedict.benedict(bene)
            sorted_flat_bene = flat_bene.items_sorted_by_keys()
            sorted_flat_dict = tups_to_dict(sorted_flat_bene)
            # Store in temp list as a pandas.DataFrame.
            temp.append(pd.DataFrame(sorted_flat_dict, index=[row['_id']]))
    return pd.concat(temp)

def flatten_to_single_row(df):
    ''' A function to convert a DataFrame to a single row DataFrame.
    This function takes each row of the dataframe and represents it as a
    single DataFrame row with a given index made by the string concatenation
    of the row number and the column name.
    
    :param df: the dataframe to be flattened
    :type df: pandas.DataFrame
    '''

    df.reset_index(inplace=True)
    index = []
    data = []
    for row in df.iterrows():
        for d, i in zip(row[1], row[1].index):
            index.append(str(i)+str(row[0]))
            data.append(d)
    d = pd.DataFrame(data, index=index)
    return d.transpose()

def make_instants(df, _return=True):
    ''' Convert the rows of the weathers collection DataFrame to a DataFrame of
    instants.
    
    This is useful when you have a DataFrame already built from rows of raw 
    data representing individual documents as they came from the database and
    you want to create a DataFrame of flattened DataFrames
    '''
    
    d = []
    timeplaces = df.index.unique(level='timeplace')
    for tp in timeplaces:
        temp_df = df.loc[tp]
        d.append(flatten_to_single_row(temp_df))
    if _return:
        return pd.concat(d)
    else:
        np.save('instants.npy', pd.concat(d))
        return

def sort_out_inst(tps, drop_cols=None, limit=None):
    ''' Start from the database and get the documents with the timeplace
    given in the tps list. Create a single DataFrame from those documents
    and flatten it before addingit to a DataFrame of instants.
    '''
    
    instants = []
    legits = []
    wtfs = []
    not_legits = []
    for row in list(tps)[:limit]:
        filters = {'timeplace': row.strip('\n')}
        rdf = records_to_rows(col, filters, limit=None)
        if len(rdf.index) >= 0:
            if drop_cols:
                rdf.drop(columns=drop_cols, inplace=True)
            rdf['tt_inst'] = rdf.loc[:, 'tt_inst'].apply(pinky.favor, trans=False)
            rdf.set_index(['timeplace', 'tt_inst', 'type'], inplace=True)
            rdf.sort_index(inplace=True)
            instants.append(flatten_to_single_row(rdf))
            legits.append(row)
        elif 'obs' in rdf['type'].values:
            not_legits.append(row)
        else:
            continue
            wtfs.append(row)
    with open('legits.txt', 'w') as leg:
        for l in legits:
            leg.write(l+'\n')
    with open('not_legits.txt', 'w') as nl:
        for l in not_legits:
            nl.write(l+'\n')
    with open('wtfs.txt', 'w') as wtf:
        for l in wtfs:
            wtf.write(l+'\n')
    return pd.concat(instants)


In [12]:
if __name__ == "__main__":
    drop_cols = ['_id', 'dt', 'dt_txt', 'location_lat',
        'location_lon', 'pop', 'sys_pod',
        'weather_description',
        'weather_icon', 'weather_id', 'weather_main'
        ]
    # timeplaces = get_tp_list(collection=col)
    instants_df = sort_out_inst(timeplaces, drop_cols=drop_cols, limit=10)#make_instants(pd.concat(tpd_list))
    instants_df.head()

Unnamed: 0,timeplace0,tt_inst0,type0,clouds_all0,instant0,main_feels_like0,main_grnd_level0,main_humidity0,main_pressure0,main_sea_level0,...,name40,sys_country40,sys_id40,sys_sunrise40,sys_sunset40,sys_type40,timezone40,rain_3h32,rain_3h33,rain_3h34
0,dnh3ws0000001600592400,0,cast,49,1600592400,282.21,992.0,67,1025,1025.0,...,,,,,,,,,,
0,dnhkzs0000001599717600,313200,cast,91,1599717600,294.71,971.0,93,1020,1020.0,...,,,,,,,,,,
0,dnhwqs0000001598810400,10800,cast,71,1598810400,305.19,942.0,58,1010,1011.0,...,,,,,,,,,,
0,dnh8fs0000001599145200,0,cast,1,1599145200,304.29,991.0,66,1017,1016.0,...,,,,,,,,,,
0,dnhxks0000001600700400,0,obs,1,1600700400,282.86,,76,1033,,...,,,,,,,,,,


In [220]:
def inst_diff(df):
    ''' Get the comparison DataFrame from the instant DataFrame
    
    :param df: the instants DataFrame
    :type df: pandas.DataFrame
    '''
    
    import numpy as np
    i = 0
#     df.set_index(['timeplace','type'])
#     df.drop(columns='_id', inplace=True)
    temp = []
    obs = df.loc[df['type'] == 'obs']
    cast_df = df.loc[df['type'] == 'cast']
    for o in obs.iterrows():
        o = o[1].to_numpy()
        for cast in cast_df.iterrows():
#             print(i)
#             i+=1
#             print(type(o))
            cast = cast[1].to_numpy()
#             print(type(cast))
            try:
                temp.append(np.subtract(cast, o))
            except TypeError as e:
                continue
#                 print(e, e.args)
#                 return "Sorry homey, can't do anyting for you."
            except KeyError as k:
                print(k, k.args)
#                 return "Sorry homey, can't do anyting for you."
#     print(df['type'])
#     if 'cast' in df['type'].values:
#         print(df['type'].loc('obs'))
    return pd.concat(temp)


In [4]:
def read_mongo_to_df(collection, filters={}, limit=None):
    ''' Read a MongoDB cursor to a pandas DataFrame.
    Arguments are "collection", which must be a MongoDB
    client.database.collection object, and "filters", which
    can be a well formed mongo query. "limit" will limit
    the number of documents returned on the cursor.
    '''

    documents = collection.find(filters)[:limit]
    return pd.DataFrame.from_records([doc for doc in documents])

def records_to_rows(col, filters={}, limit=100):
    ''' Request records from the database collection and convert it to a
    pandas.DataFrame. All records are set with keys as column names and
    '_id' as the index.
    '''
    
    docs = col.find(filters, batch_size=100)[:limit]
    weathers = pd.DataFrame()
    temp = []
    for row in docs:
        if isinstance(row, dict):
            # Lookout for the occurance of a list and handle appropriately.
            for v in row.values():
                if isinstance(v, list):
                    row['weather'] = row['weather'][0]
            # These next lines convert the dicts to benedicts before
            # flattening, sorting by keys, and then converting back to dicts.
            bene = benedict.benedict.flatten(row)
            flat_bene = benedict.benedict(bene)
            sorted_flat_bene = flat_bene.items_sorted_by_keys()
            sorted_flat_dict = tups_to_dict(sorted_flat_bene)
            # Store in temp list as a pandas.DataFrame.
            temp.append(pd.DataFrame(sorted_flat_dict, index=[row['_id']]))
    return pd.concat(temp)

def read_mongo_a(col, filters={}, limit=None):
    ''' Retrieve data from the Mongo database and transform it to a pandas
    DataFrame; return the DataFrame.

    :param col: the MongoDB collection to be read
    :type collection: pymongo.collection.Collection
    :param filters: a well formed MongoDB query
    :type filters: dict
    :param limit: optional limiter to the number of documents retrieved
    :type limit: int
    '''

    # Shorten the cursor length if limit is given, otherwise get everything;
    # transform the retrieved data to a pandas.DataFrame and return it.
    docs = col.find(filters)[:limit]
    weathers = []
    for doc in docs:
        if isinstance(doc, dict):
            for v in doc.values():
                if isinstance(v, list):
                    doc['weather'] = doc['weather'][0]
        # Convert the dict to a benedict, flatten it, sort it, convert it back
        # to a dict, and finally transform the dict to a DataFrame and append
        # it to a list to tbe concatted to together.
        bene = benedict.benedict(doc).flatten().items_sorted_by_keys()
        dic = tups_to_dict(bene)
        df = pd.DataFrame.from_dict(dic, orient='index')
        weathers.append(df.transpose())
    if limit:
        print(f'The length of your df has been limited to {limit}.')
    return pd.concat(weathers)


In [5]:
def find_item_with_kv_pair(series, key, value):
    '''Find and return the first item in a given pandas Series that has the
    given key-value pair.
    
    :param series: a pandas series
    :type series: pandas.Series
    :param key: the key the function should search for
    :type key: str
    :param value: the value the function should compare to
    :type value: I think anything that '==' can be used with
    
    :returns: the object found or None or raises TypeError
    '''
    
    if isinstance(series, pd.Series):
        for item in series:
            if isinstance(item, dict):
                if key in item:
                    if item[key] == value:
                        return item
            elif isinstance(item, list):
                for elem in item:
                    if elem[0] == key:
                        if elem[1] == value:
                            return item
        return None
    else:
        raise TypeError("find_item_with_key() wants a pandas.Series.")
        return
    
def flatten_to_series(df):
    ''' A function to convert a DataFrame to a Series.
    This function takes each row of the dataframe and represents it as a
    Series with a given index made by the string concatenation of the row
    number and the column name.
    
    :param df: the dataframe to be flattened
    :type df: pandas.DataFrame
    '''

    index = []
    data = []
    for row in df.iterrows():
        for d, i in zip(row[1], row[1].index):
            index.append(str(i)+str(row[0]))
            data.append(d)
    d = pd.DataFrame(data, index=index)
    return d

def flatten_to_single_row(df):
    ''' A function to convert a DataFrame to a single row DataFrame.
    This function takes each row of the dataframe and represents it as a
    single DataFrame row with a given index made by the string concatenation
    of the row number and the column name.
    
    :param df: the dataframe to be flattened
    :type df: pandas.DataFrame
    '''

    df.reset_index(inplace=True)
    index = []
    data = []
    for row in df.iterrows():
        for d, i in zip(row[1], row[1].index):
            index.append(str(i)+str(row[0]))
            data.append(d)
    d = pd.DataFrame(data, index=index)
    return d.transpose()

def flat_and_concat(flist):
    ''' Flatten a list of DataFrames and concat the flattened versions
    together and return as a single DataFrame.
    
    This is useful when you have a list of DataFrames, each of them
    representing a collection of related datasets
    
    :param flist: At list of pandas.DataFrames.
    :type flist: list
    '''
    D = []
    if not isinstance(flist, list):
        raise TypeError('flat_and_concat() has to have a list of DataFrames.')
        return
    for item in flist:
        if isinstance(item, pandas.DataFrame):
            D.append(flatten_to_single_row(item))
    return pd.concat(D)

def make_instants(df, _return=True):
    ''' Convert the rows of the weathers collection DataFrame to a DataFrame of
    instants.
    
    This is useful when you have a DataFrame already built from rows of raw 
    data representing individual documents as they came from the database and
    you want to create a DataFrame of flattened DataFrames
    '''
    
    d = []
    timeplaces = df.index.unique(level='timeplace')
    for tp in timeplaces:
        temp_df = df.loc[tp]
        d.append(flatten_to_single_row(temp_df))
    if _return:
        return pd.concat(d)
    else:
        np.save('instants.npy', pd.concat(d))
        return

### This is probably not useful within this repository. ###
def make_inst(df):
    ''' Create instant Series from the DataFrame: step through each row of the
    DataFrame and check the count of the row. If it is 42 or more, drop any na
    values, flatten each dict and append the Series to a new DataFrame and
    return it.
    '''
    
    instants = []
    for row in df.iterrows():
        if row[1].count() <= 37:
            continue
        row[1].dropna(inplace=True)
#         row[1].name = row[0]
        obs = find_item_with_kv_pair(row[1], 'type', 'obs')
        for item in row[1].iteritems():
            if isinstance(item[1], dict):
                for v in item[1].values():
                    if isinstance(v, list):
                        item[1]['weather'] = item[1]['weather'][0]
                if obs != None:
                    if item[1]['type'] == 'cast':
                        update_keys(item[1], obs)
            if isinstance(item[1], list) and obs != None:
                if item[1][0] == 'cast':
                    update_keys(item[1], obs)
        # These next lines convert the dicts to benedicts before flattening,
        # sorting by keys, and then converting back to dicts.
        flat_data = row[1].apply(benedict.benedict.flatten)
        sorted_items = flat_data.apply(benedict.benedict.items_sorted_by_keys)
        flat_sorted_data = sorted_items.apply(tups_to_dict)
        instants.append(flat_sorted_data)
    instants = pd.concat(instants, axis=1, ignore_index=False).transpose()
    np.save('instants.npy', instants)
    return instants
### This is probably not useful within this repository. ###

### This is probably not useful within this repository. ###
def make_data(series):
    ''' Take a pandas.Series and compare each of the items to one of the other
    items (dict comparisons) and return a pandas.Series of comparison results.
    '''

    data = []
    
    def key_strip(x):
        ''' Strip the keys from a dict. '''

        if isinstance(x, dict):
            return [x for x in x.values()]
        else:
            return x

    for item in series.iteritems():
        if isinstance(item[1], dict):
            data.append(key_strip(item[1]))
    return pd.Series(data, name=series.name, dtype=object)
### This is probably not useful within this repository. ###

### This is probably not useful within this repository. ###
def make_data_df(df):
    ''' Create the DataFrame that will contain the data to be used as the
    Data dataset to go along with the Target dataset. First make the instants
    DataFrame, then go through it row by row and remove all the items that
    are observation data. Finally save.
    '''
    
    data = []
    for row in df.iterrows():
#         data.append(make_data(row[0]))
#         row[1].name = row[0]
        obs = find_item_with_kv_pair(row[1], 'type', 'obs')
        for item in row[1].iteritems():
            if isinstance(item[1], dict):
                if obs != None:
                    if item[1]['type'] == 'obs':
#                         print(item)
                        row[1].pop(item[0])
                        break
        data.append(make_data(row[1]))
    data_df = pd.concat(data, axis=1, ignore_index=False).transpose()
#     data_df = strip_keys(data_df)#.transpose()
    np.save('forecast_values.npy', data_df)
    return data_df
### This is probably not useful within this repository. ###    

### This is probably not useful within this repository. ###
def make_deltas(series):
    ''' Take a pandas.Series and compare each of the items to one of the other
    items (dict comparisons) and return a pandas.Series of comparison results.
    '''

    deltas = []
    obs = find_item_with_kv_pair(series, 'type', 'obs')
    for item in series:
        if isinstance(item, dict) and obs != None:
            if item['type'] == 'cast':
                update_keys(item, obs)
                deltas.append(compare_dicts(obs, item, return_type='list'))
        if isinstance(item, list) and obs != None:
            if item[0] == 'cast':
                update_keys(item, obs)
                deltas.append(compare_dicts(obs, item, return_type='list'))
    return pd.Series(deltas, name=series.name, dtype=object)
### This is probably not useful within this repository. ###

### This is probably not useful within this repository. ###
def make_deltas_df(df):
    ''' Build the complete deltas DataFrame. '''
    
    deltas = []
    deltas_df = pd.DataFrame()
    
    # Create a DataFrame of the delta documemnts derived from the rows of
    # the supplied DataFrame. Add the DataFrame to a list so that it all
    # concatinates to a DataFrame. Then, row by row create the "deltas" for
    # the data and add it to the list. Finally concat all that together.
    deltas.append(deltas_df)
    for row in df.iterrows():
        deltas.append(make_deltas(row[1]))
    deltas_df = pd.concat(deltas, axis=1, ignore_index=False).transpose()
    np.save('delta_values.npy', deltas_df)
    return deltas_df
### This is probably not useful within this repository. ###


In [118]:
import pymongo

# col.create_index([('timeplace', pymongo.ASCENDING)])
tpd = [doc for doc in col.find({'timeplace': 'dnhzms0000001599091200'})]
len(tpd)

30

In [218]:
filters = {'timeplace': 'dnhzms0000001599091200'}
rdf = records_to_rows(col, filters, limit=None)
drop_cols = ['_id', 'dt', 'dt_txt', 'location_lat',
    'location_lon', 'pop', 'sys_pod',
    'weather_description',
    'weather_icon', 'weather_id', 'weather_main',
    'base', 'cod', 'id', 'name', 'sys_country',
    'sys_id', 'sys_sunrise', 'sys_sunset', 'sys_type', 'timezone']
    
# drop_cols = ['_id',
# #              'base',
#     'dt',
# #              'cod',
# #              'coord_lat',
# #              'coord_lon',
# #              'sys_type',
#      'dt_txt',
#      'pop',
#      'sys_pod',
#      'weather_description',
#      'weather_icon',
#      'weather_id',
#      'weather_main'
#     ]
rdf.drop(columns=drop_cols, inplace=True)
rdf['tt_inst'] = rdf.loc[:, 'tt_inst'].apply(pinky.favor, trans=False)
rdf.set_index(['timeplace', 'tt_inst'], inplace=True)
rdf.sort_index(inplace=True)

In [221]:
filters = {'timeplace': 'dnhzms0000001599091200'}

# rdf = records_to_rows(col, filters, limit=None)
obs = inst_diff(rdf)
obs
# cast = inst_diff(rdf)
# cast
rdf.columns
rdf.head()

ValueError: No objects to concatenate

In [216]:
instants = []
tps = []
with open('legits.txt', 'r') as tp:
    for row in tp:
        tps.append(row.strip('\n'))

def sort_out_inst(tps, limit=None):
    '''  '''
    
    instants = []
    legits = []
    wtfs = []
    not_legits = []
    for row in list(tps)[:limit]:
        filters = {'timeplace': row.strip('\n')}
        rdf = records_to_rows(col, filters, limit=None)
        if len(rdf.index) >= 40:
            drop_cols = ['_id',
    #              'base',
                'dt',
    #              'cod',
    #              'coord_lat',
    #              'coord_lon',
    #              'sys_type',
                 'dt_txt',
                 'pop',
                 'sys_pod',
                 'weather_description',
                 'weather_icon',
                 'weather_id',
                 'weather_main'
                ]
            rdf.drop(columns=drop_cols, inplace=True)
            rdf['tt_inst'] = rdf.loc[:, 'tt_inst'].apply(pinky.favor, trans=False)
            rdf.set_index(['timeplace', 'tt_inst', 'type'], inplace=True)
            rdf.sort_index(inplace=True)
            instants.append(rdf)
            legits.append(row)
        elif 'obs' in rdf['type'].values:
            not_legits.append(row)
        else:
            continue
            wtfs.append(row)
    with open('legits.txt', 'w') as leg:
        for l in legits:
            leg.write(l+'\n')
    with open('not_legits.txt', 'w') as nl:
        for l in not_legits:
            nl.write(l+'\n')
    with open('wtfs.txt', 'w') as wtf:
        for l in wtfs:
            wtf.write(l+'\n')
    return pd.concat(instants)


In [107]:
# inst = sort_out_inst(timeplaces, limit=100)
inst

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,clouds_all,instant,location_lat,location_lon,main_feels_like,main_grnd_level,main_humidity,main_pressure,main_sea_level,main_temp,...,coord_lon,id,name,sys_country,sys_id,sys_sunrise,sys_sunset,sys_type,timezone,wind_gust
timeplace,tt_inst,type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
dnh5rs0000001600797600,0,cast,35,1600797600,34.34,-84.05,293.00,981.00,47,1025,1025.00,294.03,...,,,,,,,,,,
dnh5rs0000001600797600,0,obs,1,1600797600,34.34,-84.05,291.12,,52,1026,,292.70,...,-84.05,4190979.00,Dawsonville,US,4290.00,1600773876.00,1600817574.00,1.00,-14400.00,
dnh5rs0000001600797600,10800,cast,66,1600797600,34.34,-84.05,293.86,981.00,47,1024,1024.00,294.48,...,,,,,,,,,,
dnh5rs0000001600797600,21600,cast,79,1600797600,34.34,-84.05,294.68,981.00,44,1023,1023.00,295.35,...,,,,,,,,,,
dnh5rs0000001600797600,32400,cast,91,1600797600,34.34,-84.05,293.23,980.00,46,1023,1023.00,294.45,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
dnhvfs0000001600797600,388800,cast,0,1600797600,34.78,-83.21,293.62,971.00,46,1022,1022.00,295.04,...,,,,,,,,,,
dnhvfs0000001600797600,399600,cast,0,1600797600,34.78,-83.21,292.74,971.00,43,1022,1022.00,294.54,...,,,,,,,,,,
dnhvfs0000001600797600,410400,cast,0,1600797600,34.78,-83.21,292.74,971.00,43,1022,1022.00,294.54,...,,,,,,,,,,
dnhvfs0000001600797600,421200,cast,15,1600797600,34.78,-83.21,292.85,972.00,45,1023,1023.00,294.44,...,,,,,,,,,,


In [99]:
def make_

[]

In [33]:
rdf.shape

(30, 25)

In [55]:
inst_df = make_instants(inst)

In [56]:
inst_df.head()

Unnamed: 0,tt_inst0,type0,clouds_all0,instant0,location_lat0,location_lon0,main_feels_like0,main_grnd_level0,main_humidity0,main_pressure0,...,coord_lon40,id40,name40,sys_country40,sys_id40,sys_sunrise40,sys_sunset40,sys_type40,timezone40,rain_1h40
0,0,obs,25,1599091200,35.05,-83.08,297.55,,70,1013,...,,,,,,,,,,
0,0,cast,99,1600916400,34.83,-84.35,288.46,954.0,90,1017,...,,,,,,,,,,
0,151200,cast,8,1601089200,33.99,-83.39,294.81,994.0,97,1016,...,,,,,,,,,,
0,151200,cast,100,1598626800,34.04,-83.12,305.22,997.0,67,1017,...,,,,,,,,,,
0,0,cast,35,1600797600,34.34,-84.05,293.0,981.0,47,1025,...,,,,,,,,,,


In [15]:
inst_df.describe()

Unnamed: 0,tt_inst0,base0,clouds_all0,cod0,coord_lat0,coord_lon0,id0,instant0,location_lat0,location_lon0,...,timezone0,type0,visibility0,wind_deg0,wind_speed0,main_grnd_level0,main_sea_level0,main_temp_kf0,rain_3h0,rain_1h0
count,1000,25,1000,25.0,25.0,25.0,25.0,1000,1000.0,1000.0,...,25.0,1000,1000,1000,1000.0,975.0,975.0,975.0,461.0,2.0
unique,41,1,81,1.0,4.0,8.0,17.0,41,4.0,8.0,...,1.0,2,26,167,333.0,20.0,9.0,62.0,248.0,2.0
top,140400,stations,100,200.0,33.82,-84.22,4218165.0,1598119200,33.82,-84.22,...,-14400.0,cast,10000,0,1.5,983.0,1016.0,0.0,0.31,0.15
freq,25,25,325,25.0,8.0,4.0,2.0,25,328.0,164.0,...,25.0,975,975,20,21.0,111.0,234.0,876.0,10.0,1.0


In [None]:
for row in inst_df.columns:
    print(row)

In [None]:
make_data_df(inst)

In [None]:
make_deltas_df(inst)