In [1]:
import pandas as pd
from pymongo import MongoClient

import config
import overalls
import weather
import benedict

In [2]:
pd.set_option('display.max_colwidth', None)
pd.options.display.float_format = '{:,.2f}'.format

In [3]:
def _connect_mongo(uri, db):
    """ A util for making a connection to mongo """

    if uri:
        conn = MongoClient(uri)
    else:
        conn = MongoClient(host, port)
    return conn[db]

def read_mongo_to_df(uri, db, collection, query={}, limit=None):
    """ Read from Mongo and Store into DataFrame """

    con = _connect_mongo(uri, db)
    # Make a query to the specific DB and Collection
    if limit:
        cursor = con[collection].find(query)
        cursor = cursor[limit]
        print(f'number of indexes created has been limited to {limit} ..........................')
    else:
        cursor = con[collection].find(query)
    # Expand the cursor and construct the DataFrame
    df = pd.DataFrame.from_dict(cursor, orient='index')
    return df.transpose()

### ORIGINAL ###
# def forecast_fields(row):
#     row['rain_3h'] = row['rain'].get('3h', 0)
#     row['rain_1h'] = row['rain'].get('1h', row['rain_3h'] / 3)
#     del row['rain']
#     return row
### ORIGINAL ###

### COPY OF ORIGINAL ###
def forecast_fields(row):
    row['rain_3h'] = row['rain'].get('3h', row['rain'].get('1h', 0) * 3)
    row['rain_1h'] = row['rain'].get('1h', row['rain_3h'] / 3)
    del row['rain']
    return row
### COPY OF ORIGINAL ###

def read_mongo_a(uri, db, collection, limit=None, squash=False):
    ''' Retrieve data from the Mongo database and transform it to a pandas
    DataFrame; return the DataFrame.

    :param collection: the collection name
    :type collection: string
    :param limit: optional limiter to the number of documents retrieved. '''
    
    database = _connect_mongo(config.uri, db)
    col = {}
    
    cursor = database[collection].find({})
    # Shorten the cursor length if limit is given, otherwise get everything;
    # transform the retrieved data to a pandas.DataFrame and return it.
    dfs = []
    for doc in cursor[:limit]:
        
        ### trying to add the squash option to the DataFrame fields
        if squash:
            doc['weather'] = benedict.benedict(doc['weather']).flatten()
            doc['forecasts'] = [benedict.benedict(cast) for cast in doc['forecasts']]
        ### trying to add the squash option to the DataFrame fields
        
        dfs.append(
            pd.DataFrame.from_dict([{
                'zipcode': doc['zipcode'],
                'instant': doc['instant'],
                'type': 'weather',
                **doc['weather']
            }], orient='columns')  
### don't need the apply if the dict is flattented-- .apply(forecast_fields, axis=1)
        )
        dfs.append(
            pd.DataFrame.from_dict(
                [{
                    'zipcode': doc['zipcode'],
                    'instant': doc['instant'],
                    'type': 'forecast',
                    **forecast
                 } for forecast in doc['forecasts']]
            ).apply(forecast_fields, axis=1)
        )
    if limit:
        print(f'The length of your df has been limited to {limit}.')

    return pd.concat(dfs).set_index(['zipcode', 'instant', 'time_to_instant'], drop=True)
    
    # Let the user know that even though there were no errors the DataFrame
    # was not created.
    return 'From read_mongo(): There were no errors, but your dataframe was \
    not created.'


def errors(casts, obs):
    ''' Make a dict of errors for the forecasts. Any dicts in the arguments
    will be flattened before comparison.
    
    :param casts: a list of dictionaries
    :param obs: a dictionary
    
    * For best results all dicts should have all the same keys and subkeys.
    '''
    
    # Flatten all dicts and compare. Add the comparisons to a list and return.
    casts = [overalls.flatten_dict(cast) for cast in casts]
    obs = overalls.flatten_dict(obs)
    return [overalls.compare_dicts(cast, obs) for cast in casts]

def gen_errs_df(df):
    ''' Create an errors dataframe from the argument.
    
    :param df: Must be a pandas DataFrame.
    '''
    ### is there a way to step through three lists together? ###
    errs_list = []
    errs_dict = {}
    # Create the error dicts list to be added to the errs_dict.
    for (obs, casts) in zip(df['weather'], df['forecasts']): ### this creates a the list of errors from each instant
        errs_list.append(errors(casts, obs))
    for (_id, errs) in zip(df['_id'], errs_list):  ### this creates a dict from the errors list with the index as key
        errs_dict[_id] = errs
    dd = pd.DataFrame.from_dict(errs_dict, orient='index')
    # Replace the errors DataFrame dictionaries with a list of their values 
    for c in dd.columns:
        dd[c] = [list(d.values()) for d in dd[c]]
    return dd


In [26]:
collection = 'legit_inst'
db = 'owmap'
df = read_mongo_to_df(config.uri, db, collection, limit=5)
df.head(1)
# df = read_mongo_a(config.uri, db, collection, limit=1, squash=False)

number of indexes created has been limited to 5 ..........................


Unnamed: 0,_id,instant,zipcode,forecasts,weather
0,5ee355f51525079523946282,1592362800,27006,"[{'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 1}, 'snow': {}, 'wind': {'speed': 4.12, 'deg': 33}, 'humidity': 96, 'pressure': {'press': 1021, 'sea_level': 1021}, 'temperature': {'temp': 288.39, 'temp_kf': 0, 'temp_max': 288.39, 'temp_min': 288.39}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 405836}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {}, 'snow': {}, 'wind': {'speed': 4.36, 'deg': 31}, 'humidity': 90, 'pressure': {'press': 1022, 'sea_level': 1022}, 'temperature': {'temp': 287.92, 'temp_kf': 0, 'temp_max': 287.92, 'temp_min': 287.92}, 'status': 'Clouds', 'detailed_status': 'overcast clouds', 'weather_code': 804, 'weather_icon_name': '04n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 391434}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {}, 'snow': {}, 'wind': {'speed': 4.36, 'deg': 31}, 'humidity': 90, 'pressure': {'press': 1022, 'sea_level': 1022}, 'temperature': {'temp': 287.92, 'temp_kf': 0, 'temp_max': 287.92, 'temp_min': 287.92}, 'status': 'Clouds', 'detailed_status': 'overcast clouds', 'weather_code': 804, 'weather_icon_name': '04n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 387838}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {}, 'snow': {}, 'wind': {'speed': 4.36, 'deg': 31}, 'humidity': 90, 'pressure': {'press': 1022, 'sea_level': 1022}, 'temperature': {'temp': 287.92, 'temp_kf': 0, 'temp_max': 287.92, 'temp_min': 287.92}, 'status': 'Clouds', 'detailed_status': 'overcast clouds', 'weather_code': 804, 'weather_icon_name': '04n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 380637}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {}, 'snow': {}, 'wind': {'speed': 4.36, 'deg': 31}, 'humidity': 90, 'pressure': {'press': 1022, 'sea_level': 1022}, 'temperature': {'temp': 287.92, 'temp_kf': 0, 'temp_max': 287.92, 'temp_min': 287.92}, 'status': 'Clouds', 'detailed_status': 'overcast clouds', 'weather_code': 804, 'weather_icon_name': '04n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 377038}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {}, 'snow': {}, 'wind': {'speed': 4.36, 'deg': 31}, 'humidity': 90, 'pressure': {'press': 1022, 'sea_level': 1022}, 'temperature': {'temp': 287.92, 'temp_kf': 0, 'temp_max': 287.92, 'temp_min': 287.92}, 'status': 'Clouds', 'detailed_status': 'overcast clouds', 'weather_code': 804, 'weather_icon_name': '04n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 373437}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 98, 'rain': {}, 'snow': {}, 'wind': {'speed': 3.61, 'deg': 45}, 'humidity': 95, 'pressure': {'press': 1024, 'sea_level': 1024}, 'temperature': {'temp': 287.29, 'temp_kf': 0, 'temp_max': 287.29, 'temp_min': 287.29}, 'status': 'Clouds', 'detailed_status': 'overcast clouds', 'weather_code': 804, 'weather_icon_name': '04n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 369838}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 98, 'rain': {}, 'snow': {}, 'wind': {'speed': 3.61, 'deg': 45}, 'humidity': 95, 'pressure': {'press': 1024, 'sea_level': 1024}, 'temperature': {'temp': 287.29, 'temp_kf': 0, 'temp_max': 287.29, 'temp_min': 287.29}, 'status': 'Clouds', 'detailed_status': 'overcast clouds', 'weather_code': 804, 'weather_icon_name': '04n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 366238}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 98, 'rain': {}, 'snow': {}, 'wind': {'speed': 3.61, 'deg': 45}, 'humidity': 95, 'pressure': {'press': 1024, 'sea_level': 1024}, 'temperature': {'temp': 287.29, 'temp_kf': 0, 'temp_max': 287.29, 'temp_min': 287.29}, 'status': 'Clouds', 'detailed_status': 'overcast clouds', 'weather_code': 804, 'weather_icon_name': '04n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 362637}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 98, 'rain': {}, 'snow': {}, 'wind': {'speed': 3.61, 'deg': 45}, 'humidity': 95, 'pressure': {'press': 1024, 'sea_level': 1024}, 'temperature': {'temp': 287.29, 'temp_kf': 0, 'temp_max': 287.29, 'temp_min': 287.29}, 'status': 'Clouds', 'detailed_status': 'overcast clouds', 'weather_code': 804, 'weather_icon_name': '04n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 359037}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 98, 'rain': {}, 'snow': {}, 'wind': {'speed': 3.61, 'deg': 45}, 'humidity': 95, 'pressure': {'press': 1024, 'sea_level': 1024}, 'temperature': {'temp': 287.29, 'temp_kf': 0, 'temp_max': 287.29, 'temp_min': 287.29}, 'status': 'Clouds', 'detailed_status': 'overcast clouds', 'weather_code': 804, 'weather_icon_name': '04n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 355437}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 98, 'rain': {}, 'snow': {}, 'wind': {'speed': 3.61, 'deg': 45}, 'humidity': 95, 'pressure': {'press': 1024, 'sea_level': 1024}, 'temperature': {'temp': 287.29, 'temp_kf': 0, 'temp_max': 287.29, 'temp_min': 287.29}, 'status': 'Clouds', 'detailed_status': 'overcast clouds', 'weather_code': 804, 'weather_icon_name': '04n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 351838}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 1.61}, 'snow': {}, 'wind': {'speed': 3.75, 'deg': 46}, 'humidity': 96, 'pressure': {'press': 1023, 'sea_level': 1023}, 'temperature': {'temp': 287.79, 'temp_kf': 0, 'temp_max': 287.79, 'temp_min': 287.79}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 348237}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 1.61}, 'snow': {}, 'wind': {'speed': 3.75, 'deg': 46}, 'humidity': 96, 'pressure': {'press': 1023, 'sea_level': 1023}, 'temperature': {'temp': 287.79, 'temp_kf': 0, 'temp_max': 287.79, 'temp_min': 287.79}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 344638}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 1.61}, 'snow': {}, 'wind': {'speed': 3.75, 'deg': 46}, 'humidity': 96, 'pressure': {'press': 1023, 'sea_level': 1023}, 'temperature': {'temp': 287.79, 'temp_kf': 0, 'temp_max': 287.79, 'temp_min': 287.79}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 341037}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 1.61}, 'snow': {}, 'wind': {'speed': 3.75, 'deg': 46}, 'humidity': 96, 'pressure': {'press': 1023, 'sea_level': 1023}, 'temperature': {'temp': 287.79, 'temp_kf': 0, 'temp_max': 287.79, 'temp_min': 287.79}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 337437}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 1.61}, 'snow': {}, 'wind': {'speed': 3.75, 'deg': 46}, 'humidity': 96, 'pressure': {'press': 1023, 'sea_level': 1023}, 'temperature': {'temp': 287.79, 'temp_kf': 0, 'temp_max': 287.79, 'temp_min': 287.79}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 333838}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 1.61}, 'snow': {}, 'wind': {'speed': 3.75, 'deg': 46}, 'humidity': 96, 'pressure': {'press': 1023, 'sea_level': 1023}, 'temperature': {'temp': 287.79, 'temp_kf': 0, 'temp_max': 287.79, 'temp_min': 287.79}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 330238}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 97, 'rain': {'3h': 0.16}, 'snow': {}, 'wind': {'speed': 3.98, 'deg': 36}, 'humidity': 90, 'pressure': {'press': 1024, 'sea_level': 1024}, 'temperature': {'temp': 287.93, 'temp_kf': 0, 'temp_max': 287.93, 'temp_min': 287.93}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 326637}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 97, 'rain': {'3h': 0.16}, 'snow': {}, 'wind': {'speed': 3.98, 'deg': 36}, 'humidity': 90, 'pressure': {'press': 1024, 'sea_level': 1024}, 'temperature': {'temp': 287.93, 'temp_kf': 0, 'temp_max': 287.93, 'temp_min': 287.93}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 323038}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 97, 'rain': {'3h': 0.16}, 'snow': {}, 'wind': {'speed': 3.98, 'deg': 36}, 'humidity': 90, 'pressure': {'press': 1024, 'sea_level': 1024}, 'temperature': {'temp': 287.93, 'temp_kf': 0, 'temp_max': 287.93, 'temp_min': 287.93}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 319437}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 97, 'rain': {'3h': 0.16}, 'snow': {}, 'wind': {'speed': 3.98, 'deg': 36}, 'humidity': 90, 'pressure': {'press': 1024, 'sea_level': 1024}, 'temperature': {'temp': 287.93, 'temp_kf': 0, 'temp_max': 287.93, 'temp_min': 287.93}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 315837}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 3.1}, 'snow': {}, 'wind': {'speed': 5.05, 'deg': 39}, 'humidity': 96, 'pressure': {'press': 1023, 'sea_level': 1023}, 'temperature': {'temp': 287.93, 'temp_kf': 0, 'temp_max': 287.93, 'temp_min': 287.93}, 'status': 'Rain', 'detailed_status': 'moderate rain', 'weather_code': 501, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 308636}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 3.1}, 'snow': {}, 'wind': {'speed': 5.05, 'deg': 39}, 'humidity': 96, 'pressure': {'press': 1023, 'sea_level': 1023}, 'temperature': {'temp': 287.93, 'temp_kf': 0, 'temp_max': 287.93, 'temp_min': 287.93}, 'status': 'Rain', 'detailed_status': 'moderate rain', 'weather_code': 501, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 301437}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {}, 'snow': {}, 'wind': {'speed': 3.53, 'deg': 46}, 'humidity': 94, 'pressure': {'press': 1024, 'sea_level': 1024}, 'temperature': {'temp': 289.13, 'temp_kf': 0, 'temp_max': 289.13, 'temp_min': 289.13}, 'status': 'Clouds', 'detailed_status': 'overcast clouds', 'weather_code': 804, 'weather_icon_name': '04n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 236638}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {}, 'snow': {}, 'wind': {'speed': 3.53, 'deg': 46}, 'humidity': 94, 'pressure': {'press': 1024, 'sea_level': 1024}, 'temperature': {'temp': 289.13, 'temp_kf': 0, 'temp_max': 289.13, 'temp_min': 289.13}, 'status': 'Clouds', 'detailed_status': 'overcast clouds', 'weather_code': 804, 'weather_icon_name': '04n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 233996}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {}, 'snow': {}, 'wind': {'speed': 3.53, 'deg': 46}, 'humidity': 94, 'pressure': {'press': 1024, 'sea_level': 1024}, 'temperature': {'temp': 289.13, 'temp_kf': 0, 'temp_max': 289.13, 'temp_min': 289.13}, 'status': 'Clouds', 'detailed_status': 'overcast clouds', 'weather_code': 804, 'weather_icon_name': '04n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 223197}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 0.92}, 'snow': {}, 'wind': {'speed': 3.74, 'deg': 23}, 'humidity': 95, 'pressure': {'press': 1024, 'sea_level': 1024}, 'temperature': {'temp': 287.72, 'temp_kf': 0, 'temp_max': 287.72, 'temp_min': 287.72}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 212393}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 0.92}, 'snow': {}, 'wind': {'speed': 3.74, 'deg': 23}, 'humidity': 95, 'pressure': {'press': 1024, 'sea_level': 1024}, 'temperature': {'temp': 287.72, 'temp_kf': 0, 'temp_max': 287.72, 'temp_min': 287.72}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 201594}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 0.12}, 'snow': {}, 'wind': {'speed': 4.3, 'deg': 43}, 'humidity': 95, 'pressure': {'press': 1024, 'sea_level': 1024}, 'temperature': {'temp': 288.8, 'temp_kf': 0, 'temp_max': 288.8, 'temp_min': 288.8}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 190794}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 5.59}, 'snow': {}, 'wind': {'speed': 4.8, 'deg': 36}, 'humidity': 96, 'pressure': {'press': 1023, 'sea_level': 1023}, 'temperature': {'temp': 288.34, 'temp_kf': 0, 'temp_max': 288.34, 'temp_min': 288.34}, 'status': 'Rain', 'detailed_status': 'moderate rain', 'weather_code': 501, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 169193}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 5.59}, 'snow': {}, 'wind': {'speed': 4.8, 'deg': 36}, 'humidity': 96, 'pressure': {'press': 1023, 'sea_level': 1023}, 'temperature': {'temp': 288.34, 'temp_kf': 0, 'temp_max': 288.34, 'temp_min': 288.34}, 'status': 'Rain', 'detailed_status': 'moderate rain', 'weather_code': 501, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 158394}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 1}, 'snow': {}, 'wind': {'speed': 4.65, 'deg': 35}, 'humidity': 95, 'pressure': {'press': 1023, 'sea_level': 1023}, 'temperature': {'temp': 287.55, 'temp_kf': 0, 'temp_max': 287.55, 'temp_min': 287.55}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 147595}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 0.99}, 'snow': {}, 'wind': {'speed': 4.85, 'deg': 39}, 'humidity': 95, 'pressure': {'press': 1023, 'sea_level': 1023}, 'temperature': {'temp': 288.45, 'temp_kf': 0, 'temp_max': 288.45, 'temp_min': 288.45}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 136792}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 0.99}, 'snow': {}, 'wind': {'speed': 4.85, 'deg': 39}, 'humidity': 95, 'pressure': {'press': 1023, 'sea_level': 1023}, 'temperature': {'temp': 288.45, 'temp_kf': 0, 'temp_max': 288.45, 'temp_min': 288.45}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 115195}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 0.32}, 'snow': {}, 'wind': {'speed': 4.15, 'deg': 36}, 'humidity': 95, 'pressure': {'press': 1022, 'sea_level': 1022}, 'temperature': {'temp': 288.32, 'temp_kf': 0, 'temp_max': 288.32, 'temp_min': 288.32}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 104397}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 0.3}, 'snow': {}, 'wind': {'speed': 4.75, 'deg': 28}, 'humidity': 94, 'pressure': {'press': 1022, 'sea_level': 1022}, 'temperature': {'temp': 287.15, 'temp_kf': 0, 'temp_max': 287.15, 'temp_min': 287.15}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 93596}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 0.3}, 'snow': {}, 'wind': {'speed': 4.75, 'deg': 28}, 'humidity': 94, 'pressure': {'press': 1022, 'sea_level': 1022}, 'temperature': {'temp': 287.15, 'temp_kf': 0, 'temp_max': 287.15, 'temp_min': 287.15}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 82796}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 0.3}, 'snow': {}, 'wind': {'speed': 4.75, 'deg': 28}, 'humidity': 94, 'pressure': {'press': 1022, 'sea_level': 1022}, 'temperature': {'temp': 287.15, 'temp_kf': 0, 'temp_max': 287.15, 'temp_min': 287.15}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 71995}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {}, 'snow': {}, 'wind': {'speed': 5.39, 'deg': 28}, 'humidity': 84, 'pressure': {'press': 1022, 'sea_level': 1022}, 'temperature': {'temp': 287.01, 'temp_kf': 0, 'temp_max': 287.01, 'temp_min': 287.01}, 'status': 'Clouds', 'detailed_status': 'overcast clouds', 'weather_code': 804, 'weather_icon_name': '04n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 61196}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {}, 'snow': {}, 'wind': {'speed': 5.39, 'deg': 28}, 'humidity': 84, 'pressure': {'press': 1022, 'sea_level': 1022}, 'temperature': {'temp': 287.01, 'temp_kf': 0, 'temp_max': 287.01, 'temp_min': 287.01}, 'status': 'Clouds', 'detailed_status': 'overcast clouds', 'weather_code': 804, 'weather_icon_name': '04n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 50395}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 1.85}, 'snow': {}, 'wind': {'speed': 4.09, 'deg': 22}, 'humidity': 95, 'pressure': {'press': 1021, 'sea_level': 1021}, 'temperature': {'temp': 286.35, 'temp_kf': 0.03, 'temp_max': 286.35, 'temp_min': 286.32}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 17995}, {'sunset_time': 0, 'sunrise_time': 0, 'clouds': 100, 'rain': {'3h': 1.85}, 'snow': {}, 'wind': {'speed': 4.09, 'deg': 22}, 'humidity': 93, 'pressure': {'press': 1020, 'sea_level': 1021}, 'temperature': {'temp': 286.4, 'temp_kf': 0.08, 'temp_max': 286.4, 'temp_min': 286.32}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 7188}]","{'sunset_time': 1592354436, 'sunrise_time': 1592301882, 'clouds': 100, 'rain': {'1h': 0.25}, 'snow': {}, 'wind': {'speed': 5.81, 'deg': 36, 'gust': 5.81}, 'humidity': 89, 'pressure': {'press': 1020, 'sea_level': None}, 'temperature': {'temp': 286.48, 'temp_kf': None, 'temp_max': 286.48, 'temp_min': 286.48}, 'status': 'Rain', 'detailed_status': 'light rain', 'weather_code': 500, 'weather_icon_name': '10n', 'visibility_distance': None, 'dewpoint': None, 'humidex': None, 'heat_index': None, 'time_to_instant': 7461}"


In [27]:
def weatherize(row):
        row['weather'] = weather.Weather(row['zipcode'], 'observation', row['weather']).weather
        for cast in row['forecasts']:
            cast = weather.Weather(row['zipcode'], 'forecast', cast).weather
        return row

In [30]:
df = df.apply(weatherize, axis=1)

## Convert individual instants to dictionaries(or csv)

In [5]:
df.to_csv('test.csv')

### Create the error docs

In [31]:
for w in df['weather']:
    w.pop('sunset_time', 'sunrise_time')
    w.pop('sunrise_time')
for l in df['forecasts']:
    for f in l:
        f.pop('sunset_time', 'sunrise_time')
        f.pop('sunrise_time')


In [32]:
# err_dict = compare_dicts()
errs = []
for index, row in df[['forecasts', 'weather']].iterrows():
     errs.append(errors(row['forecasts'], row['weather']))
df['errs'] = errs
# df.head()

In [33]:
def strip_keys(dict_list):
    ''' Take a list of dictionaries and return a  list of lists of those
    dictionary values. 
    
    :param e: a list of dicts
    :type e: At least in forecast-forecast the input type will be a pandas
    Series, but in general it can take any list-like object of dictionaries.
    :return: a list of lists of dict values w/o keys
    '''
    
    values = []
    for d in dict_list:
        values.append([list(overalls.flatten_dict(dic).values()) for dic in d])
    return values

err_vals = strip_keys(df['errs'])
cast_vals = strip_keys(df['forecasts'])
dd = pd.DataFrame([cast_vals, err_vals], index=['forecasts', 'errors'])
dd = dd.transpose()


In [34]:
dd.head()

Unnamed: 0,forecasts,errors
0,"[[100, 1, 4.12, 33, 96, 1021, 1021, 288.39, 0, 288.39, 288.39, Rain, light rain, 500, 10n, None, None, None, None, 405836], [100, 4.36, 31, 90, 1022, 1022, 287.92, 0, 287.92, 287.92, Clouds, overcast clouds, 804, 04n, None, None, None, None, 391434], [100, 4.36, 31, 90, 1022, 1022, 287.92, 0, 287.92, 287.92, Clouds, overcast clouds, 804, 04n, None, None, None, None, 387838], [100, 4.36, 31, 90, 1022, 1022, 287.92, 0, 287.92, 287.92, Clouds, overcast clouds, 804, 04n, None, None, None, None, 380637], [100, 4.36, 31, 90, 1022, 1022, 287.92, 0, 287.92, 287.92, Clouds, overcast clouds, 804, 04n, None, None, None, None, 377038], [100, 4.36, 31, 90, 1022, 1022, 287.92, 0, 287.92, 287.92, Clouds, overcast clouds, 804, 04n, None, None, None, None, 373437], [98, 3.61, 45, 95, 1024, 1024, 287.29, 0, 287.29, 287.29, Clouds, overcast clouds, 804, 04n, None, None, None, None, 369838], [98, 3.61, 45, 95, 1024, 1024, 287.29, 0, 287.29, 287.29, Clouds, overcast clouds, 804, 04n, None, None, None, None, 366238], [98, 3.61, 45, 95, 1024, 1024, 287.29, 0, 287.29, 287.29, Clouds, overcast clouds, 804, 04n, None, None, None, None, 362637], [98, 3.61, 45, 95, 1024, 1024, 287.29, 0, 287.29, 287.29, Clouds, overcast clouds, 804, 04n, None, None, None, None, 359037], [98, 3.61, 45, 95, 1024, 1024, 287.29, 0, 287.29, 287.29, Clouds, overcast clouds, 804, 04n, None, None, None, None, 355437], [98, 3.61, 45, 95, 1024, 1024, 287.29, 0, 287.29, 287.29, Clouds, overcast clouds, 804, 04n, None, None, None, None, 351838], [100, 1.61, 3.75, 46, 96, 1023, 1023, 287.79, 0, 287.79, 287.79, Rain, light rain, 500, 10n, None, None, None, None, 348237], [100, 1.61, 3.75, 46, 96, 1023, 1023, 287.79, 0, 287.79, 287.79, Rain, light rain, 500, 10n, None, None, None, None, 344638], [100, 1.61, 3.75, 46, 96, 1023, 1023, 287.79, 0, 287.79, 287.79, Rain, light rain, 500, 10n, None, None, None, None, 341037], [100, 1.61, 3.75, 46, 96, 1023, 1023, 287.79, 0, 287.79, 287.79, Rain, light rain, 500, 10n, None, None, None, None, 337437], [100, 1.61, 3.75, 46, 96, 1023, 1023, 287.79, 0, 287.79, 287.79, Rain, light rain, 500, 10n, None, None, None, None, 333838], [100, 1.61, 3.75, 46, 96, 1023, 1023, 287.79, 0, 287.79, 287.79, Rain, light rain, 500, 10n, None, None, None, None, 330238], [97, 0.16, 3.98, 36, 90, 1024, 1024, 287.93, 0, 287.93, 287.93, Rain, light rain, 500, 10n, None, None, None, None, 326637], [97, 0.16, 3.98, 36, 90, 1024, 1024, 287.93, 0, 287.93, 287.93, Rain, light rain, 500, 10n, None, None, None, None, 323038], [97, 0.16, 3.98, 36, 90, 1024, 1024, 287.93, 0, 287.93, 287.93, Rain, light rain, 500, 10n, None, None, None, None, 319437], [97, 0.16, 3.98, 36, 90, 1024, 1024, 287.93, 0, 287.93, 287.93, Rain, light rain, 500, 10n, None, None, None, None, 315837], [100, 3.1, 5.05, 39, 96, 1023, 1023, 287.93, 0, 287.93, 287.93, Rain, moderate rain, 501, 10n, None, None, None, None, 308636], [100, 3.1, 5.05, 39, 96, 1023, 1023, 287.93, 0, 287.93, 287.93, Rain, moderate rain, 501, 10n, None, None, None, None, 301437], [100, 3.53, 46, 94, 1024, 1024, 289.13, 0, 289.13, 289.13, Clouds, overcast clouds, 804, 04n, None, None, None, None, 236638], [100, 3.53, 46, 94, 1024, 1024, 289.13, 0, 289.13, 289.13, Clouds, overcast clouds, 804, 04n, None, None, None, None, 233996], [100, 3.53, 46, 94, 1024, 1024, 289.13, 0, 289.13, 289.13, Clouds, overcast clouds, 804, 04n, None, None, None, None, 223197], [100, 0.92, 3.74, 23, 95, 1024, 1024, 287.72, 0, 287.72, 287.72, Rain, light rain, 500, 10n, None, None, None, None, 212393], [100, 0.92, 3.74, 23, 95, 1024, 1024, 287.72, 0, 287.72, 287.72, Rain, light rain, 500, 10n, None, None, None, None, 201594], [100, 0.12, 4.3, 43, 95, 1024, 1024, 288.8, 0, 288.8, 288.8, Rain, light rain, 500, 10n, None, None, None, None, 190794], [100, 5.59, 4.8, 36, 96, 1023, 1023, 288.34, 0, 288.34, 288.34, Rain, moderate rain, 501, 10n, None, None, None, None, 169193], [100, 5.59, 4.8, 36, 96, 1023, 1023, 288.34, 0, 288.34, 288.34, Rain, moderate rain, 501, 10n, None, None, None, None, 158394], [100, 1, 4.65, 35, 95, 1023, 1023, 287.55, 0, 287.55, 287.55, Rain, light rain, 500, 10n, None, None, None, None, 147595], [100, 0.99, 4.85, 39, 95, 1023, 1023, 288.45, 0, 288.45, 288.45, Rain, light rain, 500, 10n, None, None, None, None, 136792], [100, 0.99, 4.85, 39, 95, 1023, 1023, 288.45, 0, 288.45, 288.45, Rain, light rain, 500, 10n, None, None, None, None, 115195], [100, 0.32, 4.15, 36, 95, 1022, 1022, 288.32, 0, 288.32, 288.32, Rain, light rain, 500, 10n, None, None, None, None, 104397], [100, 0.3, 4.75, 28, 94, 1022, 1022, 287.15, 0, 287.15, 287.15, Rain, light rain, 500, 10n, None, None, None, None, 93596], [100, 0.3, 4.75, 28, 94, 1022, 1022, 287.15, 0, 287.15, 287.15, Rain, light rain, 500, 10n, None, None, None, None, 82796], [100, 0.3, 4.75, 28, 94, 1022, 1022, 287.15, 0, 287.15, 287.15, Rain, light rain, 500, 10n, None, None, None, None, 71995], [100, 5.39, 28, 84, 1022, 1022, 287.01, 0, 287.01, 287.01, Clouds, overcast clouds, 804, 04n, None, None, None, None, 61196], [100, 5.39, 28, 84, 1022, 1022, 287.01, 0, 287.01, 287.01, Clouds, overcast clouds, 804, 04n, None, None, None, None, 50395], [100, 1.85, 4.09, 22, 95, 1021, 1021, 286.35, 0.03, 286.35, 286.32, Rain, light rain, 500, 10n, None, None, None, None, 17995], [100, 1.85, 4.09, 22, 93, 1020, 1021, 286.4, 0.08, 286.4, 286.32, Rain, light rain, 500, 10n, None, None, None, None, 7188]]","[[0, -1.69, -3, 7, 1, 1.91, 1.91, 1.91, 0, 0, 0, 0, 398375], [0, -1.45, -5, 1, 2, 1.44, 1.44, 1.44, 1, 1, 304, 1, 383973], [0, -1.45, -5, 1, 2, 1.44, 1.44, 1.44, 1, 1, 304, 1, 380377], [0, -1.45, -5, 1, 2, 1.44, 1.44, 1.44, 1, 1, 304, 1, 373176], [0, -1.45, -5, 1, 2, 1.44, 1.44, 1.44, 1, 1, 304, 1, 369577], [0, -1.45, -5, 1, 2, 1.44, 1.44, 1.44, 1, 1, 304, 1, 365976], [-2, -2.2, 9, 6, 4, 0.81, 0.81, 0.81, 1, 1, 304, 1, 362377], [-2, -2.2, 9, 6, 4, 0.81, 0.81, 0.81, 1, 1, 304, 1, 358777], [-2, -2.2, 9, 6, 4, 0.81, 0.81, 0.81, 1, 1, 304, 1, 355176], [-2, -2.2, 9, 6, 4, 0.81, 0.81, 0.81, 1, 1, 304, 1, 351576], [-2, -2.2, 9, 6, 4, 0.81, 0.81, 0.81, 1, 1, 304, 1, 347976], [-2, -2.2, 9, 6, 4, 0.81, 0.81, 0.81, 1, 1, 304, 1, 344377], [0, -2.06, 10, 7, 3, 1.31, 1.31, 1.31, 0, 0, 0, 0, 340776], [0, -2.06, 10, 7, 3, 1.31, 1.31, 1.31, 0, 0, 0, 0, 337177], [0, -2.06, 10, 7, 3, 1.31, 1.31, 1.31, 0, 0, 0, 0, 333576], [0, -2.06, 10, 7, 3, 1.31, 1.31, 1.31, 0, 0, 0, 0, 329976], [0, -2.06, 10, 7, 3, 1.31, 1.31, 1.31, 0, 0, 0, 0, 326377], [0, -2.06, 10, 7, 3, 1.31, 1.31, 1.31, 0, 0, 0, 0, 322777], [-3, -1.83, 0, 1, 4, 1.45, 1.45, 1.45, 0, 0, 0, 0, 319176], [-3, -1.83, 0, 1, 4, 1.45, 1.45, 1.45, 0, 0, 0, 0, 315577], [-3, -1.83, 0, 1, 4, 1.45, 1.45, 1.45, 0, 0, 0, 0, 311976], [-3, -1.83, 0, 1, 4, 1.45, 1.45, 1.45, 0, 0, 0, 0, 308376], [0, -0.76, 3, 7, 3, 1.45, 1.45, 1.45, 0, 1, 1, 0, 301175], [0, -0.76, 3, 7, 3, 1.45, 1.45, 1.45, 0, 1, 1, 0, 293976], [0, -2.28, 10, 5, 4, 2.65, 2.65, 2.65, 1, 1, 304, 1, 229177], [0, -2.28, 10, 5, 4, 2.65, 2.65, 2.65, 1, 1, 304, 1, 226535], [0, -2.28, 10, 5, 4, 2.65, 2.65, 2.65, 1, 1, 304, 1, 215736], [0, -2.07, -13, 6, 4, 1.24, 1.24, 1.24, 0, 0, 0, 0, 204932], [0, -2.07, -13, 6, 4, 1.24, 1.24, 1.24, 0, 0, 0, 0, 194133], [0, -1.51, 7, 6, 4, 2.32, 2.32, 2.32, 0, 0, 0, 0, 183333], [0, -1.01, 0, 7, 3, 1.86, 1.86, 1.86, 0, 1, 1, 0, 161732], [0, -1.01, 0, 7, 3, 1.86, 1.86, 1.86, 0, 1, 1, 0, 150933], [0, -1.16, -1, 6, 3, 1.07, 1.07, 1.07, 0, 0, 0, 0, 140134], [0, -0.96, 3, 6, 3, 1.97, 1.97, 1.97, 0, 0, 0, 0, 129331], [0, -0.96, 3, 6, 3, 1.97, 1.97, 1.97, 0, 0, 0, 0, 107734], [0, -1.66, 0, 6, 2, 1.84, 1.84, 1.84, 0, 0, 0, 0, 96936], [0, -1.06, -8, 5, 2, 0.67, 0.67, 0.67, 0, 0, 0, 0, 86135], [0, -1.06, -8, 5, 2, 0.67, 0.67, 0.67, 0, 0, 0, 0, 75335], [0, -1.06, -8, 5, 2, 0.67, 0.67, 0.67, 0, 0, 0, 0, 64534], [0, -0.42, -8, -5, 2, 0.53, 0.53, 0.53, 1, 1, 304, 1, 53735], [0, -0.42, -8, -5, 2, 0.53, 0.53, 0.53, 1, 1, 304, 1, 42934], [0, -1.72, -14, 6, 1, -0.13, -0.13, -0.16, 0, 0, 0, 0, 10534], [0, -1.72, -14, 4, 0, -0.08, -0.08, -0.16, 0, 0, 0, 0, -273]]"


In [35]:
filename = 'error_set.csv'
dd.to_csv(filename, float_format='%.2f')