In [1]:
import re
import pymongo
import numpy as np
import pandas as pd
from os import listdir
from os.path import isfile,join

In [88]:
#Ids of the stations that have a prediction dataset. Replace 'path' with the local folder
#that holds the "<station id>_pred.csv" files.
pred_suffix="_pred.csv"
onlyfiles=[f for f in listdir('path') if f.endswith(pred_suffix)]
#Slice the known suffix off instead of calling re.sub('_pred.csv',...): in that pattern the '.' is a
#regex wildcard and the match is not anchored to the end, so it could also remove text from the
#middle of a file name. int() still raises ValueError for a prefix that is not a number.
ids=sorted(int(f[:-len(pred_suffix)]) for f in onlyfiles)

In [82]:
def _build_station_doc(station_id):
    """Assemble the document that describes one station.

    Reads three CSV files relative to the working directory:
    chicago_stations.csv, data_output/id<station_id>.csv and predictions/<station_id>_pred.csv.

    Returns a dict with the keys 'id', 'station_name', 'address', 'latitude', 'longitude',
    'total_docks' and 'results'; 'results' lists the historical rows followed by the predicted
    rows, each as a dict with 'ts', 'available_bikes' and 'available_docks'.

    Raises IndexError when the station id is missing from chicago_stations.csv or when the
    historical file yields no rows.
    """
    station_cols = ['id', 'station_name', 'address', 'latitude', 'longitude']
    hist_cols = ['ts', 'total_docks', 'available_docks', 'available_bikes']
    pred_cols = ['ts', 'available_bikes']

    #Only the useful columns are read; skiprows=[0] drops each file's own header line.
    stats = pd.read_csv('chicago_stations.csv', names=station_cols, skiprows=[0])
    #Lines 0..144 of the historical file (header + first 144 rows) are skipped.
    #NOTE(review): the reason for the 145-line offset is not visible here - confirm.
    hist = pd.read_csv('data_output/id{}.csv'.format(station_id), names=hist_cols,
                       usecols=[1, 3, 5, 6], skiprows=np.arange(0, 145))
    pred = pd.read_csv('predictions/{}_pred.csv'.format(station_id), names=pred_cols,
                       usecols=[0, 1], skiprows=[0])

    stats_id = stats[stats.id == station_id]  #static info for the given station id
    docks = int(hist.total_docks.iloc[-1])    #total docks reported by the last historical row

    #The predictions only carry available bikes; derive the available docks from the dock count.
    pred['available_docks'] = docks - pred['available_bikes']

    #Drop total_docks and give the historical frame the same column order as the predictions.
    hist = hist[['ts', 'available_bikes', 'available_docks']]

    #Historical rows first, predicted rows after them.
    result = pd.concat([hist, pred])

    return {'id': station_id,
            'station_name': stats_id['station_name'].values[0],
            'address': stats_id['address'].values[0],
            'latitude': stats_id['latitude'].values[0],
            'longitude': stats_id['longitude'].values[0],
            'total_docks': docks,
            'results': result.to_dict('records')}


def _connect_to_mlab():
    """Open a MongoDB client from credentials/credentials.txt and check the server answers.

    The credentials file must contain exactly four lines: user name, password, the host part
    that follows '@' in the URI, and the database name.

    Returns the connected client; the caller is responsible for closing it.
    Raises pymongo.errors.ConnectionFailure (after printing a message) if the server cannot be
    reached, and ValueError if the credentials file does not have exactly four lines.
    """
    with open("credentials/credentials.txt", 'r', encoding='utf-8') as f:
        name, password, url, dbname = f.read().splitlines()

    #NOTE(review): user name and password are placed in the URI verbatim; pymongo expects
    #reserved characters to be percent-escaped - confirm the file stores them escaped.
    conn = pymongo.MongoClient("mongodb://{}:{}@{}/{}".format(name, password, url, dbname))
    try:
        #Creating the client may not contact the server at all (recent pymongo connects lazily),
        #so issue a cheap ping to find out now whether the server is reachable.
        conn.admin.command('ping')
    except pymongo.errors.ConnectionFailure as e:
        print ("Could not connect to MongoDB: %s" % e)
        conn.close()
        #Re-raise: continuing without a usable connection would only fail later and less clearly.
        raise
    print ("Connected successfully!!!")
    return conn


def store(station_id):
    """Build the document for `station_id` and insert it into the 'stations' collection.

    The document is built from the local CSV files, then inserted into the 'stations'
    collection of the 'easyrider' database using the credentials in credentials/credentials.txt.
    The connection is closed again before returning, whether or not the insert succeeded.

    Parameters
    ----------
    station_id : int
        Id of the station; it selects the CSV files and the row of chicago_stations.csv.

    Returns
    -------
    dict
        The document that was passed to insert_one.

    Raises
    ------
    pymongo.errors.ConnectionFailure
        If the MongoDB server cannot be reached.
    IndexError
        If the station is unknown or has no historical rows.
    """
    doc = _build_station_doc(station_id)

    conn = _connect_to_mlab()
    try:
        #Storing the dictionary in mlab
        conn['easyrider']['stations'].insert_one(doc)
    finally:
        conn.close()

    return doc

In [None]:
#Upload one document per station: call store() for every id that has a prediction file.
for station in ids:
    store(station)