## Import historical data to MongoDB

In [55]:
import csv
import logging
import pymongo
import os
import codecs
import threading

# UUIDs of station rows that have already been queued for insertion.
# load_and_insert_station_csv reads and updates this set so that a uuid seen
# in an earlier CSV file (or earlier in the same file) is not inserted again.
inserted_uuids = set()

# Log records are rendered as "LEVEL: message"; everything from DEBUG up is shown.
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG)

In [56]:
# Connect to the local MongoDB instance and start from an empty "tanken"
# database so that re-running the notebook does not duplicate documents.
client = pymongo.MongoClient('localhost', 27017)
try:
    client.drop_database("tanken")
except pymongo.errors.PyMongoError as error:
    # Dropping is best-effort, but the failure is reported instead of being
    # silently swallowed; a bare ``except`` would also hide KeyboardInterrupt.
    logging.warning("Could not drop database 'tanken': %s", error)

# Collection handles used by the import cells below.
db = client.tanken
preise = db.preise
stationen = db.stationen

In [57]:
def load_csv(path):
    """Read a UTF-8 encoded CSV file into a list of row dicts.

    Args:
        path: Location of the CSV file; its first row supplies the keys.

    Returns:
        A list with one dict per data row, mapping each column name to the
        cell's string value.
    """
    # newline="" leaves line-ending handling to the csv module, as its
    # documentation requires. codecs.open() splits lines with str.splitlines,
    # which also breaks on characters such as U+2028 inside a field and so
    # tears a single record into two rows.
    with open(path, "r", encoding="utf-8", newline="") as csv_file:
        return list(csv.DictReader(csv_file))

def load_and_insert_preis_csv(path, collection):
    """Load one price CSV file and insert all of its rows into *collection*.

    Args:
        path: Path of the CSV file to import.
        collection: Object providing ``insert_many`` (here a pymongo
            collection) that receives the rows.
    """
    logging.info("Loading %s", path)
    data = load_csv(path)
    logging.info("Loaded %s! Inserting...", path)
    # insert_many rejects an empty document list, so a header-only file is
    # skipped, matching the guard in load_and_insert_station_csv.
    if data:
        collection.insert_many(data)
    logging.info("Inserted!")

def load_and_insert_station_csv(path, collection):
    """Load one station CSV file and insert only rows with an unseen uuid.

    Rows whose ``uuid`` is already recorded in the module-level
    ``inserted_uuids`` set are dropped; every kept uuid is added to that set.

    Args:
        path: Path of the CSV file to import.
        collection: Object providing ``insert_many`` that receives the rows.
    """
    logging.info("Loading "+ path)
    fresh_rows = []
    for row in load_csv(path):
        station_id = row["uuid"]
        if station_id in inserted_uuids:
            # Already queued from this or an earlier file.
            continue
        inserted_uuids.add(station_id)
        fresh_rows.append(row)
    logging.info("Loaded "+ path+ "! Inserting...")
    # Nothing is sent to the collection when every row was a repeat.
    if fresh_rows:
        collection.insert_many(fresh_rows)
    logging.info("Inserted!")


def _sorted_subdirs(parent):
    """Return the names of the directories directly below *parent*, sorted."""
    return sorted(
        entry for entry in os.listdir(parent)
        if os.path.isdir(parent + "/" + entry)
    )


def import_dataset(name, collection, function, daten_dir="daten/"):
    """Pass every file below ``<daten_dir><name>/<year>/<month>/`` to *function*.

    Directories and files are visited in sorted name order so that repeated
    runs import in the same sequence. Entries at year or month level that are
    not directories are skipped.

    Args:
        name: Name of the dataset sub-directory inside *daten_dir*.
        collection: Passed through unchanged as the second argument of
            *function*.
        function: Callable invoked as ``function(file_path, collection)`` for
            every entry found in a month directory.
        daten_dir: Root data directory; defaults to ``"daten/"``.
    """
    if not os.path.exists(daten_dir):
        logging.error("Daten-Ordner nicht gefunden")
        return

    dataset_dir = os.path.join(daten_dir, name)
    if not os.path.isdir(dataset_dir):
        # Report the missing dataset instead of claiming a finished import.
        logging.warning("Dataset %s not found in %s", name, daten_dir)
        return

    logging.info("Started import of %s", name)
    for year in _sorted_subdirs(dataset_dir):
        logging.info("%s -> year:%s", name, year)
        year_dir = dataset_dir + "/" + year
        for month in _sorted_subdirs(year_dir):
            logging.info("%s -> month:%s", name, month)
            month_dir = year_dir + "/" + month
            # os.listdir returns entries in arbitrary order; sort them so the
            # import sequence is deterministic.
            for day in sorted(os.listdir(month_dir)):
                function(month_dir + "/" + day, collection)
    logging.info("Finished import of %s", name)


In [None]:
# Run both imports: every file under daten/preise goes into the "preise"
# collection, every file under daten/stationen into "stationen". The station
# loader skips rows whose uuid it has already seen.
import_dataset("preise",preise,load_and_insert_preis_csv)
import_dataset("stationen",stationen,load_and_insert_station_csv)


INFO: Started import of preise
INFO: preise -> year:2020
INFO: preise -> month:01
INFO: Loading daten/preise/2020/01/2020-01-01-prices.csv
INFO: Loaded daten/preise/2020/01/2020-01-01-prices.csv! Inserting...
INFO: Inserted!
INFO: Loading daten/preise/2020/01/2020-01-02-prices.csv
INFO: Loaded daten/preise/2020/01/2020-01-02-prices.csv! Inserting...
INFO: Inserted!
INFO: Loading daten/preise/2020/01/2020-01-03-prices.csv


In [53]:
# Sanity check: group the station documents by uuid, count each group, and
# print every group whose count is not exactly one.
uuid_counts_pipeline = [{"$group": {"_id": "$uuid","count": {"$sum": 1}}}]
for uuid_group in stationen.aggregate(uuid_counts_pipeline):
    if uuid_group["count"] == 1:
        continue
    print(uuid_group)