Generate fake data
====

First, we use the fake data generator to create 50000 documents that will be pushed to MongoDB using a connector.

In [None]:
import random
from pprint import pprint
from lib.generator_fake_data import FakeDataGenerator

# Candidate values for the two categorical fields.
city_choices = ['Paris', 'London', 'Istanbul', 'Vezoul', 'Berlin']
type_choices = ['A', 'B', 'C']

# One entry per generated field: its name and a zero-argument callable
# that produces a fresh random value each time it is called.
field_specs = [
    {'name': 'city', 'generator': lambda: random.choice(city_choices)},
    {'name': 'type', 'generator': lambda: random.choice(type_choices)},
    # Gaussian draws: (mean, standard deviation).
    {'name': 'temperature', 'generator': lambda: random.gauss(20, 5)},
    {'name': 'pressure', 'generator': lambda: random.gauss(998, 15)},
    # Integer in [0, 10], both bounds included.
    {'name': 'wind_speed', 'generator': lambda: random.randint(0, 10)},
]

# NOTE(review): time_increment is presumably the number of seconds between
# two consecutive documents -- confirm against FakeDataGenerator.
generator = FakeDataGenerator(time_increment=1800, params=field_specs)

docs = generator.generate_documents(50000)

# Quick sanity check: show how many documents we got and both ends of the list.
print('{n} documents generated !'.format(n=len(docs)))
for caption, sample in (
    ('Here is the first document:', docs[0]),
    ('Here is the last document:', docs[-1]),
):
    print(caption)
    pprint(sample)

The fixed range connector
======

Declare a fixed-range connector by providing the tag keys:
- `city`
- `type`

and the value keys:
- `temperature`
- `pressure`
- `wind_speed`

In [None]:
from lib.connector_fixed_range import FixedRangeConnector

# Connection settings handed to the connector as-is.
config = {
    'uri': 'localhost', 'port': 27017,
    'dbName': 'TestDb', 'collectionName': 'documents',
}

# Tag keys identify a series; value keys are the measurements stored for it.
tag_keys = ['city', 'type']
value_keys = ['temperature', 'pressure', 'wind_speed']

frc = FixedRangeConnector(
    config, time_key='datetime', tag_keys=tag_keys, value_keys=value_keys
)

print('Fixed-range connector initialized: ' + str(frc))

Push the documents into MongoDB using the connector
=========

It's super easy: simply use the `push` method of the connector.
The fixed-range connector knows which fields to aggregate and which to group by, because we provided the corresponding keys when initializing it.

In [None]:
# Push the documents one at a time, reporting progress every 1000 inserts.
for inserted, doc in enumerate(docs, start=1):
    frc.push(doc)
    if inserted % 1000 == 0:
        # flush=True so the progress line shows up while the loop is still running.
        print('{n} documents inserted!'.format(n=inserted), flush=True)

Get aggregates by days
======

In [None]:
import time
from datetime import datetime, timedelta

now = datetime.now()

# Measure the request with a monotonic, high-resolution clock: time.time() is
# the wall clock and can jump if the system time is adjusted mid-measurement.
start_time = time.perf_counter()

# Average temperature for Paris over the 500 days following `now`.
# NOTE(review): the section heading says "by days" but the interval is '1m' --
# confirm which unit the connector assigns to 'm'.
req = frc.getData(
    start=now,
    end=now + timedelta(days=500),
    interval='1m',
    tag_query={'city': 'Paris'},
    value_queries=[{'name': 'Average temperature', 'key': 'temperature', 'type': 'average'}]
)

# Materialize the result inside the timed section (presumably `req` is lazy,
# so the actual work would only happen while iterating -- confirm).
result = list(req)
print('The request took: %s' % (time.perf_counter() - start_time))

In [None]:
import matplotlib.pyplot as plt
# Line styles are reused cyclically when there are more series than styles.
temperature_styles = ['r--', 'b-', 'g-']

plt.figure(figsize=(15, 6))

# Draw one curve per returned series, labelled with its city and type tags.
for idx, series in enumerate(result):
    tags = series['metadata']
    points = series['data']
    style = temperature_styles[idx % len(temperature_styles)]
    plt.plot(
        points['datetimes'],
        points['Average temperature'],
        style,
        label='{city} (type {type})'.format(city=tags['city'], type=tags['type']),
    )

# Figure decoration.
plt.title('Average temperature in Paris')
plt.xlabel('dates')
plt.ylabel('average temperature')
plt.grid(True)
plt.legend()
plt.show()

Get aggregates by hours
======

In [None]:
now = datetime.now()

# Hourly average pressure for every series tagged type 'A', over the next 100 hours.
req = frc.getData(
    start=now,
    end=now + timedelta(hours=100),
    interval='1h',
    tag_query={'type': 'A'},
    value_queries=[{'name': 'pressure_avg', 'key': 'pressure', 'type': 'average'}]
)

result = list(req)

line_styles = ['r--', 'b-', 'g--']

plt.figure(figsize=(15, 6))
for i, data in enumerate(result):
    plt.plot(
        data['data']['datetimes'],
        data['data']['pressure_avg'],
        # Wrap around the style list: filtering on the type alone can yield more
        # than three series (five cities are generated), and indexing with the
        # bare `i` would then raise IndexError.
        line_styles[i % len(line_styles)],
        label='{city} (type {type})'.format(city=data['metadata']['city'], type=data['metadata']['type'])
    )
plt.xlabel('dates')
plt.ylabel('average pressure')
plt.title('Average pressure of type A per hour')
plt.grid(True)
plt.legend()
plt.show()