# Data Collection Microservice

## Imports

In [1]:
# reload .py files on change:
# https://ipython.readthedocs.io/en/stable/config/extensions/autoreload.html?highlight=autoreload
%load_ext autoreload
%autoreload 2

import json
import time
import os
from datetime import datetime
import pandas as pd

# import custom classes
from OpenWeatherMap import OpenWeatherMap
from Kafka import KafkaConfluentWriter

## Collection Logic
The service needs to collect 5-day forecast data for our list of cities and pass the information on to Kafka.

In [2]:
# Shared service clients, created when this cell runs and used as module-level
# globals by the functions defined below.
# openWeatherMap: queried through get_forecast() in get_transformed_forecast().
openWeatherMap = OpenWeatherMap()
# kafkaConfluentWriter: receives every collected batch through produce() in
# collect_forecast_data(). 'weather.forecast' is presumably the target Kafka
# topic name — confirm against the KafkaConfluentWriter class.
kafkaConfluentWriter = KafkaConfluentWriter('weather.forecast')

# helper function to format a timestamp to a readable datetime string
def format_date(dt: str) -> str:
    """Render a Unix timestamp as a ``DD.MM.YYYY HH:MM`` string in local time.

    Args:
        dt: Seconds since the epoch, given as anything ``float()`` accepts
            (a numeric string, an int or a float).

    Returns:
        The timestamp formatted with ``'%d.%m.%Y %H:%M'``; seconds are dropped.
    """
    seconds = float(dt)
    moment = datetime.fromtimestamp(seconds)
    return moment.strftime('%d.%m.%Y %H:%M')


# load 'locations.json' into a json-object and return it
def load_locations() -> dict:
    """Read ``locations.json`` from the working directory and return it parsed.

    The file is opened explicitly as UTF-8, the encoding JSON text is
    specified to use, so non-ASCII place names decode identically on every
    platform instead of depending on the locale's default encoding.

    Returns:
        The deserialized JSON document. The rest of this notebook iterates it
        as a mapping of location key to city entry.

    Raises:
        FileNotFoundError: if ``locations.json`` does not exist.
        json.JSONDecodeError: if the file content is not valid JSON.
    """
    with open('locations.json', mode='r', encoding='utf-8') as file:
        return json.load(file)

# query open weather map api and only keep data of interest
# return a list of temperature forecasts for the cities
def get_transformed_forecast(cities: dict) -> dict:
    """Fetch the forecast for every city and keep only the temperature data.

    Args:
        cities: Mapping of location key to a city entry. Each entry is passed
            unchanged to ``openWeatherMap.get_forecast`` and echoed back in
            the result under ``'city'``. The mapping itself is not modified.

    Returns:
        A dict of the form
        ``{'cities': [{'city': <entry>, 'weather': [{'dt': ..., 'temp': ...}, ...]}, ...]}``
        with one element per entry of ``cities``, in iteration order.

    Raises:
        KeyError: if a forecast response has no ``'list'`` key, or one of its
            items lacks ``'dt_txt'`` or ``'main'``/``'temp'``.
    """
    forecasts = []

    for city in cities.values():
        forecast = openWeatherMap.get_forecast(city)

        # for now we only care about the temperature
        # - same logic could be applied for other features like wind, humidity, etc.
        weather = [
            {'dt': pred['dt_txt'], 'temp': pred['main']['temp']}
            for pred in forecast['list']
        ]
        forecasts.append({'city': city, 'weather': weather})

    # wrap the per-city results in a single message payload
    return {'cities': forecasts}

# for each location, query openWeatherMap for the 5-day forecast and 
# store the returned values in Kafka
def collect_forecast_data(interval_seconds: float = 60) -> None:
    """Poll the forecasts and publish them to Kafka until interrupted.

    Locations are loaded once via ``load_locations()``. Each cycle then fetches
    the forecasts with ``get_transformed_forecast``, builds the message key
    from the current time with ``format_date``, and hands key and payload to
    ``kafkaConfluentWriter.produce``.

    Args:
        interval_seconds: Pause between two collection cycles, in seconds.
            Defaults to 60 (one minute). The pause starts after a cycle has
            finished, so the effective period is the cycle duration plus this
            value. Note that ``format_date`` yields minute resolution, so
            intervals below one minute can produce repeated message keys.

    Returns:
        None. The function only returns after a ``KeyboardInterrupt``; any
        other exception propagates to the caller.
    """
    try:
        print("Starting collection ...")

        cities = load_locations() # get locations from locations.json

        # TASK 3: BUFFERING
        while True: # look for new forecast information once per interval
            forecasts = get_transformed_forecast(cities) # get temp forecasts from OpenWeatherMap
            dt = format_date(datetime.now().timestamp()) # time of collection as message key

            print(f"Weather data collected at {dt}!")
            # store message with collection time and forecasts in Kafka
            kafkaConfluentWriter.produce(dt, forecasts)

            time.sleep(interval_seconds) # wait until the next cycle

    except KeyboardInterrupt: # stop service with KeyboardInterrupt
        print("... collection stopped!")

## Run the Service

In [5]:
# collect new data every minute until service is stopped (KeyboardInterrupt)
# NOTE: this call loops indefinitely, so the cell keeps the kernel busy until
# it is interrupted (e.g. with the notebook's "Interrupt Kernel" command).
collect_forecast_data()

Starting collection ...
Weather data collected at 24.08.2022 22:10!
Message delivered to weather.forecast [0]
Weather data collected at 24.08.2022 22:11!
Message delivered to weather.forecast [0]
Weather data collected at 24.08.2022 22:12!
Message delivered to weather.forecast [0]
... collection stopped!
