## **STREAMING - OpenWeatherMap**

In [None]:
import datetime
import json
import kafka
import time
import requests

In [None]:
# Load the runtime settings from the JSON file at the filesystem root.
# The encoding is given explicitly: JSON text is UTF-8, and without it open()
# falls back to the locale's default encoding, which can mis-decode
# non-ASCII values on some machines.
with open("/variables.json", "r", encoding="utf-8") as file :
    data = json.load(file)

# OpenWeatherMap credentials and the lookup tables stored in the settings file.
apikey = data["apikey"]
locations = data["locations"]
plants = data["plants"]

# Kafka configuration
kafka_broker = data["kafka"]["broker"]
kafka_topic = data["kafka"]["topic"]

# Base endpoint queried for weather data at a given point and Unix time.
URL_timemachine = "https://api.openweathermap.org/data/3.0/onecall/timemachine"

In [None]:
def _to_json_bytes(value) :
    """Serialize a message value to JSON and encode it as UTF-8 bytes."""
    return json.dumps(value).encode('utf-8')


# Producer shared by the cells below; every value passed to send() goes
# through the JSON serializer above.
producer = kafka.KafkaProducer(
    bootstrap_servers=kafka_broker,
    value_serializer=_to_json_bytes,
)

In [None]:
def url(lat, lon, dt, units="metric") :
    """Build the full "timemachine" request URL.

    Args:
        lat: Latitude, rendered with ``%f`` (six decimal places).
        lon: Longitude, rendered with ``%f`` (six decimal places).
        dt: Unix timestamp, rendered with ``%d``.
        units: Value of the ``units`` query parameter.

    Returns:
        ``URL_timemachine`` followed by the query string, which also embeds
        the module-level ``apikey`` as ``appid``.
    """
    query_template = "?lat=%f&lon=%f&dt=%d&appid=%s&units=%s"
    query = query_template % (lat, lon, dt, apikey, units)
    return URL_timemachine + query

def preprocess(data) :
    """Flatten one API response into a single record.

    The record starts with the top-level ``lat`` and ``lon``, is extended
    with every field of the first entry of ``data["data"]``, loses its
    ``weather`` field if present, and gains ``day``, ``month``, ``year``,
    ``hour`` and ``minute`` derived from the entry's ``dt`` timestamp.

    The timestamp is converted with ``datetime.fromtimestamp`` without a
    timezone argument, so the calendar fields are expressed in the local
    timezone of the machine running this code.

    Args:
        data: Decoded response containing ``lat``, ``lon`` and a non-empty
            ``data`` list whose first entry has a ``dt`` key.

    Returns:
        A new dict; the input is left untouched.
    """
    record = {"lat": data["lat"], "lon": data["lon"], **data["data"][0]}
    if "weather" in record :
        del record["weather"]

    moment = datetime.datetime.fromtimestamp(record["dt"])
    # Insertion order matters for the serialized output, so keep this sequence.
    for field in ("day", "month", "year", "hour", "minute") :
        record[field] = getattr(moment, field)
    return record

def fetch(url, timeout=10) :
    """Download one weather record and return it preprocessed.

    Args:
        url: Full request URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        The dict produced by ``preprocess`` when the server answers with
        HTTP 200, otherwise ``None`` (non-200 status or transport failure).
    """
    try :
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error :
        # Report only the exception type: the message usually embeds the
        # request URL, which carries the API key.
        print("Request failed (%s)" % type(error).__name__)
        return None

    if response.status_code != 200 :
        return None
    return preprocess(response.json())

def collect(location = locations["nantes"], hours = range(8, 19), duration = 1) :
    """Fetch weather records day by day and publish them to Kafka.

    One batch is produced per calendar day, starting with today and moving
    one day back for each following batch. Within a batch, one request is
    made for minute 30 of every hour in ``hours``; each successful record is
    sent to ``kafka_topic``. After each batch the producer is flushed, a
    progress line is printed and the function sleeps for two seconds.

    NOTE(review): the request time is a naive datetime, so ``.timestamp()``
    interprets it in the machine's local timezone - confirm whether UTC was
    intended.

    Args:
        location: Mapping with ``lat`` and ``lon`` keys.
        hours: Iterable of hours of the day to request.
        duration: Number of daily batches to produce.
    """
    today = datetime.date.today()
    half_past = 30
    for batch_index in range(duration) :
        current_day = today - datetime.timedelta(days = batch_index)
        for hour in hours :
            moment = datetime.datetime.combine(
                current_day, datetime.time(hour, half_past)
            )
            request_url = url(
                location["lat"], location["lon"], int(moment.timestamp())
            )
            record = fetch(request_url)
            if not record :
                # Failed or empty fetches are skipped silently.
                continue
            producer.send(kafka_topic, value=record)
        producer.flush()
        print("Batch %d sent !"%(batch_index + 1))
        time.sleep(2)

In [None]:
# Run the collection for 35 daily batches (five weeks), starting today and
# going backwards, with the default location and hours.
collect(duration = 7*5)