# Configure Logging

In [None]:
import logging
logging.basicConfig(level=logging.ERROR) # only show errors: this is a high-level example, so minor warnings are deliberately left for later

# Get Constelation Data

In [None]:
import os
import requests
import zipfile
import pandas as pd

def get_file(url, path, verify=False):
    """Download ``url`` to ``path`` unless ``path`` already exists.

    Parameters
    ----------
    url : str
        Address to fetch; redirects are followed.
    path : str
        Destination file. Any missing parent directories are created.
    verify : bool or str, optional
        Forwarded to ``requests.get``. Defaults to ``False`` to preserve the
        previous behaviour of this helper.

    Returns
    -------
    None

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status. Nothing is written to
        ``path`` in that case, so a later run will retry the download.
    """
    if os.path.exists(path):
        print("Skipping download {} already exists.".format(path))
        return None
    print("Downloading {} to {} ...".format(url, path))
    directory = os.path.dirname(os.path.abspath(path))
    # makedirs (not mkdir) so nested targets such as "a/b/file.csv" work.
    os.makedirs(directory, exist_ok=True)
    # NOTE(review): verify=False disables TLS certificate checking. Pass
    # verify=True (or a CA bundle path) whenever the server has a valid
    # certificate.
    r = requests.get(url, allow_redirects=True, verify=verify)
    # Fail before touching the disk: otherwise an error page would be cached
    # as the dataset and every later run would skip the download.
    r.raise_for_status()
    with open(path, "wb") as f:
        f.write(r.content)
    print("Complete")
        
def unzip(path, dest):
    """Extract the zip archive at ``path`` into a new directory ``dest``.

    Parameters
    ----------
    path : str
        Location of the zip archive.
    dest : str
        Directory to create and extract into. If it already exists the
        extraction is skipped entirely.

    Returns
    -------
    None

    Raises
    ------
    FileNotFoundError, zipfile.BadZipFile
        If the archive is missing or unreadable. ``dest`` is not created in
        that case.
    """
    if os.path.exists(dest):
        print("Skipping unzip as {} already exists.".format(dest))
        return None
    # Open the archive before creating dest: if the archive is missing or
    # corrupt, no empty dest is left behind to make later runs skip.
    with zipfile.ZipFile(path, "r") as zip_ref:
        # makedirs (not mkdir) so nested destinations work.
        os.makedirs(dest)
        zip_ref.extractall(dest)

In [None]:
# Populate this with the URL of your dataset ("****" is only a placeholder).
data_url = "****"
# Local file the dataset is stored in, resolved against the current working directory.
data_path = os.path.join(os.getcwd(), "datasets/constelation.csv")

In [None]:
# Fetch the dataset; the download is skipped when data_path already exists.
get_file(data_url, data_path)

# Wrangle Dataset

This step is specific to our dataset, so you will likely need to adapt it to the columns and formats of your own data.

In [None]:
import pandas as pd
import numpy as np

# Load the raw CSV, discard the "totyield" column, and order rows by milking date.
# The date column is parsed *before* sorting so the order is chronological
# rather than the lexicographic order of the raw text.
data = (
    pd.read_csv(data_path)
    .drop(columns="totyield")
    .assign(milk_date=lambda df: pd.to_datetime(df["milk_date"], format="%Y-%m-%d"))
    .sort_values(by=["milk_date"])
    .dropna()  # remove every row with a missing value
)
data

In [None]:
# Debug counters: i counts every rolling window visited, j only the
# full-length windows that were kept as training examples.
i = 0
j = 0
inputs = {"x":[], 
          "y":[], 
#           "context":[]
         }
window_size = 21

# One group per "itb" value; rows inside a group keep the order of `data`.
for _, group in data.groupby("itb"):
    # Days since the previous milking, divided by 30 as a quick, rough
    # normalisation (assumes gaps of at most ~30 days).
    # .assign returns a new frame, so the slice handed out by groupby is never
    # mutated in place (which triggers SettingWithCopyWarning).
    group = (
        group
        .assign(previous_milk_date=lambda df: df["milk_date"].shift(1))
        .assign(days_unmilked=lambda df: (df["milk_date"] - df["previous_milk_date"]).dt.days / 30)
        .dropna()  # the first row of each group has no previous milking
    )
    # Keep only numeric columns; this also discards the date columns.
    example = group.select_dtypes(include=[np.number])
    # Slide a window from oldest to newest so the network never sees the
    # outcome it is asked to predict.
    for window in example.rolling(window=window_size):
        # Iterating a Rolling object also yields the shorter leading windows;
        # only full-length ones become examples.
        if len(window["milkyield"]) == window_size:
            # x: every row of the window except the last (the row to predict)
            inputs["x"].append(window.iloc[:-1, :].to_numpy())
            # y: milk yield of the last (most recent) row, i.e. the ground truth
            inputs["y"].append(np.array(window["milkyield"].iloc[-1]))
#             inputs["context"].append(np.array(window["days_unmilked"].iloc[-1])) # get some additional context relevant to prediction
            j += 1
        i += 1

print(len(inputs["x"]))
# print("first input", inputs["x"][0], inputs["y"][0], inputs["context"][0])
# print("last input", inputs["x"][-1], inputs["y"][-1], inputs["context"][-1])

# Get Constelation Model

In [None]:
from fhez.nn.graph.prefab import milky, cnn_regressor

# Alternative prefab graph, kept for reference; swap the comments to use it instead.
# network = milky(data_shape=inputs["x"][0].shape, filter_length=5, stride=3)
# Build the graph from the shape of the first example window.
# NOTE: inputs["x"][0] raises IndexError if no full-length window was produced.
network = cnn_regressor(data_shape=inputs["x"][0].shape, filter_length=5, stride=3)

In [None]:
import copy
def strip(graph):
    """Return a deep copy of ``graph`` with the "node" attribute removed from every node.

    ``graph`` must expose ``nodes(data=True)`` yielding ``(name, attributes)``
    pairs. The graph passed in is not modified, and nodes that carry no
    "node" attribute are left as they are.
    """
    clone = copy.deepcopy(graph)
    for _, attributes in clone.nodes(data=True):
        # A missing key is fine: there is simply nothing to remove.
        attributes.pop("node", None)
    return clone

In [None]:
from pyvis.network import Network

# Work on a copy of the graph whose nodes no longer carry the "node" attribute.
# NOTE(review): presumably pyvis cannot handle those objects — confirm.
stripped = strip(network)
print(stripped)

# Render the stripped graph as an interactive HTML page inside the notebook.
net = Network('700px', '700px', bgcolor='#222222', font_color='white', notebook=True)
net.from_nx(stripped)
# net.show_buttons(filter_="physics")
net.show("constelation.html")

# Train Model

In [None]:
from fhez.nn.graph.utils import train
# NOTE(review): presumably fits `network` on the windowed `inputs`; the exact
# meaning of batch_size and debug is defined by fhez — confirm against its docs.
train(graph=network, inputs=inputs, batch_size=3, debug=False)