In [48]:
import os
import sys
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import pickle
import time

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras import layers

from tqdm.notebook import tqdm, trange

tqdm.pandas()

import statsmodels.api as sm
import seaborn as sns

import annutils

local_root_path = "."
%matplotlib inline
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, RangeTool
from bokeh.plotting import figure, show
from bokeh.palettes import Magma, Inferno, Plasma, Viridis, Cividis, Colorblind, Bokeh
import itertools

output_notebook()

In [33]:
# Compression settings passed as `compression=` to pd.read_csv in this notebook:
# the files are read as zip archives.
# NOTE(review): `archive_name` is documented by pandas as a to_csv option;
# presumably it is kept here to mirror how the files were written - confirm.
compression_opts = {"method": "zip", "archive_name": "out.csv"}

def show_plot(df1, df2, col_name, df1_name, df2_name):
	"""Show an interactive Bokeh comparison of one column from two dataframes.

	Three figures are stacked vertically and displayed with ``show``:

	* a main figure with both series and a legend,
	* a smaller overview figure carrying a ``RangeTool`` that controls the
	  x-range of the main figure,
	* a thin "Difference" figure with ``df2[col_name] - df1[col_name]``.

	Args:
		df1: DataFrame drawn in blue. Its first and last index values define
			the initial x-range of the main figure, so it must be non-empty.
		df2: DataFrame drawn in red.
		col_name: Column to plot; must exist in both frames. Also used as the
			title of the main figure.
		df1_name: Legend label for the ``df1`` line.
		df2_name: Legend label for the ``df2`` line.

	The x-axes are declared as datetime axes, so both indexes are expected to
	be datetime-like.
	"""
	# The main figure initially spans the full index range of df1.
	first = df1.index[0]
	last = df1.index[-1]

	p = figure(height=300, width=1024, tools="xpan,wheel_zoom,box_zoom,reset,save,hover",
	           x_axis_type="datetime", x_axis_location="above",
	           background_fill_color="#efefef", x_range=(first, last), title=col_name)

	# df2 is drawn first so the df1 line is rendered on top of it.
	p.line(df2.index, df2[col_name], color='red', legend_label=df2_name)
	p.line(df1.index, df1[col_name], color='blue', legend_label=df1_name)

	# Overview figure: shares the y-range with the main figure and hosts the
	# range tool used to select the window shown above.
	select = figure(title="Drag the middle and edges of the selection box to change the range above",
	                height=130, width=1024, y_range=p.y_range,
	                x_axis_type="datetime", y_axis_type=None,
	                tools="", toolbar_location=None, background_fill_color="#efefef")

	range_tool = RangeTool(x_range=p.x_range)
	range_tool.overlay.fill_color = "navy"
	range_tool.overlay.fill_alpha = 0.2

	select.line(df2.index, df2[col_name], color='red')
	select.line(df1.index, df1[col_name], color='blue')

	select.ygrid.grid_line_color = None
	select.add_tools(range_tool)

	diff = figure(title="Difference",
	              height=80, width=1024, y_range=p.y_range,
	              x_axis_type="datetime", y_axis_type=None,
	              tools="", toolbar_location=None, background_fill_color="#efefef")

	# Subtracting two Series aligns them on the union of their indexes, so the
	# result does not necessarily have the same length/order as df2.index.
	# Plot the difference against its own index to keep x and y consistent.
	delta = df2[col_name] - df1[col_name]
	diff.line(delta.index, delta, color='green')

	show(column(p, select, diff))


In [57]:
def show_plots(plot_tuples):
	"""Overlay several series in one interactive Bokeh figure with a range selector.

	Args:
		plot_tuples: Non-empty sequence of ``(dataframe, column_name, label)``
			tuples. The first tuple is the reference series: it is drawn in
			black, its column name becomes the figure title and the first and
			last values of its index define the initial x-range. Every
			following series takes the next colour of the 8-colour
			``Colorblind`` palette, cycling when the palette is exhausted.

	The main figure and a smaller overview figure (holding the ``RangeTool``)
	are stacked in a column and displayed with ``show``.
	"""
	reference_df, title_col, _ = plot_tuples[0]
	x_start, x_end = reference_df.index[0], reference_df.index[-1]

	palette = Colorblind[8]

	def series_colors():
		# Black for the reference series, then the palette repeated forever.
		return itertools.chain(['black'], itertools.cycle(palette))

	main_fig = figure(height=300, width=1024, tools="xpan,wheel_zoom,box_zoom,reset,save,hover",
	                  x_axis_type="datetime", x_axis_location="above",
	                  background_fill_color="#efefef", x_range=(x_start, x_end), title=title_col)

	for (frame, series_col, legend), line_color in zip(plot_tuples, series_colors()):
		main_fig.line(frame.index, frame[series_col], color=line_color, legend_label=legend)

	overview = figure(title="Drag the middle and edges of the selection box to change the range above",
	                  height=130, width=1024, y_range=main_fig.y_range,
	                  x_axis_type="datetime", y_axis_type=None,
	                  tools="", toolbar_location=None, background_fill_color="#efefef")

	# The range tool drives the x-range of the main figure.
	window_tool = RangeTool(x_range=main_fig.x_range)
	window_tool.overlay.fill_color = "navy"
	window_tool.overlay.fill_alpha = 0.2

	# Same series and colours as above, without legend entries.
	for (frame, series_col, _legend), line_color in zip(plot_tuples, series_colors()):
		overview.line(frame.index, frame[series_col], color=line_color)

	overview.ygrid.grid_line_color = None
	overview.add_tools(window_tool)

	show(column(main_fig, overview))


In [35]:
def read_df(file_name):
    """Read a CSV file into a DataFrame indexed by timestamps.

    The file is read with the module-level ``compression_opts`` (zip method),
    the first column is used as the index and the index is then converted
    with ``pd.to_datetime``.

    Args:
        file_name: Path of the file to read.

    Returns:
        The loaded DataFrame with its index converted to datetimes.
    """
    frame = pd.read_csv(file_name, index_col=0, compression=compression_opts)
    timestamps = pd.to_datetime(frame.index)
    frame.index = timestamps
    return frame

In [36]:
# --- What to inspect: experiment, model file and input file -----------------
experiment = "6years"
model_file = "mtl_i118_residual_lstm_8_2.h5"
file_name = "dsm2_ann_inputs_base.csv"

# --- Paths derived from the selection above ----------------------------------
experiment_dir = os.path.join(local_root_path, "Experiments", experiment)
model_dir = os.path.join(experiment_dir, "models")

# Names without their extensions.
model_name = os.path.splitext(model_file)[0]
file_part = os.path.splitext(file_name)[0]

model_prediction_dir = os.path.join(experiment_dir, "results", "prediction", model_name)
prediction_file = os.path.join(model_prediction_dir, file_name)
target_file = os.path.join(experiment_dir, "results", "target", f"{file_part}_target.csv")

# --- Report (but do not stop on) every expected location that is missing ----
for _missing_msg, _checked_path in (
	("Experiment does not exist", experiment_dir),
	("model_prediction_dir does not exist", model_prediction_dir),
	("prediction_file does not exist", prediction_file),
	("target_file does not exist", target_file),
):
	if not os.path.exists(_checked_path):
		print(_missing_msg)

In [37]:
# Load the prediction and its target through read_df, the notebook's shared
# loader (zip-compressed CSV, first column as index, index converted to
# datetimes), instead of repeating the same read/convert steps inline.
prediction = read_df(prediction_file)
target = read_df(target_file)

In [38]:
# Compare target (blue) and prediction (red) for the column at position 11.
show_plot(
	df1=target,
	df2=prediction,
	col_name=target.columns[11],
	df1_name="target",
	df2_name="prediction",
)

In [39]:
# for i in range(len(target.columns)):
#     show_plot(target, prediction, target.columns[i], "target", "prediction")


In [61]:
# Every saved model (*.h5) in the experiment's model directory.
# os.listdir() returns entries in arbitrary order; sorting keeps the legend
# order and colour assignment of the plot stable between runs.
model_files = sorted(f for f in os.listdir(model_dir) if f.endswith(".h5"))

file_name = "dsm2_ann_inputs_base.csv"

file_part = os.path.splitext(file_name)[0]

# (dataframe, column name, legend label) tuples consumed by show_plots();
# the first entry is the reference series.
plot_tuples = []

target_file = os.path.join(experiment_dir, "results", "target", file_part + "_target.csv")

target = read_df(target_file)

plot_tuples.append((target, target.columns[11], "target"))

for model_file in model_files:
	model_name = os.path.splitext(model_file)[0]
	model_prediction_dir = os.path.join(experiment_dir, "results", "prediction", model_name)
	prediction_file = os.path.join(model_prediction_dir, file_name)
	if not os.path.exists(prediction_file):
		print("prediction_file does not exist")
		# Skip models without a prediction for this input file; reading the
		# missing file would raise and abort the whole cell.
		continue
	prediction = read_df(prediction_file)
	plot_tuples.append((prediction, prediction.columns[11], model_name))






In [62]:
# Plot the target together with the per-model predictions collected above.
show_plots(plot_tuples=plot_tuples)

In [59]:
# Pick a single model and plot its predictions from the different experiments
model_file = "mtl_i118_residual_lstm_8_2.h5"
model_name = os.path.splitext(model_file)[0]

file_name = "dsm2_ann_inputs_base.csv"
file_part = os.path.splitext(file_name)[0]

# Position of the column to plot; used for the target and for every
# prediction so that all series refer to the same column.
col_num = 11

experiments = ["colab", "6yearsAugmented", "6years"]

# (dataframe, column name, legend label) tuples consumed by show_plots();
# the first entry is the reference series.
plot_tuples = []

# Only the target of the first experiment is added. Original note: the
# targets of the experiments "should stack on top of each other".
experiment = experiments[0]
if experiment is not None:
	experiment_dir = os.path.join(local_root_path, "Experiments", experiment)
	target_file = os.path.join(experiment_dir, "results", "target", file_part + "_target.csv")
	target = read_df(target_file)
	plot_tuples.append((target, target.columns[col_num], experiment + "_target"))


# One prediction series per experiment; experiments without a prediction
# file for this model are reported and skipped.
for experiment in experiments:
	experiment_dir = os.path.join(local_root_path, "Experiments", experiment)
	model_prediction_dir = os.path.join(experiment_dir, "results", "prediction", model_name)
	prediction_file = os.path.join(model_prediction_dir, file_name)
	if not os.path.exists(prediction_file):
		print("prediction_file does not exist")
	else:
		prediction = read_df(prediction_file)
		# Use col_num (not a hard-coded 11) so the predictions always match
		# the target column selected above.
		plot_tuples.append((prediction, prediction.columns[col_num], experiment + "_" + model_name))



In [60]:
# Plot the target together with the per-experiment predictions collected above.
show_plots(plot_tuples=plot_tuples)