In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
#import time
import json

import joblib
import torch

import numpy as np
import pandas as pd
import xgboost as xgb

from scipy.stats import pearsonr

from IPython.display import Image

import plotly.graph_objects as go

from utils import scale_data
from utils import preprocess_AUT_data
from utils import set_seeds

from utils import bayes_filter
from utils import l2norm_km
from utils import print_metrics

from utils import train_teleport
from utils import test_models

In [None]:
from constants import SEED

from constants import NUM_ESTIMATORS

In [None]:
# Sensor columns used as input features for model H and for the teleport models.
SENSOR_COLS = ['act', 'YTD']
# Column that model H is trained to predict.
TARGET = ['Temperature']
# Thresholds on `real_x`: rows with real_x < X_WEST form the "west" region and
# rows with real_x > X_EAST form the "east" region.
X_WEST = 120
X_EAST = 160
# Cache of the preprocessed data frame (written with DataFrame.to_csv).
DF_COW = 'df_cow.csv'
# Cache of the kriged maps. NOTE: despite the .npy extension this file is written
# with joblib.dump, so it must be read with joblib.load (not np.load).
FN_KRIGED_COW = 'assets/cow_maps.npy'
# joblib caches of the trained models; the training cells only retrain when the
# corresponding file is missing.
FN_MODELS_H_COW = 'assets/cow_models_H.pkl'
FN_MODEL_TELEPORT_COW = 'assets/cow_models_teleport.pkl'

# Prepare Data

In [None]:
# Presumably seeds the random number generators so the np.random-based splits
# and window sampling further down are reproducible.
# NOTE(review): called without arguments although SEED is imported from
# constants — confirm that utils.set_seeds defaults to that seed.
set_seeds()

In [None]:
# Run the preprocessing only when one of the two cache files is missing;
# otherwise everything is read back from disk.
caches_present = os.path.exists(DF_COW) and os.path.exists(FN_KRIGED_COW)
if not caches_present:
    df, kriged_maps = preprocess_AUT_data(
        'animal_id',
        SENSOR_COLS,
        TARGET,
        ['animal_id', 'datetime', 'postal_code'],
    )
    df.to_csv(DF_COW, index=False)
    joblib.dump(kriged_maps, FN_KRIGED_COW)

# Always continue from the on-disk copies so a fresh and a cached run see
# exactly the same inputs.
kriged_maps = joblib.load(FN_KRIGED_COW)
df, scaler = scale_data(pd.read_csv(DF_COW), SENSOR_COLS + TARGET)

In [None]:
# --- Row-level split: 30% of the rows (rounded down) go to validation. ---
n_examples_data = df.shape[0]
val_examples_data = np.random.choice(df.index, int(n_examples_data * 0.3), replace=False)
is_val_row = df.index.isin(val_examples_data)
df_val_data = df.loc[is_val_row]
df_train_data = df.loc[~is_val_row]

# --- West/east split: keep only rows outside the middle band, then hold out
# --- 30% of the animals on each side.
df_joined = df.loc[(df['real_x'] < X_WEST) | (df['real_x'] > X_EAST)]
animals_west = df_joined.loc[df_joined['real_x'] < X_WEST, 'animal_id'].unique()
animals_east = df_joined.loc[df_joined['real_x'] > X_EAST, 'animal_id'].unique()
n_examples_west = len(animals_west)
n_examples_east = len(animals_east)
val_examples_west = np.random.choice(animals_west, int(n_examples_west * 0.3), replace=False)
val_examples_east = np.random.choice(animals_east, int(n_examples_east * 0.3), replace=False)

# --- Animal-level split: 30% of all animals are held out entirely. ---
all_animals = df['animal_id'].unique()
n_examples_animal = len(all_animals)
val_examples_animal = np.random.choice(all_animals, int(n_examples_animal * 0.3), replace=False)
is_val_animal = df['animal_id'].isin(val_examples_animal)
df_val_animal = df.loc[is_val_animal]
df_train_animal = df.loc[~is_val_animal]

# Training
## Train Model H

In [None]:
# Model H maps the sensor columns to the target column. One regressor is
# trained per split strategy; trained models are cached with joblib.
if os.path.exists(FN_MODELS_H_COW):
    models_H = joblib.load(FN_MODELS_H_COW)
else:
    training_frames = {
        'data_split': df_train_data,
        'animal_split': df_train_animal,
    }
    models_H = {}
    for split_name, train_df in training_frames.items():
        reg = xgb.XGBRegressor(n_estimators=NUM_ESTIMATORS, random_state=SEED, n_jobs=8)
        reg.fit(train_df[SENSOR_COLS], train_df[TARGET])
        models_H[split_name] = reg
    joblib.dump(models_H, FN_MODELS_H_COW)

In [None]:
# Evaluate every model H with print_metrics and keep its return value per split.
dist = {}

for split_name, model in models_H.items():
    if split_name == 'data_split':
        # NOTE(review): this selects *all* rows of every animal that has at
        # least one validation row, so the frame also contains rows the
        # data_split model was trained on — confirm this is intended.
        animals_with_val_rows = df_val_data['animal_id'].unique()
        df_val = df.loc[df['animal_id'].isin(animals_with_val_rows)]
    elif split_name == 'animal_split':
        df_val = df_val_animal

    dist[split_name] = print_metrics(
        df_val, 'animal_id', model, SENSOR_COLS, TARGET, kriged_maps, scaler, 'A'
    )

## Train Teleport Models

In [None]:
# Membership masks for the held-out animals on each side.
held_out_west = df_joined['animal_id'].isin(val_examples_west)
held_out_east = df_joined['animal_id'].isin(val_examples_east)

# Training frames: rows on the respective side whose animal is not held out.
df_train_west = df_joined.loc[~held_out_west & (df_joined['real_x'] < X_WEST)]
df_train_east = df_joined.loc[~held_out_east & (df_joined['real_x'] > X_EAST)]

# Validation frames: every row of the held-out animals.
df_val_west = df_joined.loc[held_out_west]
df_val_east = df_joined.loc[held_out_east]

In [None]:
# Train the teleport models on the sensor columns of the west/east frames,
# or reuse the joblib cache when it exists.
if os.path.exists(FN_MODEL_TELEPORT_COW):
    models_T = joblib.load(FN_MODEL_TELEPORT_COW)
else:
    sensors_train_west, sensors_train_east, sensors_val_west, sensors_val_east = (
        frame[SENSOR_COLS].to_numpy()
        for frame in (df_train_west, df_train_east, df_val_west, df_val_east)
    )
    models_T = train_teleport(
        sensors_train_west,
        sensors_train_east,
        sensors_val_west,
        sensors_val_east,
    )
    joblib.dump(models_T, FN_MODEL_TELEPORT_COW)

# Unpack the individual networks for direct use in the plotting cells.
enc_a, enc_b, lat, dec_a, dec_b = (
    models_T[part] for part in ('enc_a', 'enc_b', 'lat', 'dec_a', 'dec_b')
)

# Test

In [None]:
# Evaluate original vs. teleported traces on the held-out west ("a") and east
# ("b") animals, using the data_split model H and the temperature kriged maps.
# The returned sequences are consumed by the plotting cells further down:
# orig_*/tele_* are plotted there as errors in km, rse_tele_* as a per-trace
# teleport error, and mae_ae is averaged as the autoencoder MAE.
# NOTE(review): exact semantics of each return value live in utils.test_models.
orig_a, tele_a, orig_b, tele_b, rse_tele_a, rse_tele_b, mae_ae = test_models(
    'animal_id', df_val_west, df_val_east, SENSOR_COLS, 
    models_H['data_split'], models_T,
    kriged_maps['Temperature'], scaler,'A'
)

In [None]:
# Convert the collected autoencoder errors to an array and report their mean.
mae_ae = np.array(mae_ae)
mean_mae_ae = mae_ae.mean()
print(f'AutoEncoder MAE: {mean_mae_ae}')

In [None]:
# Width of the excluded middle band: distance between the two x thresholds
# at the same y coordinate.
west_edge = (X_WEST, 0)
east_edge = (X_EAST, 0)
distance_ew = l2norm_km(west_edge, east_edge, 'A')
print(f'Distance East-West: {distance_ew}')

# Plots


In [None]:
# Stack the held-out west and east animals into one frame with a fresh 0..n-1 index.
df_val = pd.concat([df_val_west, df_val_east], ignore_index=True)

## Scatter Plot Real vs Predicted Values (Model H)

In [None]:
# Gather measured and predicted temperatures animal by animal. Both are passed
# through scaler.inverse_transform before plotting (presumably undoing the
# scaling applied by scale_data — the axes are labelled in Kelvin).
orig, pred = [], []

for _, animal_rows in df_val.groupby('animal_id'):
    measured = scaler.inverse_transform(animal_rows['Temperature'].to_numpy(), ['Temperature'])
    orig.extend(measured.ravel())

    predicted = scaler.inverse_transform(
        models_H['data_split'].predict(animal_rows[SENSOR_COLS]), ['Temperature']
    )
    pred.extend(predicted.ravel())

fig = go.Figure(data=[
    go.Scatter(x=orig, y=np.array(pred).ravel(), mode='markers', name='Prediction'),
    # Identity line: points on it are perfect predictions.
    go.Scatter(x=[260, 300], y=[260, 300], mode='lines'),
])
fig.update_layout(
    showlegend=False,
    autosize=False,
    width=500,
    height=500,
    font={'size': 24},
    template='simple_white',
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0},
    xaxis_title='Real Temperature (K)',
    yaxis_title='Predicted Temperature (K)',
)

# Render once, save the PNG and show the same bytes inline.
img = fig.to_image(format="png")
with open('plots/cow_weather_scatter.png', 'wb') as f:
    f.write(img)
Image(img)

## Localisation over Time

In [None]:
# Localise every validation animal from windows of predicted temperature of
# different lengths and record the distance to its true position.
#
# For each duration shorter than 365 days, 20 windows with random start
# offsets are evaluated. The 365-day window is evaluated exactly once,
# starting at offset 0.
results = []
durations = [7, 14, 31, 90, 180, 365]

temperature_maps = kriged_maps['Temperature']
max_time = temperature_maps.shape[0]  # number of time steps in the maps
model_h = models_H['data_split']

# Group by the scalar column name: grouping by the list ['animal_id'] makes
# pandas >= 2.0 yield 1-tuples as keys, which would end up in the result frame.
for animal_id, animal_rows in df_val.groupby('animal_id'):
    real_x = int(animal_rows['real_x'].iloc[0])
    real_y = int(animal_rows['real_y'].iloc[0])
    postal_code = int(animal_rows['postal_code'].iloc[0])

    for duration in durations:
        full_year = duration == 365
        n_runs = 1 if full_year else 20

        for run in range(n_runs):
            start = 0 if full_year else np.random.randint(0, max_time - duration)

            # NOTE(review): assumes the animal's rows are ordered in time and
            # aligned with the time axis of the kriged maps — confirm.
            w_prime_range = model_h.predict(animal_rows[SENSOR_COLS].iloc[start:start + duration])
            x, y = bayes_filter(w_prime_range, temperature_maps, start, duration)

            # Local name chosen so the `dist` metrics dict from the model H
            # evaluation cell is not overwritten.
            error_km = l2norm_km((real_x, real_y), (x, y), 'A')
            results.append({
                'animal_id': animal_id, 'dist': error_km, 'pred_x': x, 'pred_y': y, 'zip': postal_code,
                'real_x': real_x, 'real_y': real_y, 'run': run, 'duration': duration, 'start': start,
            })

df_results = pd.DataFrame(results)

In [None]:
# One box per window length showing the distribution of localisation errors.
fig = go.Figure(data=[
    go.Box(y=df_results.loc[df_results['duration'] == duration, 'dist'], name=duration)
    for duration in durations
])
fig.update_layout(
    showlegend=False,
    template='simple_white',
    xaxis_title='Days',
    font={'size': 24},
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0},
)
fig.update_yaxes(title_text='Error (km)', range=[0, 100])
fig.show()

In [None]:
# Postal codes (the 'zip' column of df_results) of the farms shown per region.
regions = {'West': [4091, 4793, 5300, 4754, 5211],
           'East': [8063, 8232, 3610, 3720, 8162]}

# Display label per postal code; numbering restarts in each region.
plz_map = {4091: 'Farm 1', 4793: 'Farm 2', 5300: 'Farm 3', 4754: 'Farm 4', 5211: 'Farm 5',
           8063: 'Farm 1', 8232: 'Farm 2', 3610: 'Farm 3', 3720: 'Farm 4', 8162: 'Farm 5'}

# One figure per region: for every farm, one box per window length.
# Loop-local names (farm_dists, box_x) are used so the `results` record list
# and the `x` coordinate from the localisation cell are not overwritten.
for k, region in regions.items():
    fig = go.Figure()

    for duration in df_results['duration'].unique():
        at_duration = df_results['duration'] == duration
        for plz in region:
            farm_dists = df_results.loc[at_duration & (df_results['zip'] == plz), 'dist'].to_numpy()
            # Two-level categorical x axis: farm label (outer), duration (inner).
            box_x = [[plz_map[plz]]*len(farm_dists), ['{:3d} days'.format(int(duration))]*len(farm_dists)]
            fig.add_trace(go.Box(y=farm_dists, x=box_x, name='Days: {:3d}'.format(int(duration)), showlegend=False))
    fig.update_layout(margin={'l': 0,'r': 0, 'b': 0,'t': 0}, font={'size': 16})
    fig.update_yaxes(title='Error (km)', range=[0, 250])
    fig.show()

    fig.write_image('plots/cow_localization_{:}.pdf'.format(k))

## Map of Austria

In [None]:
# Outline of Austria, drawn as a line layer on top of the base map.
# JSON is read explicitly as UTF-8 so the platform default encoding is not used.
with open('assets/austria.geojson', 'r', encoding='utf-8') as f:
    austria = json.load(f)

marker_west = {'color': 'blue', 'size': 20}
marker_east = {'color': 'red', 'size': 20}
marker_middle = {'color': 'grey', 'size': 20}

# (legend name, row mask, marker) per region, in drawing order.
# The middle band is inclusive on both ends so rows lying exactly on X_WEST or
# X_EAST (which belong to neither the west nor the east set) still show up.
map_regions = [
    ('West', df['real_x'] < X_WEST, marker_west),
    ('Middle', df['real_x'].between(X_WEST, X_EAST), marker_middle),
    ('East', df['real_x'] > X_EAST, marker_east),
]

fig = go.Figure()
for region_name, in_region, marker in map_regions:
    # Mean position per animal. Only lat/lon are aggregated: averaging every
    # column fails on non-numeric columns with pandas >= 2.0 and is wasted work.
    positions = df.loc[in_region].groupby('animal_id')[['lat', 'lon']].mean()
    fig.add_trace(go.Scattermapbox(lat=positions['lat'],
                                   lon=positions['lon'],
                                   marker=marker,
                                   name=region_name))

fig.update_layout(
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    mapbox=go.layout.Mapbox(
        # Final style of the original cell; the earlier 'stamen-terrain'
        # setting was always overridden and has been dropped.
        style='carto-positron',
        zoom=6.4,
        center_lat=47.7,
        center_lon=13.31,
        layers=[{
            'sourcetype': 'geojson',
            'source': austria,
            'type': 'line',
        }]
    ),
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01
    ),
)
fig.show()

## Original vs Teleported

In [None]:
# Take one validation animal, push its sensor rows through encoder A, the
# latent mapping and decoder B, and compare the first sensor column before
# and after teleporting.
sample_rows = df_val.loc[df_val['animal_id'].eq('5c0161270581e6502d38b2aa')]
sensor_tensor = torch.Tensor(sample_rows[SENSOR_COLS].to_numpy())
with torch.no_grad():  # inference only, no gradients needed
    teleported_tensor = dec_b(lat(enc_a(sensor_tensor)))

# Keep both traces as numpy arrays; the correlation cell below reads them.
S_ab = sensor_tensor.numpy()
s_tele_a = teleported_tensor.numpy()

first_sensor = [SENSOR_COLS[0]]
original_trace = scaler.inverse_transform(S_ab[:, 0], first_sensor).squeeze()
teleported_trace = scaler.inverse_transform(s_tele_a[:, 0], first_sensor).squeeze()

fig = go.Figure(data=[
    go.Scatter(y=original_trace, name='Original', line=dict(width=3)),
    go.Scatter(y=teleported_trace, name='Teleported'),
])
fig.update_layout(
    template='simple_white',
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0},
    font={'size': 24},
    xaxis_title='Days',
    yaxis_title='Activity',
    legend={'orientation': 'h', 'yanchor': 'bottom', 'y': 1.02, 'xanchor': 'right', 'x': 1},
)
fig.show()
fig.write_image('plots/cow_teleport_sample.pdf')

In [None]:
# Pearson correlation between the original and the teleported trace of the
# first sensor column (both still in scaled units); displayed as cell output.
pearsonr(S_ab[:, 0], s_tele_a[:, 0])

## Distance from Teleported

In [None]:
# Distances between every held-out west animal and every held-out east animal,
# used as a baseline in the box plot below.
stations_west = df_val_west['animal_id'].unique()
stations_east = df_val_east['animal_id'].unique()

# First row per animal, indexed by animal_id, so each position is looked up
# once instead of re-filtering the full frame inside the nested loop.
first_rows = df.drop_duplicates(subset='animal_id').set_index('animal_id')


def _first_position(animal_id):
    """Return (real_x, real_y) taken from the animal's first row in df."""
    return first_rows.at[animal_id, 'real_x'], first_rows.at[animal_id, 'real_y']


positions_west = [_first_position(animal_id) for animal_id in stations_west]
positions_east = [_first_position(animal_id) for animal_id in stations_east]

# Same pair order as a nested west-then-east loop; distinct loop names avoid
# the shadowed index the original nested loops shared.
station_distances = [
    l2norm_km(pos_west, pos_east, 'A')
    for pos_west in positions_west
    for pos_east in positions_east
]

In [None]:
# Box plots comparing the west-east baseline distances with the values
# obtained for original and teleported traces on each side.
box_series = [
    (station_distances, 'Station Distances'),
    (orig_a, 'Original Trace West'),
    (tele_a, 'Teleported Trace West'),
    (orig_b, 'Original Trace East'),
    (tele_b, 'Teleported Trace East'),
]

fig = go.Figure(data=[go.Box(y=values, name=label) for values, label in box_series])
fig.update_layout(
    showlegend=False,
    template='simple_white',
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0},
    font={'size': 24},
    yaxis_title='Error (km)',
)
fig.show()

fig.write_image('plots/cow_teleport_localization.pdf')

In [None]:
# Mean absolute gap between original and teleported values, averaged over
# the two sides (a and b weighted equally).
mean_gap_a = np.abs(np.array(orig_a) - np.array(tele_a)).mean()
mean_gap_b = np.abs(np.array(orig_b) - np.array(tele_b)).mean()
avg_err = (mean_gap_a + mean_gap_b) / 2
print(f'Average Error : {avg_err} km')

In [None]:
# Same mean absolute gap as above, reported separately per side.
gap_a = np.array(orig_a) - np.array(tele_a)
gap_b = np.array(orig_b) - np.array(tele_b)
err_a = np.abs(gap_a).mean()
err_b = np.abs(gap_b).mean()
print(f'Error A: {err_a} km Err B: {err_b} km')

In [None]:
# Signed mean relative difference (in percent) between teleported and original
# values, per side.
# NOTE(review): the difference is normalised by the teleported value rather
# than the original one, and a teleported value of 0 yields inf/nan — confirm
# that this is the intended definition.
for side, tele_values, orig_values in (('A', tele_a, orig_a), ('B', tele_b, orig_b)):
    tele_arr = np.array(tele_values)
    orig_arr = np.array(orig_values)
    rel_error = ((tele_arr - orig_arr) / np.abs(tele_arr)).mean() * 100
    print(f'Relative Error {side}: {rel_error}%')

In [None]:
# Distribution of the per-trace teleport error returned by test_models.
# NOTE(review): in the localisation box plot the "a" results are labelled West
# and the "b" results East, whereas here rse_tele_a is labelled East and
# rse_tele_b West. This may be intentional (direction of the teleport) —
# confirm against utils.test_models before changing the labels.
fig = go.Figure()
fig.add_trace(go.Box(y=np.array(rse_tele_a), name='Teleported Trace East'))
fig.add_trace(go.Box(y=np.array(rse_tele_b), name='Teleported Trace West'))
fig.update_layout(showlegend=False, template='simple_white', margin={'l': 0,'r': 0, 'b': 0,'t': 0})
# Axis title spelling fixed ("Absolut" -> "Absolute").
fig.update_layout(font={'size': 24}, yaxis_title='Mean Absolute Error')
fig.update_yaxes(range=[0, 0.12])
fig.show()

fig.write_image('plots/cow_teleport_mae.pdf')