In [37]:
import torch
import arviz as az
from scipy.optimize import curve_fit
from bokeh.plotting import figure, show,output_file, save
from bokeh.transform import factor_cmap, factor_mark
from bokeh.palettes import Spectral
from bokeh.models import Slope, Div
from bokeh.io import curdoc,output_notebook,export_png
from bokeh.layouts import column,gridplot
import numpy as np
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
from seaborn import clustermap
from bokeh.models import Band, ColumnDataSource
import pandas as pd
# Comma-separated list of Bokeh tool names.
# NOTE(review): no cell visible in this notebook passes TOOLS to figure(); confirm
# whether it is still needed or should be supplied as figure(tools=TOOLS).
TOOLS="hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select,poly_select,lasso_select,examine,help"

In [38]:
# Read the spreadsheet and split it on the `label` column:
# label == 0 rows become `train`, label == 1 rows become `test`.
PM25 = pd.read_excel("../data/Pre_flow_cal.xlsx")
is_train_row = PM25["label"] == 0
is_test_row = PM25["label"] == 1
train = PM25.loc[is_train_row, :]
test = PM25.loc[is_test_row, :]

In [39]:
# Resolve the device BEFORE loading: the checkpoint is mapped onto whatever is
# actually available, so the notebook also runs on machines without a GPU
# (a hard-coded map_location='cuda' makes torch.load fail there).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# SECURITY NOTE: weights_only=False unpickles arbitrary Python objects; only load
# this file if it comes from a trusted source.
gpr = torch.load("../models/flow_season", weights_only=False, map_location=device)

In [40]:
# Periodic padding of the training set:
#   * rows with corrected_week < 20 are copied and shifted by +52.3,
#   * rows with corrected_week > 52.3 - 20 are copied and shifted by -52.3,
# then both sets of copies are appended to `train`.
# Explicit .copy() makes the shifted frames independent of `train`, so the
# column assignment below never writes through a slice (no SettingWithCopyWarning).
extra_data_end = train.loc[train.corrected_week < 20, :].copy()
extra_data_start = train.loc[train.corrected_week > 52.3 - 20, :].copy()
extra_data_end["corrected_week"] = extra_data_end["corrected_week"] + 52.3
extra_data_start["corrected_week"] = -(52.3 - extra_data_start["corrected_week"])

# Every padded row gets the index label 0.0 (behaviour kept from the original;
# the reason is not evident from this notebook).
extra_data_start = extra_data_start.set_index(np.zeros(extra_data_start.shape[0]))
extra_data_end = extra_data_end.set_index(np.zeros(extra_data_end.shape[0]))

# NOTE: `train` is rebound to the padded frame, so running this cell twice pads
# the already-padded data again. Restart the kernel (or reload `train`) before re-running.
train = pd.concat([train, extra_data_end, extra_data_start])

In [41]:
# Periodic padding of the test set, mirroring what is done for `train`:
#   * rows with corrected_week < 20 are copied and shifted by +52.3,
#   * rows with corrected_week > 52.3 - 20 are copied and shifted by -52.3,
# then both sets of copies are appended to `test`.
# Explicit .copy() makes the shifted frames independent of `test`, so the
# column assignment below never writes through a slice (no SettingWithCopyWarning).
extra_data_end_test = test.loc[test.corrected_week < 20, :].copy()
extra_data_start_test = test.loc[test.corrected_week > 52.3 - 20, :].copy()
extra_data_end_test["corrected_week"] = extra_data_end_test["corrected_week"] + 52.3
extra_data_start_test["corrected_week"] = -(52.3 - extra_data_start_test["corrected_week"])

# Every padded row gets the index label 0.0 (behaviour kept from the original;
# the reason is not evident from this notebook).
extra_data_start_test = extra_data_start_test.set_index(np.zeros(extra_data_start_test.shape[0]))
extra_data_end_test = extra_data_end_test.set_index(np.zeros(extra_data_end_test.shape[0]))

# NOTE: `test` is rebound to the padded frame, so running this cell twice pads
# the already-padded data again. Restart the kernel (or reload `test`) before re-running.
test = pd.concat([test, extra_data_end_test, extra_data_start_test])

In [43]:
# Evaluate the loaded model on an evenly spaced grid of 800 points over [0, 52].
linmod = torch.linspace(0, 52, 800).to(device)

# Pure inference: no_grad() avoids building an autograd graph for the prediction.
# With full_cov=True the second return value is used as a covariance matrix
# (its diagonal is taken below), despite the `_std` suffix in its name.
with torch.no_grad():
    pm25_model, pm25_model_std = gpr(linmod, full_cov=True)

linmod_np = linmod.cpu().detach().numpy()
pm25_model_np = pm25_model.cpu().detach().numpy().copy()
# Per-point standard deviation = sqrt of the covariance diagonal.
pm25_model_std_np = pm25_model_std.diag().sqrt().cpu().detach().numpy().copy()

# One-standard-deviation envelope around the predicted mean.
lower1 = pm25_model_np - pm25_model_std_np
upper1 = pm25_model_np + pm25_model_std_np

# NOTE: the "flow" column holds the x grid (linmod_np), not flow values; the name
# is kept because the plotting cell uses base="flow" for this source.
data1 = ColumnDataSource(
    pd.DataFrame({"flow": linmod_np, "lower": lower1, "upper": upper1}).reset_index()
)

In [44]:
def roll_week(data,resolution,week,particle,parameter,x):
    """Centred rolling mean and standard deviation over a window of half-width `week`.

    For every centre ``c`` in ``x`` the rows whose ``parameter`` value lies
    strictly inside ``(c - week, c + week)`` are selected and the mean and
    standard deviation (pandas default) of the ``particle`` columns are computed.

    Parameters
    ----------
    data : pandas.DataFrame
        Input table; must contain the ``parameter`` column and every column
        listed in ``particle``.
    resolution : int
        Unused; kept so existing calls keep working.
    week : float
        Half-width of the window, in units of the ``parameter`` column.
    particle : list of str
        Columns to aggregate. Must be a list: a bare string would yield scalar
        aggregates and unnamed result columns.
    parameter : str
        Column that positions each row on the axis being windowed. The original
        code always filtered on ``corrected_week`` regardless of this argument.
    x : array-like
        Window centres; also used as the index of the result.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``x`` with one ``<col>`` (mean) and one ``<col>_std`` column
        per entry of ``particle``. Empty windows give NaN.
    """
    ordered = data.sort_values(parameter)
    position = ordered[parameter]

    means, stds = [], []
    for centre in x:
        # Build the window mask once and reuse it for both statistics.
        in_window = (centre - week < position) & (centre + week > position)
        window = ordered.loc[in_window, particle]
        means.append(window.mean())
        stds.append(window.std())

    mean_frame = pd.DataFrame(means, index=x)
    std_frame = pd.DataFrame(stds, index=x).add_suffix("_std")
    return pd.concat([mean_frame, std_frame], axis=1).set_index(x)

In [45]:
# Evaluation grid for the rolling statistics: `resolution` evenly spaced points
# on [0, 52], end point included.
resolution = 500
x = np.linspace(0, 52, resolution, endpoint=True)

# Rolling mean/std of "flow" with a window half-width of 2 (in corrected_week
# units). `resolution` is passed instead of repeating the literal 500 so the
# grid size is defined in one place.
mean = roll_week(train, resolution, 2, ["flow"], "corrected_week", x)
mean_test = roll_week(test, resolution, 2, ["flow"], "corrected_week", x)

In [46]:

# One-standard-deviation envelope around the rolling mean of the training data,
# stored as extra columns on `mean` so the Bokeh data source can expose them.
lower1_std = mean["flow"] - mean["flow_std"]
upper1_std = mean["flow"] + mean["flow_std"]
mean["lower1_std_pm"], mean["upper1_std_pm"] = lower1_std, upper1_std

# reset_index() turns the grid index into a regular "index" column.
mean1 = ColumnDataSource(data=mean.reset_index())

In [47]:
# Model prediction (first return value of gpr) at the rolling-mean grid points,
# so it can be compared element-wise with the rolling means.
# Pure inference: no_grad() avoids building an autograd graph.
with torch.no_grad():
    train_pm = gpr(torch.tensor(mean.index.values).float().to(device))[0].cpu().detach().numpy()


In [48]:
# R^2 of the model prediction against the rolling mean of the training data and
# of the test data. Both rolling means are evaluated on the same grid as
# `train_pm`, so the same prediction vector is used for both scores.
r2_pm = r2_score(y_true=mean["flow"], y_pred=train_pm)
r2_pm_test = r2_score(y_true=mean_test["flow"], y_pred=train_pm)


In [49]:
# Configure Bokeh so that show() renders figures inline in the notebook.
output_notebook()

In [50]:
# Seasonality figure: raw training points, rolling mean +/- 1 std (train),
# rolling mean (test) and the model mean +/- 1 std, all against corrected_week.
p = figure(x_range=(0, 52), y_range=(3.6, 4.6))
# Plain-text title: the original string started with a stray, unclosed "$$".
p.title.text = "Flow seasonality"
p.xgrid.grid_line_color = None
p.ygrid.grid_line_alpha = 0.5

p.scatter(train.corrected_week, y=train.flow, color="blue", marker="dot", size=20, alpha=0.4, legend_label="raw points")

# Red band: rolling mean +/- 1 std of the training data (columns of `mean1`).
band = Band(base="index", lower="lower1_std_pm", upper="upper1_std_pm", source=mean1, fill_color="red", line_color="black", fill_alpha=0.2)
# Blue band: model mean +/- 1 std; in `data1` the "flow" column holds the x grid.
band1 = Band(base="flow", lower="lower", upper="upper", source=data1, fill_alpha=0.5, fill_color="blue", line_color="black")
p.add_layout(band)
p.add_layout(band1)

p.line(mean.index, mean.flow, line_width=3, color="green", legend_label="Train roll mean R2 "+str(round(r2_pm,2)))
p.line(mean_test.index, mean_test.flow, line_width=3, color="orange", legend_label="test roll mean R2 "+str(round(r2_pm_test,2)))
# Legend typo fixed ("Gaussion" -> "Gaussian").
p.line(linmod_np, pm25_model_np, line_width=3, color="red", legend_label="Gaussian model")

# Axis labels now describe what is plotted (corrected_week on x, flow on y).
# The original labels were L/s on x and ug/m^3 on y, which match neither axis.
# TODO(review): add the flow units once confirmed; the L/s label may have been
# intended for the y axis.
p.xaxis.axis_label = "Week of year"
p.yaxis.axis_label = "Flow"
p.yaxis.axis_label_orientation = 0

show(p)