In [5]:
import numpy as np
np.__version__

'1.26.4'

In [7]:
from src.model import NTEPModel

ModuleNotFoundError: No module named 'gptools'

In [None]:
# Load the already fitted model.
# Alternative to the sampling cell below: the file name here matches the one that cell passes to m.save().
m = NTEPModel(load_file='model_04-feb-2024-distinct_taus (rater_severity)')

In [None]:
# Alternative to loading a saved fit: construct the model, draw samples, then save the result.
# The trailing notes record the value each argument takes when it is omitted.
m = NTEPModel(
    num_basis_functions=8,  # 8 by default
    stan_file='model_04-feb-2024-distinct_taus.stan',  # (some predetermined filepath) by default
    data_file='quality_nj2.csv',  # nj2 dataset by default
    pred_N=100,  # 100 by default
    padding=5,  # 5 by default
)
m.sample()
m.save(file='model_04-feb-2024-distinct_taus (rater_severity)')

In [None]:
# Check the model's convergence with trace plots (no var_names given, so plot_trace uses its defaults).
m.plot_trace()

In [None]:
# Restrict plot_trace to specific variables by passing their names in var_names.
m.plot_trace(var_names=['rater_severity','tau_rater'])

In [None]:
# Plot the time effect using plot_time_effect's default arguments.
m.plot_time_effect()

In [None]:
# Plot a random subset of entries and choose how they are ordered.
m.plot_time_effect(entries=4, # an integer N plots N random entries
                   credit_interval = 0.5, # interval width (presumably the credible interval — confirm against NTEPModel)
                   sort_entries = 'data', # sort method; accepts: 'unsorted', 'weighted', 'annual', 'data'
                                        #   'data': ranks by the average of the points where ratings were taken
                                        #   'annual': ranks by all predicted points
                                        #   'weighted' (default): uses all predicted points, weighted according to the
                                        #       standard deviation of the samples, so predictions near data points
                                        #       are weighted more heavily
                  )

In [None]:
# Plot an explicit list of entries, keeping the order in which they are listed.
m.plot_time_effect(entries=['A11-40', 'J-2726','BAR PP 79366'], # a list plots exactly the named entries
                   credit_interval = 0.5, # interval width (presumably the credible interval — confirm against NTEPModel)
                   sort_entries = 'unsorted', # 'unsorted' keeps them in the order listed initially
                  )

In [None]:
# NOTE(review): presumably leave-one-out cross-validation of the fitted model — confirm against NTEPModel.loo.
m.loo()

In [None]:
# Same call as the earlier "specific variables" trace cell: traces for rater_severity and tau_rater only.
m.plot_trace(var_names=['rater_severity','tau_rater'])

In [None]:
# NOTE(review): presumably plots the per-plot effect (cf. the PLT_ID column used further down) — confirm against NTEPModel.
m.plot_plot_effect()

In [None]:
# Scrap code, probably won't be used.
# Prints each rater's name and draws a trace plot of tau_rater minus rater_severity for that rater.
import arviz as az

# RATER name -> RATER_CODE (per-rater mean of the code column, rounded to an integer)
rater_codes = dict(m.df.groupby('RATER')['RATER_CODE'].mean().apply(lambda x: round(x)))
# RATER_CODE -> RATER name; reused by later cells to label figures
inv_rater_codes = {code: rater for rater, code in rater_codes.items()}

for i in range(7):
    print(inv_rater_codes[i])
    shifted_tau = m.fit.tau_rater[:, i, :].T - m.fit.rater_severity[:, i].T
    az.plot_trace(shifted_tau, compact=False)

In [None]:
# might turn this into a function in the utility file, but not sure.
import seaborn as sns
import matplotlib.pyplot as plt

# One KDE figure per rater: a density curve for each of the first 8 rows of
# (tau_rater - rater_severity), transposed, each labelled with its mean.
for i in range(7):
    plt.figure(figsize=(6, 2))
    # This difference does not depend on j, so build it once per rater instead of once per curve.
    shifted_tau = m.fit.tau_rater[:, i, :].T - m.fit.rater_severity[:, i].T
    for j in range(8):
        sample_data = shifted_tau[j]
        sns.kdeplot(sample_data, linewidth=1, label="Threshold "+str(j+1)+"|"+str(j+2)+" : "+str(round(sample_data.mean(),2)))
    plt.title(inv_rater_codes[i]+" tau distribution")
    plt.legend(bbox_to_anchor=(1.02, 1.12))
    plt.show()

In [None]:
from scipy.special import softmax
import numpy as np
import pandas as pd

def rsm_probability(y, theta, beta, tau):
    """Return the probability of category ``y`` given ``theta``, ``beta`` and thresholds ``tau``.

    The category scores are the cumulative sums of
    ``[0, theta - beta - tau[0], ..., theta - beta - tau[K-1]]`` and the
    probabilities are the softmax of those scores, so there are
    ``len(tau) + 1`` categories indexed from 0.

    Parameters
    ----------
    y : int
        Zero-based category index into the probability vector.
    theta : float
        Location at which the probabilities are evaluated.
    beta : float
        Offset subtracted from ``theta``.
    tau : sequence of float
        Thresholds; may be a list, tuple or NumPy array.

    Returns
    -------
    float
        ``probs[y]``, the probability of category ``y``.
    """
    # Convert up front: with a plain list, ``theta - beta - tau`` only worked when
    # theta or beta happened to be a NumPy scalar; ``float - list`` raises TypeError.
    thresholds = np.asarray(tau, dtype=float)
    steps = np.concatenate(([0.0], theta - beta - thresholds))
    probs = softmax(np.cumsum(steps))
    return probs[y]

# One figure per rater: rsm_probability for each of the 9 categories as a function of theta,
# using that rater's mean thresholds (tau) and mean severity (beta).
theta_grid = np.linspace(-6, 6, 100)  # shared x-axis; built once instead of twice per curve
for r in range(7):
    plt.figure(figsize=(5, 3))
    tau = list((m.fit.tau_rater[:,r,:].T).mean(axis=1))
    beta = m.fit.rater_severity[:,r].T.mean()
    for i in range(9):
        plt.plot(theta_grid, [rsm_probability(i,theta,beta,tau) for theta in theta_grid], linewidth=1, label=str(i+1))
    # Figure-level decorations are set once, after every curve has been drawn,
    # rather than being reset on each pass of the inner loop.
    plt.title(inv_rater_codes[r]+" category probabilities")
    plt.xlabel('Turf Quality')
    plt.ylabel('Probability')
    plt.legend(bbox_to_anchor=(1.02, 1))
    plt.show()

In [None]:
from scipy.optimize import bisect

import plotly.graph_objects as go
import plotly.express as px

# Range of theta searched and drawn, grid resolution, and the colour scale for the bars.
lb = -6
ub = 6
resolution = 100
colors = px.colors.diverging.Spectral

x_space = np.linspace(lb, ub, resolution)
x_data = []

for r in range(7):
    tau = list((m.fit.tau_rater[:, r, :].T).mean(axis=1))
    beta = m.fit.rater_severity[:, r].T.mean()

    # boundaries[k] is the theta in [lb, ub] at which categories k and k+1 are equally probable.
    # bisect raises ValueError if the probability difference has the same sign at lb and ub.
    boundaries = []
    for k in range(8):
        boundaries.append(
            bisect(
                lambda x, k=k: rsm_probability(k + 1, x, beta, tau) - rsm_probability(k, x, beta, tau),
                lb,
                ub,
            )
        )

    # Turn the boundaries into stacked segment widths. A boundary that falls below an
    # earlier one contributes a zero-width segment, so the widths always sum to ub - lb.
    furthest = boundaries[0]
    stacks = [furthest - lb]
    for boundary in boundaries[1:]:
        stacks.append(max(0, boundary - furthest))
        furthest = max(furthest, boundary)
    stacks.append(max(0, ub - furthest))
    x_data.append(stacks)

# Transpose so that each row holds one category's width for every rater.
x_data = np.array(x_data).T
# Horizontal stacked bars: one row per rater, one segment per rating 1-9,
# with segment widths taken from the matching row of x_data.
y_data = [inv_rater_codes[i]+" " for i in range(7)]
fig = go.Figure()

for rating_idx in range(9):
    segment_color = colors[rating_idx]
    bar = go.Bar(
        y=y_data,
        x=x_data[rating_idx],
        name=str(rating_idx + 1),
        orientation='h',
        marker=dict(color=segment_color, line=dict(color=segment_color, width=3)),
    )
    fig.add_trace(bar)

# The bars start at 0, so ticks 0..(ub - lb) are relabelled as lb..ub.
axis_span = ub - lb
fig.update_layout(
    barmode='stack',
    title='Most Probable Rating',
    xaxis=dict(
        title='Turf Quality',
        range=[0, axis_span],
        ticktext=[str(i) for i in range(lb, ub + 1)],
        tickvals=list(range(0, axis_span + 1)),
    ),
    yaxis=dict(title='Rater', automargin=True),
)
fig.show()

In [None]:
# Mean of (tau_rater + rater_severity) along axis 1 of the transposed slice, for index 6 only.
# NOTE(review): this adds rater_severity, whereas the trace and KDE cells above subtract it — confirm the intended sign.
# Rebinds the global `tau` left over from the per-rater loops.
tau = list((m.fit.tau_rater[:,6,:].T + m.fit.rater_severity[:,6].T).mean(axis=1))
tau

In [None]:
import numpy as np

df = m.df

# Counts per QUALITY (rows) and RATER_CODE (columns).
# pivot_table leaves NaN where a rater never gave a particular rating. Those cells are
# zero counts, so fill them; otherwise a single NaN makes the column total NaN and the
# whole rater column becomes NaN after normalisation.
table = pd.pivot_table(df[['PLT_ID','RATER_CODE','QUALITY']], aggfunc='count',index='QUALITY',columns='RATER_CODE',values='QUALITY').fillna(0)

# Normalise each of the 7 rater columns so its proportions sum to 1.
for i in range(7):
    table[i] = table[i] / table[i].sum()

# Horizontal stacked bars of the observed rating proportions: one row per rater,
# one segment per rating 1-9 taken from the matching row of `table`.
y_data = [inv_rater_codes[i]+" " for i in range(7)]
fig = go.Figure()

for rating in range(1, 10):
    segment_color = colors[rating - 1]
    bar = go.Bar(
        y=y_data,
        x=table.loc[rating],
        name=str(rating),
        orientation='h',
        marker=dict(color=segment_color, line=dict(color=segment_color, width=3)),
    )
    fig.add_trace(bar)

fig.update_layout(
    barmode='stack',
    title='Rating Proportions',
    xaxis=dict(title='Proportion of Ratings', range=[0, 1]),
    yaxis=dict(title='Rater', automargin=True),
)
fig.show()

In [None]:
# Row of `table` for QUALITY == 1: one value per RATER_CODE column.
table.loc[1]

In [None]:
# Number of non-null RATER_CODE values for each QUALITY rating.
df.groupby('QUALITY')['RATER_CODE'].count()

In [None]:
import pandas as pd
# Raw (un-normalised) counts per QUALITY row and RATER_CODE column; same pivot call that builds `table`.
pd.pivot_table(df[['PLT_ID','RATER_CODE','QUALITY']], aggfunc='count',index='QUALITY',columns='RATER_CODE',values='QUALITY')

In [None]:
# Normalise each of the 7 rater columns to proportions.
# Series.sum() skips NaN, whereas the built-in sum() returns NaN as soon as one QUALITY
# level is missing for a rater, which would turn that rater's whole column into NaN.
# NOTE(review): `table` is already normalised in the cell that builds it, so this
# repeats that step — confirm whether this cell is still needed.
for i in range(7):
    table[i] = table[i]/table[i].sum()
table