In [1]:
version = 'v20250521'

import pandas as pd
from sklearn.model_selection import GroupKFold,cross_val_predict
from skmap.misc import find_files, GoogleSheet, ttprint
import joblib
import numpy as np


## SNR for series

In [2]:
srs = pd.read_parquet(f'./metric/srs_predicted_{version}.pq')
srs = srs[srs.columns[0:21].tolist()+srs.columns[-24::].tolist()]

srs = srs.rename(columns={'slope_pred':'pred_pnt', 'slope_std':'noise_pnt','pred':'pred_srs','pred_std':'noise_srs'})
srs['signal_pnt'] = srs['pred_pnt'].abs()
srs['signal_srs'] = srs['pred_srs'].abs()
srs['snr_pnt'] = srs['signal_pnt']/srs['noise_pnt']
srs['snr_srs'] = srs['signal_srs']/srs['noise_srs']
srs['error'] = srs['pred_pnt'] - srs['slope_obsv']
srs['rae'] = srs['error'].abs()/srs['slope_obsv'].abs()
srs['abs_error'] = srs['error'].abs()
srs.loc[srs['slope_obsv'].abs()<0.5, 'rae'] =  srs.loc[srs['slope_obsv'].abs()<0.5, 'abs_error']/0.5


srs['time_span'] = srs['time_series'].str[2]-srs['time_series'].str[0]
srs['time_str'] = srs['time_series'].str[0].astype(str) + '-' + srs['time_series'].str[1].astype(str) + '-' + srs['time_series'].str[2].astype(str)

In [3]:
srs[['time_span','signal','noise_pnt','noise_srs','snr_pnt','snr_srs']]

Unnamed: 0,time_str,time_span,signal,noise_pnt,noise_srs,snr_pnt,snr_srs
0,2009-2015-2018,9,0.408427,1.053973,1.218193,0.387512,0.335273
1,2009-2015-2018,9,0.149596,1.358426,1.197812,0.110124,0.124891
2,2009-2015-2018,9,0.147691,1.286279,1.251234,0.114820,0.118036
3,2009-2015-2018,9,0.192788,1.248777,1.157286,0.154381,0.166586
4,2009-2015-2018,9,0.213396,1.911890,1.256052,0.111615,0.169894
...,...,...,...,...,...,...,...
1737,2012-2015-2018,6,0.150086,1.717892,1.145378,0.087367,0.131037
1738,2012-2015-2018,6,0.783239,1.618864,1.352501,0.483820,0.579104
1739,2012-2015-2018,6,0.533490,11.466865,1.750533,0.046524,0.304759
1740,2012-2015-2018,6,0.150100,6.808468,1.166445,0.022046,0.128681


In [36]:
srs.groupby('time_span')[['signal','noise_pnt','noise_srs','snr_pnt','snr_srs']].describe()
des_s = srs.groupby('time_span')[['signal','noise_pnt','noise_srs','snr_pnt','snr_srs']].agg(['count','mean', 'median', 'std']).T


In [15]:
des_s = srs.loc[srs['time_span'].isin([9])].groupby('time_span')[['snr_pnt','signal_pnt','noise_pnt','snr_srs','signal_srs','noise_srs']].agg(['count','mean']) #
des_s['tgt'] = 'srs'
des_s

Unnamed: 0_level_0,snr_pnt,snr_pnt,signal_pnt,signal_pnt,noise_pnt,noise_pnt,snr_srs,snr_srs,signal_srs,signal_srs,noise_srs,noise_srs,tgt
Unnamed: 0_level_1,count,mean,count,mean,count,mean,count,mean,count,mean,count,mean,Unnamed: 13_level_1
time_span,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
9,1731,0.110533,1731,0.639003,1731,7.207558,1731,0.156368,1731,0.641743,1731,3.039567,srs


## SNR for pairs

In [4]:
pair = pd.read_parquet(f'./metric/pair_predicted_{version}.pq')
pair = pair.drop(columns=['time_pair_y','lc_survey_pair_x','pred_pair'])
pair = pair.rename(columns={'pred_change_pnt':'pred_pnt','pred':'pred_pair','pred_std':'noise_pair',
                            'time_pair_x':'time_pair','lc_survey_pair_y':'lc_survey_pair'})
pair['signal_pnt'] = pair['pred_pnt'].abs()
pair['signal_pair'] = pair['pred_pair'].abs()
pair['snr_pnt'] = pair['signal_pnt']/pair['noise_pnt']
pair['snr_pair'] = pair['signal_pair']/pair['noise_pair']
pair['lc_str'] = pair['lc_survey_pair'].str[0] + '-' + pair['lc_survey_pair'].str[1]
pair['error'] = pair['pred_pnt'] - pair['soc_change']
pair['abs_error'] = pair['error'].abs()
pair['rae'] = pair['abs_error']/pair['soc_change'].abs()
pair.loc[pair['soc_change'].abs()<1, 'rae'] =  pair.loc[pair['soc_change'].abs()<1, 'abs_error'] # "normalize" rae, with detectability floor == 1


pair['lc_str'] = pair['lc_survey_pair'].str[0] + '-' + pair['lc_survey_pair'].str[1]
pair['time_span'] = pair['time_pair'].str[1] - pair['time_pair'].str[0]

# des_p = pair.groupby('time_span')[['signal','noise_pnt','noise_pair','snr_pnt','snr_pair']].agg(['count','mean', 'median', 'std']).T


In [16]:
des_p = pair.loc[pair['time_span'].isin([9])].groupby('time_span')[['snr_pnt','signal_pnt','noise_pnt','snr_pair','signal_pair','noise_pair']].agg(['count','mean']) #.loc[pair['time_span']==9].groupby('time_span')
des_p['tgt'] = 'pair'
des_p

Unnamed: 0_level_0,snr_pnt,snr_pnt,signal_pnt,signal_pnt,noise_pnt,noise_pnt,snr_pair,snr_pair,signal_pair,signal_pair,noise_pair,noise_pair,tgt
Unnamed: 0_level_1,count,mean,count,mean,count,mean,count,mean,count,mean,count,mean,Unnamed: 13_level_1
time_span,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
9,1789,0.099086,1789,5.558928,1789,64.302896,1789,0.079885,1789,3.229785,1789,32.171944,pair


In [9]:
# only examine land cover series with population>30
vc = pair['lc_str'].value_counts()
lcl = vc[vc>30].index.tolist() 
pair = pair.loc[pair['lc_str'].isin(lcl)]