In [7]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from rdtools.soiling import cods_analysis

from src.data.import_data import import_df_from_zip_pkl, import_df_info_from_zip
from src.data.make_dataset import downsample_dataframe, remove_clipping_with_universal_window, remove_night_time_data

In [10]:
def test_cods(path_to_pkl_dir, path_to_info, nr_files, dataset_name, offset=0):
    df_info = import_df_info_from_zip(path_to_info)
    residuals = np.array([])
    for i in range(nr_files):
        df = pd.read_pickle(path_to_pkl_dir + "/synthetic_{:s}_pi_daily_{:s}.pkl".format(dataset_name, str(i+1).zfill(3)), compression = 'gzip')
        df = df.iloc[offset:]
        cods = cods_analysis(df.PI)
        _, deg, *_ = cods.iterative_signal_decomposition(perfect_cleaning=True, order=['SC', 'SR', 'Rd'])
        deg = deg / 100
        residuals = np.append(residuals, (deg - df_info.Degradation_rate_linear.iloc[i]))
        print(f"The residual of {i+1}th dataset is {deg - df_info.Degradation_rate_linear.iloc[i]}")
    return residuals, np.sqrt(np.sum(residuals**2)/nr_files)

In [11]:
# Soil + weather synthetic set, evaluated on the full series (offset=0 drops no rows).
path_to_info = '../data/raw/synthetic_soil_weather.zip'
path_to_pkl_dir = '../data/raw/synthetic_soil_weather_pi_daily'
n_files = 50

test_cods(
    path_to_pkl_dir=path_to_pkl_dir,
    path_to_info=path_to_info,
    nr_files=n_files,
    dataset_name='soil_weather',
    offset=0,
)

The residual of 1th dataset is 0.0007384380739373468
The residual of 2th dataset is -0.003097224461155854
The residual of 3th dataset is 0.0063326174227221685
The residual of 4th dataset is -0.010139289538045344
The residual of 5th dataset is -0.007606597592575632
The residual of 6th dataset is 0.006574162979364506
The residual of 7th dataset is -0.0008829641482430654
The residual of 8th dataset is 0.0001310513646923233
The residual of 9th dataset is 0.006675255140127744
The residual of 10th dataset is 0.003760446517872079
The residual of 11th dataset is 0.005376347124814969
The residual of 12th dataset is 0.003661321042182227
The residual of 13th dataset is 0.0005498270991297917
The residual of 14th dataset is 0.0059458710822247705
The residual of 15th dataset is -0.00011182733630705002
The residual of 16th dataset is 0.003180492439127691
The residual of 17th dataset is 0.0008476508439532304
The residual of 18th dataset is 0.013163951052129162
The residual of 19th dataset is 0.0027415

(array([ 0.00073844, -0.00309722,  0.00633262, -0.01013929, -0.0076066 ,
         0.00657416, -0.00088296,  0.00013105,  0.00667526,  0.00376045,
         0.00537635,  0.00366132,  0.00054983,  0.00594587, -0.00011183,
         0.00318049,  0.00084765,  0.01316395,  0.00274155,  0.00641228,
         0.00244541,  0.00840284,  0.00250471,  0.00116969,  0.01172902,
        -0.00796993, -0.00442573, -0.00516608,  0.00238574, -0.00433586,
        -0.00252516,  0.00145828, -0.00093691,  0.00245894,  0.00028854,
        -0.00493885,  0.00107112,  0.00133052, -0.00651937,  0.00441436,
         0.00620008,  0.00621621, -0.01104512, -0.00074381,  0.00695656,
        -0.00474847,  0.00177261, -0.00136599,  0.0040986 ,  0.00981335]),
 0.005429425476163739)

In [12]:
# Same soil + weather set, but the first 170 rows of every series are skipped.
path_to_info = '../data/raw/synthetic_soil_weather.zip'
path_to_pkl_dir = '../data/raw/synthetic_soil_weather_pi_daily'
n_files = 50

test_cods(
    path_to_pkl_dir=path_to_pkl_dir,
    path_to_info=path_to_info,
    nr_files=n_files,
    dataset_name='soil_weather',
    offset=170,
)

The residual of 1th dataset is 0.003404914465057845
The residual of 2th dataset is -0.004017531224411225
The residual of 3th dataset is 0.009407229849597444
The residual of 4th dataset is -0.009187710569460047
The residual of 5th dataset is -0.008020396256129365
The residual of 6th dataset is 0.004895510152395288
The residual of 7th dataset is -0.0005223320860907928
The residual of 8th dataset is -0.000721014102396578
The residual of 9th dataset is 0.00737194396549082
The residual of 10th dataset is 0.0035886550043174037
The residual of 11th dataset is 0.005229346364636374
The residual of 12th dataset is 0.0037319555093485463
The residual of 13th dataset is -0.002638897595938909
The residual of 14th dataset is 0.004314858663366613
The residual of 15th dataset is 0.0020961997831282914
The residual of 16th dataset is 0.0009886300100090326
The residual of 17th dataset is -0.006254158170408081
The residual of 18th dataset is 0.012165153565768009
The residual of 19th dataset is -0.002237631

(array([ 0.00340491, -0.00401753,  0.00940723, -0.00918771, -0.0080204 ,
         0.00489551, -0.00052233, -0.00072101,  0.00737194,  0.00358866,
         0.00522935,  0.00373196, -0.0026389 ,  0.00431486,  0.0020962 ,
         0.00098863, -0.00625416,  0.01216515, -0.00223763,  0.0061773 ,
        -0.0031444 ,  0.00656698,  0.00351818,  0.00560124,  0.01328237,
        -0.00667432, -0.01039484, -0.00501873, -0.00083901, -0.00466803,
         0.00383065,  0.00427191, -0.00401421, -0.00401263, -0.00633928,
        -0.00883055,  0.00080028, -0.00158237, -0.00835996,  0.00460213,
         0.00524746,  0.00601319, -0.01253055,  0.00180203,  0.0086736 ,
        -0.00158909,  0.00292034, -0.00103492,  0.0076106 ,  0.01124469]),
 0.006174319771631261)

In [13]:
# "basic" synthetic set, full series (offset=0 drops no rows).
path_to_info = '../data/raw/synthetic_basic.zip'
path_to_pkl_dir = '../data/raw/synthetic_basic_pi_daily'
nr_files = 50
test_cods(
    path_to_pkl_dir=path_to_pkl_dir,
    path_to_info=path_to_info,
    nr_files=nr_files,
    dataset_name='basic',
    offset=0,
)

The residual of 1th dataset is 0.0006652961381797932
The residual of 2th dataset is 0.0008049030934398956
The residual of 3th dataset is 0.0005536464376591967
The residual of 4th dataset is 0.0006008919010772381
The residual of 5th dataset is 0.0008432868550704836
The residual of 6th dataset is 0.0005767075567422461
The residual of 7th dataset is 0.0005441805889154468
The residual of 8th dataset is 0.0005975474488587838
The residual of 9th dataset is 0.0008628162676073223
The residual of 10th dataset is 0.0006488362326904301
The residual of 11th dataset is 0.0009386736551081649
The residual of 12th dataset is 0.0007879871444592661
The residual of 13th dataset is 0.0006352836579699653
The residual of 14th dataset is 0.0004677835131885622
The residual of 15th dataset is 0.0006292456375059305
The residual of 16th dataset is 0.0006150992087401008
The residual of 17th dataset is 0.0006157001790819232
The residual of 18th dataset is 0.0005056858203232274
The residual of 19th dataset is 0.000

(array([0.0006653 , 0.0008049 , 0.00055365, 0.00060089, 0.00084329,
        0.00057671, 0.00054418, 0.00059755, 0.00086282, 0.00064884,
        0.00093867, 0.00078799, 0.00063528, 0.00046778, 0.00062925,
        0.0006151 , 0.0006157 , 0.00050569, 0.00055388, 0.00057085,
        0.00050082, 0.00039925, 0.00052014, 0.00054752, 0.00047798,
        0.00090193, 0.00047804, 0.00057054, 0.00039486, 0.00055358,
        0.00092355, 0.00047073, 0.00054537, 0.00066546, 0.00057019,
        0.00069924, 0.0006478 , 0.00068191, 0.00054603, 0.00050603,
        0.00065676, 0.00053161, 0.00057822, 0.00074452, 0.00068894,
        0.00054551, 0.00057448, 0.00042155, 0.00069599, 0.00050154]),
 0.0006246874806859876)

In [15]:
# "weather" synthetic set, full series (offset=0 drops no rows).
path_to_info = '../data/raw/synthetic_weather.zip'
path_to_pkl_dir = '../data/raw/synthetic_weather_pi_daily'
nr_files = 50
test_cods(
    path_to_pkl_dir=path_to_pkl_dir,
    path_to_info=path_to_info,
    nr_files=nr_files,
    dataset_name='weather',
    offset=0,
)

The residual of 1th dataset is 0.0028706357545585717
The residual of 2th dataset is -0.0009515945267655329
The residual of 3th dataset is 0.005308834778936263
The residual of 4th dataset is -6.729000651431016e-05
The residual of 5th dataset is 0.0025387761368820686
The residual of 6th dataset is -0.0003065904349163328
The residual of 7th dataset is 0.0018848419483644893
The residual of 8th dataset is 0.005542556154617739
The residual of 9th dataset is 0.004981654046502875
The residual of 10th dataset is 0.00015700866800175302
The residual of 11th dataset is -0.0010210608411107668
The residual of 12th dataset is 0.004864307472200742
The residual of 13th dataset is 0.0035399258605088527
The residual of 14th dataset is -0.006085781575929164
The residual of 15th dataset is 0.00349480055598079
The residual of 16th dataset is 0.0013221488087073185
The residual of 17th dataset is -0.003059526363967684
The residual of 18th dataset is 0.003771546062551003
The residual of 19th dataset is 0.00542

(array([ 2.87063575e-03, -9.51594527e-04,  5.30883478e-03, -6.72900065e-05,
         2.53877614e-03, -3.06590435e-04,  1.88484195e-03,  5.54255615e-03,
         4.98165405e-03,  1.57008668e-04, -1.02106084e-03,  4.86430747e-03,
         3.53992586e-03, -6.08578158e-03,  3.49480056e-03,  1.32214881e-03,
        -3.05952636e-03,  3.77154606e-03,  5.42224962e-03,  4.94610722e-03,
         3.04575267e-03,  8.75210044e-04,  1.85895035e-03,  2.99235748e-03,
         8.58699545e-05,  4.21409364e-03,  3.46259852e-03, -1.83287012e-03,
        -6.44775798e-04,  4.56265036e-03, -6.69150186e-04, -4.37093935e-04,
         4.40022008e-03,  5.27696503e-04, -2.76403385e-03,  3.75048259e-03,
         2.75935645e-03, -1.51372655e-03,  2.54784846e-03,  5.35242422e-04,
        -4.15084012e-03,  2.20055066e-03,  2.96376006e-03,  6.53447573e-03,
        -2.60262804e-04,  2.35460888e-03,  2.84552663e-03, -3.25396701e-03,
        -4.41604106e-04,  2.62325643e-04]),
 0.0031694076647973165)

In [16]:
# "soil" synthetic set, full series (offset=0 drops no rows).
path_to_info = '../data/raw/synthetic_soil.zip'
path_to_pkl_dir = '../data/raw/synthetic_soil_pi_daily'
nr_files = 50
test_cods(
    path_to_pkl_dir=path_to_pkl_dir,
    path_to_info=path_to_info,
    nr_files=nr_files,
    dataset_name='soil',
    offset=0,
)

The residual of 1th dataset is 0.002642847660073642
The residual of 2th dataset is 0.00019915794312497584
The residual of 3th dataset is 0.0004406987260185957
The residual of 4th dataset is 0.004808275790369718
The residual of 5th dataset is 0.0038436847885193392
The residual of 6th dataset is 0.0029371902933214705
The residual of 7th dataset is 0.002277213168170939
The residual of 8th dataset is 0.0010699069363765447
The residual of 9th dataset is 0.003952516395362246
The residual of 10th dataset is 0.000290069949818535
The residual of 11th dataset is 0.001034701752956212
The residual of 12th dataset is 0.002033827375923671
The residual of 13th dataset is 0.00894737374403961
The residual of 14th dataset is 0.0007018265588630349
The residual of 15th dataset is 0.004726630157454822
The residual of 16th dataset is 0.0007188042700594869
The residual of 17th dataset is 0.00279406103973384
The residual of 18th dataset is 0.0019517745746555314
The residual of 19th dataset is 0.00058807351751

(array([ 2.64284766e-03,  1.99157943e-04,  4.40698726e-04,  4.80827579e-03,
         3.84368479e-03,  2.93719029e-03,  2.27721317e-03,  1.06990694e-03,
         3.95251640e-03,  2.90069950e-04,  1.03470175e-03,  2.03382738e-03,
         8.94737374e-03,  7.01826559e-04,  4.72663016e-03,  7.18804270e-04,
         2.79406104e-03,  1.95177457e-03,  5.88073518e-04,  3.49267211e-03,
        -9.35667285e-04,  1.00050276e-03, -2.64604048e-04, -2.30700703e-04,
        -1.56146474e-03,  1.36883785e-03,  1.47653720e-03,  1.84158176e-03,
         1.41226590e-03, -4.35966028e-04, -1.21948578e-05, -7.98968304e-05,
        -3.81250602e-04,  1.17300063e-03,  1.03512114e-03,  3.37127649e-03,
         1.45539157e-03,  1.30096498e-03, -9.93940730e-05,  2.43997984e-04,
         7.34085334e-04, -5.16535336e-04,  1.23711867e-03, -1.94267955e-03,
         1.32962598e-03,  7.81031791e-04,  8.94152786e-04,  6.31169244e-03,
         1.21226249e-03, -2.91476807e-04]),
 0.0024130553517099835)