# Importing Header and SoilPrep

In [1]:
import import_ipynb 
from Header import *

importing Jupyter notebook from Header.ipynb


In [2]:
import import_ipynb
from SoilPrep import * 

importing Jupyter notebook from SoilPrep.ipynb


# Step 0: Setting up decision parameters (Data Tree)

In [3]:
# 0. Smoothing filter types to evaluate. The keys handed to filt_sg further down are
#    'sg1' (savgol order 1) and 'sg2' (savgol order 2) -------------------------------- (0)
sg_filters = ['sg2']

# 0. Window lengths tried for the smoothing filter. Later cells hard-code spec2[51],
#    so 51 has to stay in this list.
#    NOTE(review): how filt_sg treats lengths 0 and 1 is not visible here - confirm. -- (0)
window_lengths = [0, 1, 11, 21, 31, 41, 51, 71, 91]

# 1. Machine learning regression models to evaluate -------------------------------- (1)
ml_methods = ['ridge', 'svr', 'plsr', 'cubist', 'gbrt']

# 2. Names of target variables (columns) in the dataframe --------------------------- (2)
target_names = ['TOC']

# 3. Preprocessing options for the target data -------------------------------------- (3)
prepare_target = ['none']
# 4. Preprocessing options for the spectral data; also used as the choices of the
#    interactive plots further down -------------------------------------------------- (4)
prepare_spec = ['none', 'cr', 'log', 'fod', 'fod_cr', 'fod_log']

# 5. Numbers of bands used when resampling the spectra. Later preview cells index the
#    resampled dictionaries with 50, so 50 has to stay in this list.
#    NOTE(review): the meaning of 0 depends on resample_spectra - confirm. ----------- (5)
nbands_sampling = [0,3,5,7,9,10,11,13,15,17,19,20,21,23,25,27,29,30,31,33,35,37,39, 40, 50, 55, 60, 70, 80, 90, 100]


# Plot colours, one per entry of target_names (looked up by the target's position) ---
clr = ['green']

# Colour scheme definition (currently unused, kept for reference)
# kado = '#8B7355'
# mati = '#A52A2A'
# balu = '#F4A460'

In [4]:
# Collect every decision parameter under its own name and write the bundle
# to meta_data.pickle.
MetaData = dict(
    sg_filters=sg_filters,
    window_lengths=window_lengths,
    prepare_spec=prepare_spec,
    nbands_sampling=nbands_sampling,
    target_names=target_names,
    prepare_target=prepare_target,
    ml_methods=ml_methods,
    clr=clr,
)

with open('meta_data.pickle', 'wb') as file:
    pickle.dump(MetaData, file)

# Step 1a: Obtaining Spectra (Noise and Outliers removal)

In [5]:
# Load the raw table and expose the organic-carbon column 'OC%' under the name 'TOC'
df = pd.read_csv('Italy.csv').rename(columns={'OC%': 'TOC'})
df.head(1)

Unnamed: 0,TLV_idx,IT_idx,TAG_IT,XmUTM33,YmUTM33,soil depth (cm),average soil depth (cm),Texture class,Sand%,Silt%,Clay%,BD g/cm3,pH,CaCO3%,TOC,Carbon stock,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,...,2451,2452,2453,2454,2455,2456,2457,2458,2459,2460,2461,2462,2463,2464,2465,2466,2467,2468,2469,2470,2471,2472,2473,2474,2475,2476,2477,2478,2479,2480,2481,2482,2483,2484,2485,2486,2487,2488,2489,2490,2491,2492,2493,2494,2495,2496,2497,2498,2499,2500
0,1,126629.0,C2_4664,502765.35,4488814.14,0-7,3.5,sandy loam,47.75,27.62,24.63,1.506327,6.3,5.863,1.4,0.007381,0.074932,0.07695,0.069572,0.074459,0.084597,0.082278,0.084486,0.084062,0.078833,0.064853,0.065676,0.08092,0.078477,0.074822,0.073595,0.079095,0.073128,0.067866,0.073805,0.075564,0.077744,0.079301,0.076749,0.075715,0.078941,0.080916,0.076129,0.07307,0.077287,0.08146,0.081923,0.080122,0.079522,0.078779,...,0.389512,0.389017,0.388596,0.388203,0.387496,0.386679,0.385995,0.385343,0.384979,0.384798,0.384509,0.383982,0.383455,0.382667,0.381661,0.381222,0.380977,0.380462,0.38002,0.379234,0.377965,0.377186,0.376435,0.376151,0.37586,0.375003,0.374733,0.374094,0.373085,0.372541,0.371912,0.371266,0.370933,0.370718,0.370055,0.368958,0.368165,0.367614,0.367514,0.367988,0.368236,0.368705,0.368515,0.367891,0.367332,0.365998,0.365176,0.364963,0.365163,0.36501


In [6]:
# Positional column 16 onward holds the spectral bands; skipping the first 50 of
# those makes the working spectra start at the band labelled 400.
temp_spec = df.iloc[:, 16:].copy()
spectra_df = temp_spec.iloc[:, 50:].copy()

# Single-column frame holding the first configured target
target_df = df[[target_names[0]]].copy()

In [7]:
spectra_df.head(2)

Unnamed: 0,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,...,2451,2452,2453,2454,2455,2456,2457,2458,2459,2460,2461,2462,2463,2464,2465,2466,2467,2468,2469,2470,2471,2472,2473,2474,2475,2476,2477,2478,2479,2480,2481,2482,2483,2484,2485,2486,2487,2488,2489,2490,2491,2492,2493,2494,2495,2496,2497,2498,2499,2500
0,0.08605,0.088625,0.088922,0.088726,0.089113,0.088972,0.088203,0.087449,0.088015,0.08971,0.091106,0.090524,0.09188,0.092508,0.091135,0.092036,0.092789,0.093127,0.093511,0.094088,0.094568,0.094474,0.095251,0.09641,0.097614,0.097859,0.098206,0.099107,0.09917,0.099204,0.099553,0.100929,0.102627,0.103838,0.104182,0.104811,0.105346,0.105437,0.105945,0.106878,0.107915,0.108362,0.108999,0.109905,0.110684,0.111204,0.111684,0.112573,0.113481,0.113694,...,0.389512,0.389017,0.388596,0.388203,0.387496,0.386679,0.385995,0.385343,0.384979,0.384798,0.384509,0.383982,0.383455,0.382667,0.381661,0.381222,0.380977,0.380462,0.38002,0.379234,0.377965,0.377186,0.376435,0.376151,0.37586,0.375003,0.374733,0.374094,0.373085,0.372541,0.371912,0.371266,0.370933,0.370718,0.370055,0.368958,0.368165,0.367614,0.367514,0.367988,0.368236,0.368705,0.368515,0.367891,0.367332,0.365998,0.365176,0.364963,0.365163,0.36501
1,0.125223,0.128504,0.12912,0.128924,0.129251,0.129067,0.128753,0.128626,0.129218,0.130684,0.132027,0.131676,0.133955,0.135213,0.133668,0.134791,0.13595,0.136851,0.137534,0.138047,0.138528,0.138977,0.140021,0.141322,0.142934,0.143437,0.144069,0.145405,0.145694,0.146237,0.147058,0.148308,0.149947,0.151392,0.152344,0.153188,0.153987,0.154531,0.154996,0.156035,0.15745,0.158115,0.159066,0.160308,0.161186,0.161913,0.162628,0.16377,0.164859,0.165325,...,0.470734,0.470214,0.4697,0.469578,0.469052,0.468403,0.468044,0.467167,0.466103,0.465645,0.465378,0.464997,0.464733,0.464414,0.463514,0.462855,0.46288,0.4621,0.461291,0.461049,0.460534,0.460264,0.45963,0.459034,0.458453,0.45771,0.457637,0.457607,0.456742,0.455889,0.455544,0.454833,0.454801,0.455204,0.454618,0.453573,0.452215,0.451102,0.450939,0.452055,0.452472,0.452662,0.452495,0.451723,0.450718,0.449806,0.448387,0.44715,0.447417,0.446394


In [8]:
target_df.head(2)

Unnamed: 0,TOC
0,1.4
1,1.8


## Step 1a (cont.): Missing Data Handling (samples with missing spectra or targets are dropped)

In [9]:
# Row labels of every sample that has at least one NaN among its spectral bands
missing_spectra_index = spectra_df.index[spectra_df.isna().any(axis=1)]
missing_spectra_index

Int64Index([801], dtype='int64')

In [10]:
# Remove the samples with incomplete spectra from the features and, under the
# same row labels, from the target so the two frames stay paired.
spectra_df.drop(index=missing_spectra_index, inplace=True)
target_df.drop(index=missing_spectra_index, inplace=True)

In [11]:
# Renumber the rows 0..n-1 in both frames so their indices stay identical
spectra_df.index = pd.RangeIndex(len(spectra_df))
target_df.index = pd.RangeIndex(len(target_df))


In [12]:
spectra_df.shape

(935, 2101)

In [13]:
spectra_df

Unnamed: 0,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,...,2451,2452,2453,2454,2455,2456,2457,2458,2459,2460,2461,2462,2463,2464,2465,2466,2467,2468,2469,2470,2471,2472,2473,2474,2475,2476,2477,2478,2479,2480,2481,2482,2483,2484,2485,2486,2487,2488,2489,2490,2491,2492,2493,2494,2495,2496,2497,2498,2499,2500
0,0.086050,0.088625,0.088922,0.088726,0.089113,0.088972,0.088203,0.087449,0.088015,0.089710,0.091106,0.090524,0.091880,0.092508,0.091135,0.092036,0.092789,0.093127,0.093511,0.094088,0.094568,0.094474,0.095251,0.096410,0.097614,0.097859,0.098206,0.099107,0.099170,0.099204,0.099553,0.100929,0.102627,0.103838,0.104182,0.104811,0.105346,0.105437,0.105945,0.106878,0.107915,0.108362,0.108999,0.109905,0.110684,0.111204,0.111684,0.112573,0.113481,0.113694,...,0.389512,0.389017,0.388596,0.388203,0.387496,0.386679,0.385995,0.385343,0.384979,0.384798,0.384509,0.383982,0.383455,0.382667,0.381661,0.381222,0.380977,0.380462,0.380020,0.379234,0.377965,0.377186,0.376435,0.376151,0.375860,0.375003,0.374733,0.374094,0.373085,0.372541,0.371912,0.371266,0.370933,0.370718,0.370055,0.368958,0.368165,0.367614,0.367514,0.367988,0.368236,0.368705,0.368515,0.367891,0.367332,0.365998,0.365176,0.364963,0.365163,0.365010
1,0.125223,0.128504,0.129120,0.128924,0.129251,0.129067,0.128753,0.128626,0.129218,0.130684,0.132027,0.131676,0.133955,0.135213,0.133668,0.134791,0.135950,0.136851,0.137534,0.138047,0.138528,0.138977,0.140021,0.141322,0.142934,0.143437,0.144069,0.145405,0.145694,0.146237,0.147058,0.148308,0.149947,0.151392,0.152344,0.153188,0.153987,0.154531,0.154996,0.156035,0.157450,0.158115,0.159066,0.160308,0.161186,0.161913,0.162628,0.163770,0.164859,0.165325,...,0.470734,0.470214,0.469700,0.469578,0.469052,0.468403,0.468044,0.467167,0.466103,0.465645,0.465378,0.464997,0.464733,0.464414,0.463514,0.462855,0.462880,0.462100,0.461291,0.461049,0.460534,0.460264,0.459630,0.459034,0.458453,0.457710,0.457637,0.457607,0.456742,0.455889,0.455544,0.454833,0.454801,0.455204,0.454618,0.453573,0.452215,0.451102,0.450939,0.452055,0.452472,0.452662,0.452495,0.451723,0.450718,0.449806,0.448387,0.447150,0.447417,0.446394
2,0.078841,0.082845,0.083003,0.082566,0.083161,0.082521,0.081488,0.080820,0.081475,0.082961,0.084263,0.084170,0.085992,0.086719,0.085114,0.086233,0.087137,0.087427,0.087876,0.088112,0.088282,0.088596,0.089657,0.091029,0.092409,0.092228,0.092504,0.093871,0.093848,0.093815,0.094178,0.095416,0.097016,0.098352,0.099094,0.099669,0.100162,0.100495,0.100944,0.101849,0.102970,0.103388,0.104164,0.105287,0.106133,0.106721,0.107253,0.108153,0.109164,0.109564,...,0.419618,0.419201,0.419235,0.419245,0.418774,0.418386,0.417953,0.417484,0.417426,0.417648,0.417799,0.417554,0.417191,0.416577,0.415714,0.415394,0.415310,0.415355,0.415364,0.415019,0.414706,0.414252,0.413760,0.413679,0.413626,0.413640,0.413499,0.413422,0.413150,0.412304,0.411942,0.411355,0.410592,0.410782,0.410603,0.410282,0.410224,0.409615,0.409494,0.409801,0.410048,0.410668,0.410255,0.408850,0.408418,0.407964,0.408272,0.408526,0.408821,0.408623
3,0.107160,0.111234,0.111728,0.111294,0.111636,0.111437,0.110872,0.110340,0.110619,0.112382,0.114265,0.114230,0.115957,0.116692,0.115173,0.116076,0.117204,0.118109,0.118285,0.118751,0.119455,0.119837,0.120574,0.121548,0.122908,0.123248,0.123733,0.124912,0.125330,0.125474,0.125661,0.126911,0.128639,0.130024,0.130626,0.131218,0.131716,0.131881,0.132198,0.133043,0.134199,0.134820,0.135472,0.136268,0.136935,0.137519,0.138098,0.139016,0.139800,0.139978,...,0.345616,0.345630,0.345715,0.345638,0.345621,0.345381,0.345310,0.345162,0.344876,0.344983,0.344802,0.344657,0.344502,0.343892,0.343351,0.342833,0.342943,0.343015,0.342790,0.343002,0.342811,0.342809,0.343055,0.342866,0.342580,0.342383,0.342072,0.341895,0.341663,0.341395,0.341302,0.341414,0.341932,0.342044,0.341977,0.342129,0.341610,0.340877,0.341046,0.341083,0.341190,0.342120,0.342119,0.341154,0.340982,0.340960,0.340859,0.340786,0.340482,0.339958
4,0.080961,0.084310,0.084454,0.084299,0.085174,0.084578,0.083984,0.083615,0.083194,0.084502,0.086133,0.086012,0.088030,0.089104,0.087833,0.088994,0.089841,0.090079,0.090474,0.090832,0.091194,0.091578,0.092827,0.094251,0.095456,0.095612,0.096055,0.097251,0.097414,0.097649,0.098260,0.099717,0.101419,0.102735,0.103390,0.103985,0.104657,0.105258,0.105722,0.106732,0.108062,0.108425,0.109210,0.110439,0.111406,0.112302,0.112972,0.113609,0.114581,0.115092,...,0.475934,0.475108,0.474521,0.474048,0.473229,0.472867,0.472195,0.471140,0.471101,0.471033,0.470645,0.470096,0.469157,0.468177,0.467288,0.466977,0.466934,0.466516,0.465867,0.465267,0.464512,0.463861,0.463486,0.463186,0.462783,0.462289,0.461936,0.461288,0.460185,0.459743,0.459116,0.458220,0.458318,0.457635,0.456083,0.455587,0.455209,0.455050,0.455669,0.455049,0.455049,0.454514,0.453996,0.454054,0.453681,0.453329,0.453179,0.452266,0.452037,0.451963
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
930,0.082509,0.086094,0.086628,0.086086,0.085796,0.084353,0.085006,0.086789,0.086376,0.087283,0.088032,0.086318,0.087186,0.088396,0.088106,0.087527,0.087899,0.089199,0.088976,0.088618,0.088955,0.090331,0.090656,0.090489,0.090936,0.091222,0.091554,0.092116,0.092249,0.092824,0.093458,0.093528,0.093711,0.094078,0.094694,0.095337,0.095895,0.096237,0.096855,0.097382,0.097661,0.097782,0.098364,0.099244,0.099734,0.099888,0.100236,0.101248,0.101155,0.101309,...,0.272695,0.272203,0.271635,0.271394,0.271304,0.270942,0.270654,0.270305,0.269968,0.269562,0.269116,0.268841,0.268260,0.267568,0.267085,0.266313,0.266031,0.266142,0.265852,0.265670,0.265171,0.264492,0.263990,0.263861,0.263965,0.263895,0.263598,0.262912,0.261846,0.260718,0.260092,0.259639,0.259325,0.259248,0.258821,0.258777,0.258889,0.258657,0.258907,0.258567,0.258499,0.258647,0.258418,0.258190,0.257838,0.256938,0.256365,0.255700,0.254954,0.254814
931,0.132244,0.135997,0.136787,0.136215,0.135768,0.134739,0.135507,0.137491,0.138534,0.139440,0.139996,0.139216,0.140387,0.141747,0.141744,0.141586,0.142470,0.144451,0.144559,0.144395,0.145061,0.147079,0.147881,0.148313,0.149850,0.150548,0.151115,0.152182,0.153001,0.154091,0.155113,0.156090,0.156835,0.157481,0.158513,0.159851,0.161113,0.161937,0.163281,0.164370,0.164998,0.165676,0.166767,0.168143,0.169134,0.169640,0.170246,0.171660,0.171952,0.172468,...,0.399849,0.399154,0.398653,0.398170,0.397515,0.396864,0.396056,0.395309,0.394807,0.394480,0.394404,0.394249,0.393990,0.393268,0.392303,0.391475,0.390715,0.390426,0.390210,0.389529,0.388712,0.387879,0.387049,0.386724,0.386416,0.386193,0.385819,0.385031,0.384192,0.383234,0.382494,0.382178,0.382151,0.381913,0.381346,0.380454,0.379154,0.378316,0.378123,0.378144,0.378155,0.378140,0.377803,0.377186,0.376872,0.376682,0.375930,0.375761,0.375408,0.375020
932,0.108848,0.111724,0.112317,0.111861,0.111530,0.110682,0.112256,0.114790,0.114345,0.114559,0.115176,0.114641,0.115746,0.116817,0.116621,0.116933,0.117871,0.119289,0.119103,0.119282,0.120367,0.122092,0.122916,0.123514,0.124967,0.125366,0.125839,0.127091,0.128139,0.129247,0.130229,0.131402,0.132254,0.132978,0.134116,0.135276,0.136457,0.137532,0.138980,0.140285,0.141193,0.141762,0.142611,0.143922,0.145367,0.146058,0.146714,0.148202,0.148289,0.148705,...,0.360423,0.359990,0.359678,0.359457,0.359123,0.358401,0.357694,0.357390,0.356825,0.356244,0.356005,0.355247,0.354701,0.354512,0.353763,0.353158,0.352903,0.352516,0.351816,0.350856,0.350191,0.349187,0.347858,0.347406,0.346793,0.346521,0.346921,0.346635,0.345941,0.345053,0.344357,0.343947,0.343498,0.343413,0.342681,0.341786,0.342013,0.341489,0.340574,0.340418,0.339918,0.339725,0.339241,0.337776,0.337321,0.337169,0.336822,0.338064,0.338108,0.337685
933,0.106474,0.104166,0.106237,0.108701,0.109596,0.109279,0.109276,0.109909,0.110939,0.110382,0.110055,0.111276,0.112451,0.113170,0.113056,0.112590,0.112319,0.112998,0.114307,0.114951,0.115198,0.115715,0.116502,0.117105,0.117734,0.118549,0.118960,0.119167,0.119757,0.120763,0.121599,0.121935,0.122685,0.123570,0.124291,0.125004,0.125594,0.125900,0.126603,0.127352,0.128066,0.129100,0.129662,0.129987,0.130519,0.130953,0.131488,0.132526,0.133123,0.133475,...,0.293291,0.293005,0.292905,0.292314,0.292135,0.291846,0.291169,0.291030,0.290742,0.290333,0.290109,0.289463,0.289178,0.289090,0.288633,0.288558,0.288052,0.287360,0.287207,0.286771,0.286309,0.285734,0.284987,0.284767,0.284846,0.284996,0.284702,0.283826,0.282806,0.282144,0.282088,0.282018,0.281973,0.281971,0.281456,0.280992,0.280655,0.280140,0.279477,0.278641,0.278394,0.278681,0.278434,0.278365,0.277916,0.276382,0.276297,0.275592,0.275286,0.275954


In [14]:
target_df.shape

(935, 1)

In [15]:
assert spectra_df.index.equals(target_df.index), "Indices are not aligned between features and target."


In [16]:
target_df

Unnamed: 0,TOC
0,1.400000
1,1.800000
2,0.662000
3,0.920000
4,2.190000
...,...
930,1.031321
931,1.451488
932,1.126813
933,0.913440


In [17]:
# Split the row labels by whether the target value is present.
# (Imports of sklearn's KNNImputer / KFold were left commented out in this cell;
#  no imputation is performed here.)
target_is_missing = target_df.isna().any(axis=1)
non_missing_indices = target_df.index[~target_is_missing]
missing_indices = target_df.index[target_is_missing]

In [18]:
missing_indices

Int64Index([302, 303, 304, 305, 306, 307, 308, 309, 310, 311,
            ...
            671, 680, 695, 725, 730, 733, 734, 739, 775, 781],
           dtype='int64', length=140)

In [19]:
non_missing_indices

Int64Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
            ...
            925, 926, 927, 928, 929, 930, 931, 932, 933, 934],
           dtype='int64', length=795)

In [20]:
# Remove the samples whose target value is missing from both features and target
spectra_df.drop(index=missing_indices, inplace=True)
target_df.drop(index=missing_indices, inplace=True)
# Renumber the rows 0..n-1 in both frames so their indices stay identical
spectra_df.index = pd.RangeIndex(len(spectra_df))
target_df.index = pd.RangeIndex(len(target_df))


In [21]:
assert spectra_df.index.equals(target_df.index), "Indices are not aligned between features and target."


In [22]:
spectra_df.shape

(795, 2101)

In [23]:
target_df.shape

(795, 1)

# Step 1b: Obtaining Targets (Outliers removal and Normalization)

In [None]:

def isolate_targets(df, target_names):
    """Return the requested target columns of ``df`` as a list of Series.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains every column listed in ``target_names``.
    target_names : sequence of str
        Column labels to extract, in the order they should appear in the result.

    Returns
    -------
    list
        ``df[name]`` for each name, in the same order as ``target_names``.
    """
    return [df[name] for name in target_names]
    
# T[k] is the target Series belonging to target_names[k]
T = isolate_targets(target_df,target_names)

In [None]:
T

# Step 1c: Spectra Preprocessing (Smoothing, FOD / Continuum Removal / Log, and Resampling)

In [None]:
# Independent copy of the cleaned spectra; the smoothing cells below start from `spectra`
spectra= spectra_df.copy()

## Savgol smoothing (order 1 and order 2)

In [None]:
# Smoothed spectra, one entry per window length:
#   spec1[w] -> savgol order 1 ('sg1'), spec2[w] -> savgol order 2 ('sg2')
spec1 = {w: filt_sg(spectra, w, 'sg1') for w in window_lengths}
spec2 = {w: filt_sg(spectra, w, 'sg2') for w in window_lengths}

# NOTE(review): the meaning of the second argument (3) is defined by sgsmooth - confirm
smth_spec = sgsmooth(spectra, 3)

In [None]:
# FOD of the sgsmooth-smoothed spectra
fod_spec = fod(smth_spec)

# Quick visual check on the first five samples
for sample_idx in range(5):
    fod_spec.iloc[sample_idx].plot()


## Continuum Removal

In [None]:
# Continuum removal applied to the order-2 smoothed spectra (window length 51)
cr_spec = continuum_removed(spec2[51])

# Quick visual check on the first five samples
for sample_idx in range(5):
    cr_spec.iloc[sample_idx].plot()
    

## log(1/R) Transformation

In [None]:
# log(1/R) of the order-2 smoothed spectra (window length 51)
log_spec = np.log(1 / spec2[51])

# Quick visual check on the first five samples
for sample_idx in range(5):
    log_spec.iloc[sample_idx].plot()

## Resampling (n_bands)

### 1. Sampled Original (sampled_spec)

In [None]:
# Order-2 smoothed spectra (window 51) resampled once per configured band count
sampled_spec = {n: resample_spectra(spec2[51], n) for n in nbands_sampling}

In [None]:
# First five samples of the 50-band resampling
for sample_idx in range(5):
    sampled_spec[50].iloc[sample_idx].plot()

### 2. Sampled Continuum Removed  (sampled_cr)

In [None]:
# Continuum-removed spectra resampled once per configured band count
sampled_cr = {n: resample_spectra(cr_spec, n) for n in nbands_sampling}

In [None]:
# First five samples of the 50-band continuum-removed resampling
for sample_idx in range(5):
    sampled_cr[50].iloc[sample_idx].plot()

### 3. Sampled Log (sampled_log)

In [None]:
# log(1/R) spectra resampled once per configured band count
sampled_log = {n: resample_spectra(log_spec, n) for n in nbands_sampling}

In [None]:
# First ten samples of the 50-band log(1/R) resampling
for sample_idx in range(10):
    sampled_log[50].iloc[sample_idx].plot()

### 4.  FOD of sampled spectra (fod_sampled)

In [None]:
# FOD of each resampled (smoothed) spectrum, keyed by band count
fod_sampled = {n: fod(sampled_spec[n]) for n in nbands_sampling}

In [None]:
# First ten samples of the FOD of the 50-band resampling
for sample_idx in range(10):
    fod_sampled[50].iloc[sample_idx].plot()

### 5. FOD of sampled_cr (fod_cr)

In [None]:
# FOD of each resampled continuum-removed spectrum, keyed by band count
fod_cr = {n: fod(sampled_cr[n]) for n in nbands_sampling}

In [None]:
# First ten samples of the FOD of the 50-band continuum-removed resampling
for sample_idx in range(10):
    fod_cr[50].iloc[sample_idx].plot()

### 6. FOD of sampled_log (fod_log)

In [None]:
# FOD of each resampled log(1/R) spectrum, keyed by band count
fod_log = {n: fod(sampled_log[n]) for n in nbands_sampling}

In [None]:
# First ten samples of the FOD of the 50-band log(1/R) resampling
for sample_idx in range(10):
    fod_log[50].iloc[sample_idx].plot()

## Visualizing Processed Spectrum (variable samples)

In [None]:
# Number of samples (row) and of spectral bands (col); `row` sizes the sample slider below
row, col = spectra.shape
row, col

In [None]:
def plot_spec (sample, process):
    """Overlay one sample's smoothed spectrum with a processed version of it.

    Parameters
    ----------
    sample : int
        Positional row index of the sample to draw.
    process : str
        'cr' overlays the continuum-removed spectrum, 'log' overlays
        log(1/R) divided by 3; every other value overlays the FOD spectrum
        multiplied by 100.

    The y-axis is fixed to [-0.6, 0.9].
    """
    # Baseline: order-2 smoothed spectrum (window length 51)
    spec2[51].iloc[sample, :].plot()

    if process == 'cr':
        cr_spec.iloc[sample, :].plot()
    elif process == 'log':
        # Scaled down, presumably so the curve fits the fixed y-range - TODO confirm
        (log_spec.iloc[sample, :] / 3).plot()
    else:
        # NOTE(review): this branch also handles 'none', 'fod_cr' and 'fod_log';
        # confirm that overlaying the scaled FOD is intended for those choices.
        (fod_spec.iloc[sample, :] * 100).plot()

    plt.ylim([-0.6, 0.9])

# The slider's upper bound is inclusive and plot_spec indexes rows positionally,
# so the last valid sample is row - 1 (row itself would raise IndexError).
ipywidgets.interact(plot_spec, sample = (0, row - 1, 1), process = prepare_spec)

## Correlation between wavelengths and Targets

In [None]:
def find_rpval (spectra, tar):
    """Pearson correlation of every spectral column with a target vector.

    Parameters
    ----------
    spectra : pandas.DataFrame
        One row per sample, one column per band.
    tar : sequence or pandas.Series
        Target values, one per row of ``spectra``.

    Returns
    -------
    (r_val, p_val) : tuple of pandas.DataFrame
        Two single-row frames with the same columns as ``spectra`` (and the
        row label of its first row) holding, per band, the correlation
        coefficient and the p-value returned by ``stats.pearsonr``.

    Raises
    ------
    ValueError
        If ``tar`` and ``spectra`` do not have the same number of samples.
    """
    n_rows, n_cols = spectra.shape

    # One up-front check replaces the per-column debug prints that used to
    # flood the output with two lines for every band.
    if len(tar) != n_rows:
        raise ValueError(
            "tar has %d values but spectra has %d rows" % (len(tar), n_rows)
        )

    # Single-row templates that keep the band labels; cast to float so the
    # coefficients are stored without any dtype coercion.
    r_val = spectra.iloc[[0], :].astype(float)
    p_val = spectra.iloc[[0], :].astype(float)

    for j in range(n_cols):
        r_val.iloc[0, j], p_val.iloc[0, j] = stats.pearsonr(tar, spectra.iloc[:, j])

    return (r_val, p_val)


In [None]:
# Figure styling for the plots below.
# NOTE(review): 'science', 'notebook' and 'grid' are not built-in matplotlib styles -
# presumably registered by the SciencePlots package imported through Header; confirm.
plt.style.use(['science','notebook','grid'])

def plot_corr (target, prepare, n_bands):
    """Plot the per-band Pearson r between a processed spectrum and a target.

    Parameters
    ----------
    target : str
        A name from ``target_names``; its position selects the series in
        ``T`` and the line colour in ``clr``.
    prepare : str
        Spectral preprocessing to correlate against: 'none', 'cr', 'log',
        'fod' (the legacy spelling 'fod_spec' is also accepted), 'fod_cr'
        or 'fod_log'.
    n_bands : int
        Key into the resampled-spectra dictionaries.

    Raises
    ------
    ValueError
        If ``target`` is not in ``target_names`` or ``prepare`` is unknown.

    The y-axis is fixed to [-0.9, 0.9].
    """
    i = target_names.index(target)

    # prepare_spec offers 'fod', while this function used to test only for
    # 'fod_spec'; 'fod' therefore fell through to the fod_log branch. Both
    # spellings now map to the FOD of the resampled spectra.
    sources = {
        'none': sampled_spec,
        'cr': sampled_cr,
        'log': sampled_log,
        'fod': fod_sampled,
        'fod_spec': fod_sampled,
        'fod_cr': fod_cr,
        'fod_log': fod_log,
    }
    if prepare not in sources:
        # Fail loudly rather than silently plotting some other preprocessing
        raise ValueError("Unknown spectral preprocessing: %r" % (prepare,))

    r_val, _p_val = find_rpval(sources[prepare][n_bands], T[i])
    r_val.iloc[0, :].plot(color = clr[i])

    plt.ylim([-0.9, 0.9])


# Interactive explorer: one selector each for target, spectral preprocessing and band count
ipywidgets.interact(plot_corr, target = target_names, prepare = prepare_spec, n_bands = nbands_sampling)



In [None]:
# Bundle of every processed artefact produced above.
# Note that 'spectra' stores the order-2 smoothed spectra (window 51), not the raw ones.
Data = {
    'spectra': spec2[51],
    'T': T,
    'spec1': spec1,
    'spec2': spec2,
    'smth_spec': smth_spec,
    'fod_spec': fod_spec,
    'cr_spec': cr_spec,
    'log_spec': log_spec,
    'sampled_spec': sampled_spec,
    'sampled_cr': sampled_cr,
    'fod_sampled': fod_sampled,
    'sampled_log': sampled_log,
    'fod_cr': fod_cr,
    'fod_log': fod_log,
}

In [None]:
# Persist the processed-data bundle to data.pickle
with open('data.pickle', mode='wb') as file:
    pickle.dump(Data, file)