This notebook was used to tidy up the extracted data. This involves checking that ids match, removing unneeded columns and stacking together separate indices into a single data frame.

# 1. Extracted S2-indices for 2017 (DONE)

- tidy up the data
- add "id_17" index

In [1]:
import pandas as pd

In [2]:
%%time

# Read the raw 2017 NDVI extraction; it is only previewed here, the tidy-up
# loop below re-reads every index file itself.
src_pth = f".\\csvs\\ZV2017_extracted_ndvi.csv"

df0 = pd.read_csv(src_pth)

Wall time: 3.13 s


In [3]:
# Show the last rows of the raw table to check its columns before tidying
df0.tail()

Unnamed: 0.1,Unnamed: 0,GERK_PID,POLJINA_ID,RABA_ID,POVR_GERK_,SIFRA_KMRS,OBRAZEC,INTERSECT,POVR_ar,REGIJA,...,p_17-10-08,p_17-10-18,pc075_m02,pc075_m04,pc075_m05,pc075_m06,pc075_m07,pc075_m08,pc075_m09,pc075_m10
664133,664133,1892284,2976726,1300,8760.9728,204,GP,0,87.609728,2,...,,,0.548966,0.871466,0.835215,0.883261,0.643689,0.81448,0.81448,0.81448
664134,664134,3588000,2976727,1300,7807.7304,204,GP,0,78.077304,2,...,,,0.541856,0.880461,0.788361,0.884401,0.791368,0.774565,0.774565,0.774565
664135,664135,1646627,2976728,1300,7559.2166,204,GP,0,75.592166,2,...,,,0.445272,0.819034,0.873336,0.664987,0.590871,0.608201,0.608201,0.608201
664136,664136,3588612,2976729,1300,6120.1665,204,GP,0,61.201665,2,...,,,0.573511,0.879573,0.900252,0.865085,0.645182,0.663612,0.663612,0.663612
664137,664137,3588639,2976730,1300,5015.4655,204,GP,0,50.154655,2,...,,,0.625246,0.913037,0.901425,0.884859,0.669561,0.764977,0.764977,0.764977


In [4]:
suffs = ["evi2", "ndvi", "ndwi", "savi"]
for suff in suffs:
    # Open the raw extraction for this index
    src_pth = f".\\csvs\\ZV2017_extracted_{suff}.csv"
    df0 = pd.read_csv(src_pth)

    # Drop unnecessary columns. They are picked by position in the raw file and
    # removed by label, so the second drop can still refer to df0.
    df_tidy = df0.drop(columns=df0.columns[:12])  # KMRS data
    df_tidy = df_tidy.drop(columns=df0.columns[-8:])  # Monthly values

    # Rename "p_17-" to "ndvi_" (and other indices). regex=False matches the
    # text literally and keeps the result independent of the pandas default.
    df_tidy.columns = df_tidy.columns.str.replace("p_17-", f"{suff}_", regex=False)

    # Add the "id_17" column: the default 0-based row index of the file
    df_tidy = df_tidy.reset_index().rename(columns={"index": "id_17"})

    print(df_tidy.tail(1))

    # Save to csv (the row index is not written, the ids live in "id_17")
    df_tidy.to_csv(f".\\results_extracted\\ZV2017_extracted_{suff}.csv", index=False)

         id_17  evi2_04-11  evi2_04-21  evi2_05-01  evi2_05-11  evi2_05-21  \
664137  664137         NaN    0.779873         NaN         NaN    0.827327   

        evi2_05-31  evi2_06-10  evi2_06-20  evi2_06-30  evi2_07-10  \
664137         NaN    0.751983         NaN         NaN    0.429898   

        evi2_07-20  evi2_07-30  evi2_08-09  evi2_08-19  evi2_08-29  \
664137    0.449394         NaN    0.579503         NaN    0.520069   

        evi2_09-08  evi2_10-08  evi2_10-18  
664137         NaN         NaN         NaN  
         id_17  ndvi_04-11  ndvi_04-21  ndvi_05-01  ndvi_05-11  ndvi_05-21  \
664137  664137         NaN    0.913037         NaN         NaN    0.901425   

        ndvi_05-31  ndvi_06-10  ndvi_06-20  ndvi_06-30  ndvi_07-10  \
664137         NaN    0.884859         NaN         NaN     0.56709   

        ndvi_07-20  ndvi_07-30  ndvi_08-09  ndvi_08-19  ndvi_08-29  \
664137    0.772031         NaN    0.749255         NaN    0.780699   

        ndvi_09-08  ndvi_10-08  

# 2. Extracted S2-rgbn for 2017 (DONE)

- tidy up the data
- add "id_17" index

In [None]:
import pandas as pd

In [5]:
%%time

# Read the raw 2017 S2 band (blue/green/red/nir) extraction
src_pth = f".\\csvs\\ZV2017_extracted_s2-rgbn.csv"

df0 = pd.read_csv(src_pth)

Wall time: 9.82 s


In [13]:
# Display the raw table (pandas renders a truncated head/tail view)
df0

Unnamed: 0,GERK_PID,POLJINA_ID,RABA_ID,POVR_GERK_,SIFRA_KMRS,OBRAZEC,INTERSECT,POVR_ar,REGIJA,blue_04_01,...,red_10_18,nir_10_18,blue_10_23,green_10_23,red_10_23,nir_10_23,blue_10_28,green_10_28,red_10_28,nir_10_28
0,2537988,2104906,1100,19308.9997,801,GP,0,57.930456,5,0.085712,...,,,,,,,,,,
1,2537988,2104908,1100,19308.9997,5,GP,0,30.520097,5,0.111312,...,,,,,,,,,,
2,2537988,2104910,1100,19308.9997,203,GP,0,26.799442,5,0.082948,...,,,,,,,,,,
3,1143420,2105003,1100,8277.3796,801,GP,0,40.952670,5,0.088406,...,,,,,,,,,,
4,3077991,2652377,1211,14987.7976,100,GP,0,149.877976,4,0.082925,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
664133,1892284,2976726,1300,8760.9728,204,GP,0,87.609728,2,0.075792,...,,,,,,,,,,
664134,3588000,2976727,1300,7807.7304,204,GP,0,78.077304,2,0.090231,...,,,,,,,,,,
664135,1646627,2976728,1300,7559.2166,204,GP,0,75.592166,2,0.072073,...,,,,,,,,,,
664136,3588612,2976729,1300,6120.1665,204,GP,0,61.201665,2,0.072300,...,,,,,,,,,,


In [14]:
# Remove the nine leading attribute columns (GERK_PID ... REGIJA) by label,
# then turn the default row index into the "id_17" column
df_tidy = (
    df0.drop(labels=df0.columns[0:9], axis="columns")
    .reset_index()
    .rename(columns={"index": "id_17"})
)

df_tidy.tail(3)

Unnamed: 0,id_17,blue_04_01,green_04_01,red_04_01,nir_04_01,blue_04_11,green_04_11,red_04_11,nir_04_11,blue_04_21,...,red_10_18,nir_10_18,blue_10_23,green_10_23,red_10_23,nir_10_23,blue_10_28,green_10_28,red_10_28,nir_10_28
664135,664135,0.072073,0.082949,0.077704,0.342441,,,,,0.067409,...,,,,,,,,,,
664136,664136,0.0723,0.083633,0.075527,0.311218,,,,,0.061263,...,,,,,,,,,,
664137,664137,0.063442,0.075768,0.057569,0.352208,,,,,0.05938,...,,,,,,,,,,


In [15]:
%%time

# Save to csv; index=False because the ids are already in the "id_17" column
df_tidy.to_csv(f".\\results_extracted\\ZV2017_extracted_s2-rgbn.csv", index=False)

Wall time: 1min 20s


# 3. Extracted S1-slc for 2017 (DONE)

Missing rows ---> SOLVED

Only 664106 rows instead of 664138 (the last index is 664137), that is 32 missing rows. Maybe investigate what crop types were used in the missing rows!

## 3.1 Find missing values

In [69]:
%%time
import geopandas as gpd

# S1 extraction that turned out to have fewer rows than the other data sets
s1_pth = f".\\csvs\\ZV2017_extracted_s1-slc.csv"
# NOTE: despite its name, s2_pth is the clean 2017 shapefile used as reference
s2_pth = f".\\poljine_SHP\\ZV2017_d96tm_clean_ids.shp"

df2 = pd.read_csv(s1_pth)  # S1 table
df1 = gpd.read_file(s2_pth)  # reference table (GeoDataFrame)

Wall time: 2min 17s


In [29]:
# Key columns of the S1 table; the last index label shows where it ends
df22 = df2[["GERK_PID", "POLJINA_ID"]]
df22.tail(5)

Unnamed: 0,GERK_PID,POLJINA_ID
664101,1892284,2976726
664102,3588000,2976727
664103,1646627,2976728
664104,3588612,2976729
664105,3588639,2976730


In [27]:
# Same key columns from the reference shapefile, for comparison with df22
df11 = df1[["GERK_PID", "POLJINA_ID"]]
df11.tail(33)

Unnamed: 0,GERK_PID,POLJINA_ID
664105,1283404,3062363
664106,913282,3062364
664107,913286,3062365
664108,907589,3062366
664109,907585,3062367
664110,907587,3062368
664111,3708301,3062370
664112,4617320,3071272
664113,4883395,3071273
664114,4553594,3071274


In [36]:
# This is a list of indices from 2017 that are missing from the S1 Data Set.
# A parcel counts as present when its (GERK_PID, POLJINA_ID) pair occurs in df22.
s1_keys = pd.MultiIndex.from_frame(df22)
shp_keys = pd.MultiIndex.from_frame(df11)
# Membership is tested in one direction only (shapefile -> S1), so every label
# returned is a row label of df11/df1. A symmetric concat + drop_duplicates(
# keep=False) would also return S1 row labels for keys found only in the S1
# file, and would hide a missing key that occurs more than once in the shapefile.
missing_values = df11.index[~shp_keys.isin(s1_keys)]
missing_values

Int64Index([ 46616,  47144,  57323,  62652,  62757,  62763,  93813,  94575,
             97683, 109581, 109906, 115507, 116679, 116680, 135561, 154766,
            204863, 260352, 291532, 300465, 326524, 355781, 392932, 429276,
            451339, 530030, 555758, 598878, 649259, 650254, 651497, 656362],
           dtype='int64')

In [70]:
# First rows of the reference shapefile table (it already has an id_17 column)
df1.head()

Unnamed: 0,id_17,GERK_PID,POLJINA_ID,RABA_ID,POVR_GERK_,SIFRA_KMRS,OBRAZEC,INTERSECT,POVR_ar,REGIJA,geometry
0,0,2537988,2104906,1100,19308.9997,801,GP,0,57.930456,5,"POLYGON ((547508.347 86829.736, 547545.426 868..."
1,1,2537988,2104908,1100,19308.9997,5,GP,0,30.520097,5,"POLYGON ((547497.850 86785.076, 547578.847 867..."
2,2,2537988,2104910,1100,19308.9997,203,GP,0,26.799442,5,"POLYGON ((547599.139 86850.730, 547670.666 868..."
3,3,1143420,2105003,1100,8277.3796,801,GP,0,40.95267,5,"POLYGON ((547165.958 85941.243, 547179.112 859..."
4,4,3077991,2652377,1211,14987.7976,100,GP,0,149.877976,4,"POLYGON ((419052.089 80240.803, 419056.851 802..."


In [None]:
# Filter the clean shape to get only the missing rows. missing_values holds
# index labels, so select by label (.loc) instead of by position (.iloc).
df_missing = df1.loc[missing_values]

# Save as shp
df_missing.to_file(".\\poljine_SHP\\ZV2017_missing_S1")


## 3.2 Add missing values

- the 32 data points were calculated using a slightly smaller buffer (-4 meters)
- they need to be added into the correct position, so the id_17 index will be correct

In [85]:
import pandas as pd
import geopandas as gpd

In [86]:
%%time

# Clean SHP: defines the correct order of the data points (and their id_17)
gdf = gpd.read_file(".\\poljine_SHP\\ZV2017_d96tm_clean_ids.shp")
# Main S1 data set, which lacks some of the rows
df0 = pd.read_csv(".\\csvs\\ZV2017_extracted_s1-slc.csv")
# S1 values for the rows missing from the main data set
df1 = pd.read_csv(".\\csvs\\ZV2017_missing_S1_slc_missing.csv")

Wall time: 2min 9s


In [87]:
# Keep only the attribute columns of the shapefile; the geometry is not needed
gdf0 = gdf.drop(columns=["geometry"])
gdf0.head(2)

Unnamed: 0,id_17,GERK_PID,POLJINA_ID,RABA_ID,POVR_GERK_,SIFRA_KMRS,OBRAZEC,INTERSECT,POVR_ar,REGIJA
0,0,2537988,2104906,1100,19308.9997,801,GP,0,57.930456,5
1,1,2537988,2104908,1100,19308.9997,5,GP,0,30.520097,5


In [88]:
# Main S1 table: starts at GERK_PID, it has no id_17 column
df0.head(2)

Unnamed: 0,GERK_PID,POLJINA_ID,RABA_ID,POVR_GERK_,SIFRA_KMRS,OBRAZEC,INTERSECT,POVR_ar,REGIJA,SIG_ASC_VH_04_01,...,COH_DES_VH_10_22,COH_DES_VV_10_22,SIG_ASC_VH_10_28,SIG_ASC_VV_10_28,SIG_DES_VH_10_28,SIG_DES_VV_10_28,COH_ASC_VH_10_28,COH_ASC_VV_10_28,COH_DES_VH_10_28,COH_DES_VV_10_28
0,2537988,2104906,1100,19308.9997,801,GP,0,57.930456,5,0.01296,...,0.507982,0.524422,0.031385,0.183504,0.027705,0.180794,0.689269,0.752774,0.580919,0.686584
1,2537988,2104908,1100,19308.9997,5,GP,0,30.520097,5,0.009604,...,0.460377,0.570117,0.036393,0.204009,0.029903,0.234453,0.740559,0.745909,0.585225,0.734621


In [89]:
# Separately extracted rows: these already carry their id_17
df1.head(2)

Unnamed: 0,id_17,GERK_PID,POLJINA_ID,RABA_ID,POVR_GERK_,SIFRA_KMRS,OBRAZEC,INTERSECT,POVR_ar,REGIJA,...,COH_DES_VH_10_22,COH_DES_VV_10_22,SIG_ASC_VH_10_28,SIG_ASC_VV_10_28,SIG_DES_VH_10_28,SIG_DES_VV_10_28,COH_ASC_VH_10_28,COH_ASC_VV_10_28,COH_DES_VH_10_28,COH_DES_VV_10_28
0,46616,759714,2829278,1100,2304.0624,809,PP,1,23.040624,5,...,0.464669,0.61585,0.011288,0.065602,0.011994,0.086846,0.415111,0.623159,0.363966,0.573629
1,47144,759714,2803530,1100,2304.0624,207,GP,1,23.040624,5,...,0.464669,0.61585,0.011288,0.065602,0.011994,0.086846,0.415111,0.623159,0.363966,0.573629


In [90]:
# Left-join the S1 values onto the shapefile attributes by POLJINA_ID: the
# result keeps the rows of gdf0, and parcels without S1 data get NaN values.
# Columns present in both tables get the "_DROP" suffix on the df0 side and are
# removed again by the negative-lookahead regex, so gdf0's copy is the one kept.
# NOTE(review): assumes POLJINA_ID is unique in both tables; duplicates would
# multiply rows in the result. Confirm.
df3 = pd.merge(gdf0 , df0, how='left', on='POLJINA_ID', suffixes=('', '_DROP')).filter(regex='^(?!.*_DROP)')
df3

Unnamed: 0,id_17,GERK_PID,POLJINA_ID,RABA_ID,POVR_GERK_,SIFRA_KMRS,OBRAZEC,INTERSECT,POVR_ar,REGIJA,...,COH_DES_VH_10_22,COH_DES_VV_10_22,SIG_ASC_VH_10_28,SIG_ASC_VV_10_28,SIG_DES_VH_10_28,SIG_DES_VV_10_28,COH_ASC_VH_10_28,COH_ASC_VV_10_28,COH_DES_VH_10_28,COH_DES_VV_10_28
0,0,2537988,2104906,1100,19308.9997,801,GP,0,57.930456,5,...,0.507982,0.524422,0.031385,0.183504,0.027705,0.180794,0.689269,0.752774,0.580919,0.686584
1,1,2537988,2104908,1100,19308.9997,005,GP,0,30.520097,5,...,0.460377,0.570117,0.036393,0.204009,0.029903,0.234453,0.740559,0.745909,0.585225,0.734621
2,2,2537988,2104910,1100,19308.9997,203,GP,0,26.799442,5,...,0.433481,0.440421,0.018985,0.109362,0.017518,0.079535,0.577215,0.682407,0.377640,0.448435
3,3,1143420,2105003,1100,8277.3796,801,GP,0,40.952670,5,...,0.426090,0.476744,0.020489,0.098561,0.020738,0.099730,0.447147,0.398452,0.454416,0.428645
4,4,3077991,2652377,1211,14987.7976,100,GP,0,149.877976,4,...,0.380814,0.430132,0.023058,0.067281,0.021289,0.080768,0.402380,0.496410,0.295891,0.316103
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
664133,664133,1892284,2976726,1300,8760.9728,204,GP,0,87.609728,2,...,0.321100,0.413124,0.014662,0.047783,0.046331,0.219224,0.283349,0.303850,0.255324,0.243495
664134,664134,3588000,2976727,1300,7807.7304,204,GP,0,78.077304,2,...,0.399328,0.465319,0.012253,0.039134,0.030596,0.129961,0.326760,0.355066,0.292682,0.346647
664135,664135,1646627,2976728,1300,7559.2166,204,GP,0,75.592166,2,...,0.355053,0.548025,0.024380,0.065607,0.039934,0.218525,0.421177,0.368866,0.247987,0.284292
664136,664136,3588612,2976729,1300,6120.1665,204,GP,0,61.201665,2,...,0.338016,0.299834,0.008648,0.023843,0.027773,0.128125,0.378907,0.417516,0.312166,0.233042


In [91]:
# Rows of df3 whose S1 values were extracted separately (df1), found by POLJINA_ID
missing_poljine = df1["POLJINA_ID"]
missing_rows = df3.index[df3["POLJINA_ID"].isin(missing_poljine.to_numpy())]

# Bring df1 into df3's row order (looked up by POLJINA_ID) and column order
# (looked up by name), then give it df3's row labels. The assignment below is
# then aligned on labels and does not depend on how df1 happens to be sorted;
# a POLJINA_ID or column that df1 lacks raises a KeyError instead of silently
# writing values into the wrong place.
patch = df1.set_index("POLJINA_ID", drop=False).loc[
    df3.loc[missing_rows, "POLJINA_ID"].to_numpy(), df3.columns
]
patch.index = missing_rows
df3.loc[missing_rows, :] = patch
df3.loc[missing_rows]

Unnamed: 0,id_17,GERK_PID,POLJINA_ID,RABA_ID,POVR_GERK_,SIFRA_KMRS,OBRAZEC,INTERSECT,POVR_ar,REGIJA,...,COH_DES_VH_10_22,COH_DES_VV_10_22,SIG_ASC_VH_10_28,SIG_ASC_VV_10_28,SIG_DES_VH_10_28,SIG_DES_VV_10_28,COH_ASC_VH_10_28,COH_ASC_VV_10_28,COH_DES_VH_10_28,COH_DES_VV_10_28
46616,46616,759714,2829278,1100,2304.0624,809,PP,1,23.040624,5,...,0.464669,0.61585,0.011288,0.065602,0.011994,0.086846,0.415111,0.623159,0.363966,0.573629
47144,47144,759714,2803530,1100,2304.0624,207,GP,1,23.040624,5,...,0.464669,0.61585,0.011288,0.065602,0.011994,0.086846,0.415111,0.623159,0.363966,0.573629
57323,57323,1830184,2937936,1100,2981.0183,6,GP,0,15.616768,5,...,0.332144,0.521226,0.016108,0.08992,0.018751,0.104556,0.371957,0.528614,0.338496,0.475566
62652,62652,140213,2349097,1100,1905.3392,801,GP,1,19.053392,5,...,0.42579,0.677048,0.018764,0.116534,0.017879,0.131818,0.399777,0.639833,0.34958,0.637723
62757,62757,140213,2349179,1100,1905.3392,808,PP,1,19.053392,5,...,0.42579,0.677048,0.018764,0.116534,0.017879,0.131818,0.399777,0.639833,0.34958,0.637723
62763,62763,140213,2349267,1100,1905.3392,112,NP,1,19.053392,5,...,0.42579,0.677048,0.018764,0.116534,0.017879,0.131818,0.399777,0.639833,0.34958,0.637723
93813,93813,1744214,3040677,1300,1703.8102,204,GP,0,17.038102,5,...,0.443678,0.554677,0.019175,0.077122,0.022276,0.116621,0.473571,0.573652,0.520477,0.517999
94575,94575,70106,3027566,1100,1909.6832,112,NP,1,19.096832,5,...,0.506524,0.735718,0.020141,0.133595,0.020231,0.152725,0.45115,0.636498,0.483293,0.67153
97683,97683,70106,3022767,1100,1909.6832,33,GP,1,19.096832,5,...,0.506524,0.735718,0.020141,0.133595,0.020231,0.152725,0.45115,0.636498,0.483293,0.67153
109581,109581,689955,2125959,1300,1252.8071,204,GP,0,12.528071,1,...,0.289132,0.561356,0.016988,0.093268,0.006524,0.098531,0.352315,0.450281,0.231252,0.347888


In [93]:
# List every column name of the merged table
df3.columns.to_list()

['id_17',
 'GERK_PID',
 'POLJINA_ID',
 'RABA_ID',
 'POVR_GERK_',
 'SIFRA_KMRS',
 'OBRAZEC',
 'INTERSECT',
 'POVR_ar',
 'REGIJA',
 'SIG_ASC_VH_04_01',
 'SIG_ASC_VV_04_01',
 'SIG_DES_VH_04_01',
 'SIG_DES_VV_04_01',
 'COH_ASC_VH_04_01',
 'COH_ASC_VV_04_01',
 'COH_DES_VH_04_01',
 'COH_DES_VV_04_01',
 'SIG_ASC_VH_04_07',
 'SIG_ASC_VV_04_07',
 'SIG_DES_VH_04_07',
 'SIG_DES_VV_04_07',
 'COH_ASC_VH_04_07',
 'COH_ASC_VV_04_07',
 'COH_DES_VH_04_07',
 'COH_DES_VV_04_07',
 'SIG_ASC_VH_04_13',
 'SIG_ASC_VV_04_13',
 'SIG_DES_VH_04_13',
 'SIG_DES_VV_04_13',
 'COH_ASC_VH_04_13',
 'COH_ASC_VV_04_13',
 'COH_DES_VH_04_13',
 'COH_DES_VV_04_13',
 'SIG_ASC_VH_04_19',
 'SIG_ASC_VV_04_19',
 'SIG_DES_VH_04_19',
 'SIG_DES_VV_04_19',
 'COH_ASC_VH_04_19',
 'COH_ASC_VV_04_19',
 'COH_DES_VH_04_19',
 'COH_DES_VV_04_19',
 'SIG_ASC_VH_04_25',
 'SIG_ASC_VV_04_25',
 'SIG_DES_VH_04_25',
 'SIG_DES_VV_04_25',
 'COH_ASC_VH_04_25',
 'COH_ASC_VV_04_25',
 'COH_DES_VH_04_25',
 'COH_DES_VV_04_25',
 'SIG_ASC_VH_05_01',
 'SIG_ASC_

In [98]:
# Remove the attribute columns that lead df0 (GERK_PID ... REGIJA) from the
# merged table; "id_17" and the S1 values stay
df_tidy = df3.drop(labels=df0.columns[0:9], axis="columns")

df_tidy.tail(3)

Unnamed: 0,id_17,SIG_ASC_VH_04_01,SIG_ASC_VV_04_01,SIG_DES_VH_04_01,SIG_DES_VV_04_01,COH_ASC_VH_04_01,COH_ASC_VV_04_01,COH_DES_VH_04_01,COH_DES_VV_04_01,SIG_ASC_VH_04_07,...,COH_DES_VH_10_22,COH_DES_VV_10_22,SIG_ASC_VH_10_28,SIG_ASC_VV_10_28,SIG_DES_VH_10_28,SIG_DES_VV_10_28,COH_ASC_VH_10_28,COH_ASC_VV_10_28,COH_DES_VH_10_28,COH_DES_VV_10_28
664135,664135,0.021321,0.064066,0.059747,0.358613,0.303795,0.342158,0.334006,0.322785,0.019865,...,0.355053,0.548025,0.02438,0.065607,0.039934,0.218525,0.421177,0.368866,0.247987,0.284292
664136,664136,0.00837,0.022646,0.029279,0.119966,0.27795,0.339892,0.293512,0.292937,0.00793,...,0.338016,0.299834,0.008648,0.023843,0.027773,0.128125,0.378907,0.417516,0.312166,0.233042
664137,664137,0.006835,0.01704,0.030513,0.165918,0.283985,0.33066,0.303999,0.359209,0.008662,...,0.346559,0.366279,0.007152,0.017471,0.031868,0.168384,0.339139,0.344201,0.270885,0.280166


In [99]:
%%time

# Save to csv; index=False because the ids are already in the "id_17" column
df_tidy.to_csv(f".\\results_extracted\\ZV2017_extracted_s1-slc.csv", index=False)

Wall time: 5min 56s


# 4. Extracted HAND for 2017 (DONE)

- tidy up the data
- add "id_17" index

In [None]:
import pandas as pd

In [16]:
%%time

# Read the raw 2017 HAND extraction
src_pth = f".\\csvs\\ZV2017_extracted_hand.csv"

df0 = pd.read_csv(src_pth)

Wall time: 824 ms


In [17]:
# Display the raw table. Its "Unnamed: 0" column does not follow the row index
# towards the end of the file (e.g. 681156 at row 664133).
df0

Unnamed: 0.1,Unnamed: 0,GERK_PID,POLJINA_ID,RABA_ID,POVR_GERK_,SIFRA_KMRS,OBRAZEC,INTERSECT,POVR_ar,REGIJA,pix_count,hand_mean,hand_std
0,0,2537988,2104906,1100,19308.9997,801,GP,0,57.930456,5,63,0.429364,0.105657
1,1,2537988,2104908,1100,19308.9997,5,GP,0,30.520097,5,34,0.379970,0.025663
2,2,2537988,2104910,1100,19308.9997,203,GP,0,26.799442,5,30,0.428827,0.117456
3,3,1143420,2105003,1100,8277.3796,801,GP,0,40.952670,5,42,1.999929,0.352233
4,4,3077991,2652377,1211,14987.7976,100,GP,0,149.877976,4,167,5.813277,3.331927
...,...,...,...,...,...,...,...,...,...,...,...,...,...
664133,681156,1892284,2976726,1300,8760.9728,204,GP,0,87.609728,2,92,18.987577,10.833754
664134,681157,3588000,2976727,1300,7807.7304,204,GP,0,78.077304,2,81,23.950169,8.193017
664135,681158,1646627,2976728,1300,7559.2166,204,GP,0,75.592166,2,80,77.466759,24.283596
664136,681159,3588612,2976729,1300,6120.1665,204,GP,0,61.201665,2,65,9.352770,5.397253


In [18]:
# Remove the eleven leading columns ("Unnamed: 0" ... pix_count) by label, then
# turn the default row index into the "id_17" column
df_tidy = (
    df0.drop(labels=df0.columns[0:11], axis="columns")
    .reset_index()
    .rename(columns={"index": "id_17"})
)

df_tidy.tail(3)

Unnamed: 0,id_17,hand_mean,hand_std
664135,664135,77.466759,24.283596
664136,664136,9.35277,5.397253
664137,664137,9.910322,2.914534


In [19]:
%%time

# Save to csv; index=False because the ids are already in the "id_17" column
df_tidy.to_csv(f".\\results_extracted\\ZV2017_extracted_hand.csv", index=False)

Wall time: 2.82 s


# 5. Extracted indices for 2018 and 2019

- Tidy: rename "index" to "id_18" and "id_19" respectively

## 5.1 Year 2018

There should be 680698 rows in each data set

In [1]:
import pandas as pd

In [72]:
%%time

# Read the raw 2018 NDVI extraction for a trial run of the tidy-up steps
src_pth = f".\\csvs\\ZV2018_extracted_ndvi.csv"

df0 = pd.read_csv(src_pth)

Wall time: 4.41 s


In [75]:
# Trial run on NDVI: remove the nine leading columns by label and expose the
# default row index as "id_18"
df = (
    df0.drop(labels=df0.columns[0:9], axis="columns")
    .reset_index()
    .rename(columns={"index": "id_18"})
)
df.tail()

Unnamed: 0,id_17,ndvi_04_01,ndvi_04_06,ndvi_04_11,ndvi_04_16,ndvi_04_21,ndvi_04_26,ndvi_05_01,ndvi_05_06,ndvi_05_11,...,ndvi_09_13,ndvi_09_18,ndvi_09_23,ndvi_09_28,ndvi_10_03,ndvi_10_08,ndvi_10_13,ndvi_10_18,ndvi_10_23,ndvi_10_28
680693,680693,,0.638653,0.611491,0.201033,0.812884,,0.84955,0.848288,,...,,0.774726,,0.80453,,,,,0.741961,
680694,680694,,0.532274,0.527314,0.213955,0.719594,,0.795405,0.826601,,...,,0.799814,,0.806531,,,,,0.681984,
680695,680695,,0.362106,,,0.678596,,,,0.377683,...,,0.685558,,0.680902,0.671604,,,,,
680696,680696,,0.525302,,,0.702321,,,,0.687092,...,0.293147,0.784263,,0.791756,0.778464,,,,0.743478,
680697,680697,,0.807609,,,0.81937,,,,0.840772,...,,0.854973,,0.861242,0.824095,,,,0.786069,


In [80]:
suffs = ["evi2", "ndvi", "ndwi", "savi"]
for suff in suffs:
    # Read the raw 2018 extraction for this index
    src_pth = f".\\csvs\\ZV2018_extracted_{suff}.csv"
    df0 = pd.read_csv(src_pth)

    # Remove the nine leading columns (KMRS data) by label, then expose the
    # default row index as the "id_18" column
    df_tidy = (
        df0.drop(labels=df0.columns[0:9], axis="columns")
        .reset_index()
        .rename(columns={"index": "id_18"})
    )

    print(df_tidy.tail(1))

    # Write the tidy table without the row index
    df_tidy.to_csv(f".\\results_extracted\\ZV2018_extracted_{suff}.csv", index=False)

         id_18  evi2_04_01  evi2_04_06  evi2_04_11  evi2_04_16  evi2_04_21  \
680697  680697         NaN    0.514897         NaN         NaN    0.596118   

        evi2_04_26  evi2_05_01  evi2_05_06  evi2_05_11  ...  evi2_09_13  \
680697         NaN         NaN         NaN    0.657612  ...         NaN   

        evi2_09_18  evi2_09_23  evi2_09_28  evi2_10_03  evi2_10_08  \
680697    0.678097         NaN    0.701305    0.650388         NaN   

        evi2_10_13  evi2_10_18  evi2_10_23  evi2_10_28  
680697         NaN         NaN    0.596261         NaN  

[1 rows x 43 columns]
         id_18  ndvi_04_01  ndvi_04_06  ndvi_04_11  ndvi_04_16  ndvi_04_21  \
680697  680697         NaN    0.807609         NaN         NaN     0.81937   

        ndvi_04_26  ndvi_05_01  ndvi_05_06  ndvi_05_11  ...  ndvi_09_13  \
680697         NaN         NaN         NaN    0.840772  ...         NaN   

        ndvi_09_18  ndvi_09_23  ndvi_09_28  ndvi_10_03  ndvi_10_08  \
680697    0.854973         NaN    0.

## 5.2 Year 2019

There should be 626061 rows in each data set (note: the NDVI table below ends at index 626061, i.e. it has 626062 rows)

In [1]:
import pandas as pd

In [82]:
%%time

# Read the raw 2019 NDVI extraction for a trial run of the tidy-up steps
src_pth = f".\\csvs\\KMRS2019_extracted_ndvi.csv"

df0 = pd.read_csv(src_pth)

Wall time: 4.07 s


In [83]:
# Trial run on NDVI: remove the four leading columns by label and expose the
# default row index as "id_19"
df = (
    df0.drop(labels=df0.columns[0:4], axis="columns")
    .reset_index()
    .rename(columns={"index": "id_19"})
)
df.tail()

Unnamed: 0,id_19,ndvi_04_01,ndvi_04_06,ndvi_04_11,ndvi_04_16,ndvi_04_21,ndvi_04_26,ndvi_05_01,ndvi_05_06,ndvi_05_11,...,ndvi_09_08,ndvi_09_13,ndvi_09_18,ndvi_09_23,ndvi_09_28,ndvi_10_08,ndvi_10_13,ndvi_10_18,ndvi_10_23,ndvi_10_28
626057,626057,,,,0.602498,0.665913,,0.742322,,,...,,0.848981,0.356762,,,0.829991,,,,
626058,626058,,,,0.643462,0.663859,,0.734341,,,...,,0.741071,,,,0.764683,,,,
626059,626059,,,,0.363948,0.391153,,0.541706,,,...,,0.544828,,,,0.496529,,,,
626060,626060,,,,0.745655,0.784228,,0.831408,,,...,,0.838207,,,,0.821985,,,,
626061,626061,,,,0.759347,0.793245,,0.854751,0.928058,,...,,0.86095,,,,0.787622,,,,0.201204


In [84]:
suffs = ["evi2", "ndvi", "ndwi", "savi"]
for suff in suffs:
    # Open file
    src_pth = f".\\csvs\\KMRS2019_extracted_{suff}.csv"
    df0 = pd.read_csv(src_pth)

    # Drop unnecessary columns: the four leading ones, picked by position and
    # removed by label
    df_tidy = df0.drop(columns=df0.columns[:4])  # KMRS data

    # Add the "id_19" index column from the default 0-based row index
    df_tidy = df_tidy.reset_index().rename(columns={"index": "id_19"})

    print(df_tidy.tail(1))

    # Save to csv (the row index is not written, the ids live in "id_19")
    df_tidy.to_csv(f".\\results_extracted\\KMRS2019_extracted_{suff}.csv", index=False)

         id_19  evi2_04_01  evi2_04_06  evi2_04_11  evi2_04_16  evi2_04_21  \
626061  626061         NaN         NaN         NaN    0.517521    0.579275   

        evi2_04_26  evi2_05_01  evi2_05_06  evi2_05_11  ...  evi2_09_08  \
626061         NaN    0.660095    0.496133         NaN  ...         NaN   

        evi2_09_13  evi2_09_18  evi2_09_23  evi2_09_28  evi2_10_08  \
626061    0.632914         NaN         NaN         NaN     0.55424   

        evi2_10_13  evi2_10_18  evi2_10_23  evi2_10_28  
626061         NaN         NaN         NaN    0.106855  

[1 rows x 43 columns]
         id_19  ndvi_04_01  ndvi_04_06  ndvi_04_11  ndvi_04_16  ndvi_04_21  \
626061  626061         NaN         NaN         NaN    0.759347    0.793245   

        ndvi_04_26  ndvi_05_01  ndvi_05_06  ndvi_05_11  ...  ndvi_09_08  \
626061         NaN    0.854751    0.928058         NaN  ...         NaN   

        ndvi_09_13  ndvi_09_18  ndvi_09_23  ndvi_09_28  ndvi_10_08  \
626061     0.86095         NaN      

# 6. Extracted s2-rgbn for 2018 and 2019

- Don't rename index
- Only remove unneeded columns

## 6.1 Year 2018

There should be 680698 rows in each data set

In [1]:
import pandas as pd

In [2]:
%%time

# Read the 2018 S2 band extraction; this file already contains an id_18 column
src_pth = f".\\csvs\\aZV2018_d96tm_clean_ids_rgbn.csv"

df0 = pd.read_csv(src_pth)

Wall time: 2.41 s


In [4]:
# Last rows: id_18 comes from the file and differs from the row index
df0.tail(2)

Unnamed: 0,id_18,GERK_PID,RABA_ID,POVR_GERK_,POLJINA_ID,SIFRA_KMRS,OBRAZEC,POVR_ar,INTERSECT,blue_04_01,...,red_10_18,nir_10_18,blue_10_23,green_10_23,red_10_23,nir_10_23,blue_10_28,green_10_28,red_10_28,nir_10_28
114626,680695,5944379,1211,5816.1295,4079093,100,GP,58.162621,0,,...,,,,,,,,,,
114627,680696,4836379,1100,6689.5872,4079099,20,GP,16.568304,0,,...,,,0.070877,0.083625,0.06289,0.359624,,,,


In [6]:
# Keep "id_18" (column 0) and the band values; remove the attribute columns in
# positions 1-8 (GERK_PID ... INTERSECT) by label
df_tidy = df0.drop(labels=df0.columns[1:9], axis="columns")
df_tidy.tail()

Unnamed: 0,id_18,blue_04_01,green_04_01,red_04_01,nir_04_01,blue_04_06,green_04_06,red_04_06,nir_04_06,blue_04_11,...,red_10_18,nir_10_18,blue_10_23,green_10_23,red_10_23,nir_10_23,blue_10_28,green_10_28,red_10_28,nir_10_28
114623,680666,,,,,0.104746,0.128282,0.130777,0.296457,,...,0.098429,0.151888,,,,,0.265292,0.234878,0.199835,0.295654
114624,680669,,,,,0.085881,0.098508,0.099785,0.286889,0.062532,...,,,0.072267,0.090111,0.064208,0.322389,,,,
114625,680694,,,,,0.074788,0.091805,0.1003,0.311365,0.068055,...,,,0.072361,0.093147,0.080779,0.36352,,,,
114626,680695,,,,,0.081931,0.096172,0.112594,0.208005,,...,,,,,,,,,,
114627,680696,,,,,0.071619,0.085058,0.086243,0.254237,,...,,,0.070877,0.083625,0.06289,0.359624,,,,


In [7]:
# Save to csv; index=False because the ids are already in the "id_18" column
df_tidy.to_csv(f".\\results_extracted\\ZV2018_extracted_s2-rgbn.csv", index=False)

## 6.2 Year 2019

There should be 626061 rows in each data set

In [8]:
import pandas as pd

In [9]:
%%time

# Read the 2019 S2 band extraction; this file already contains an id_19 column
src_pth = f".\\csvs\\aKMRS2019_d96tm_clean_ids_rgbn.csv"

df0 = pd.read_csv(src_pth)

Wall time: 2.36 s


In [10]:
# Last rows: id_19 comes from the file and differs from the row index
df0.tail(2)

Unnamed: 0,id_19,GERK_PID,POLJINA_ID,SIFRA_KMRS,blue_04_01,green_04_01,red_04_01,nir_04_01,blue_04_06,green_04_06,...,red_10_18,nir_10_18,blue_10_23,green_10_23,red_10_23,nir_10_23,blue_10_28,green_10_28,red_10_28,nir_10_28
111886,626043,3703609,5072640,699,0.063915,0.08004,0.084677,0.263924,,,...,0.056515,0.283947,,,,,0.147926,0.163969,0.15613,0.277193
111887,626054,6097344,5079147,5,,,,,0.050041,0.071638,...,0.077625,0.19918,,,,,,,,


In [12]:
# Keep "id_19" (column 0) and the band values; remove GERK_PID, POLJINA_ID and
# SIFRA_KMRS (positions 1-3) by label
df_tidy = df0.drop(labels=df0.columns[1:4], axis="columns")
df_tidy.tail()

Unnamed: 0,id_19,blue_04_01,green_04_01,red_04_01,nir_04_01,blue_04_06,green_04_06,red_04_06,nir_04_06,blue_04_11,...,red_10_18,nir_10_18,blue_10_23,green_10_23,red_10_23,nir_10_23,blue_10_28,green_10_28,red_10_28,nir_10_28
111883,626020,,,,,0.243413,0.245845,0.245884,0.37788,,...,0.137803,0.231506,,,,,,,,
111884,626030,,,,,,,,,,...,,,,,,,,,,
111885,626042,0.089221,0.117534,0.148341,0.242135,,,,,,...,0.102181,0.275552,,,,,0.185409,0.201066,0.195049,0.296492
111886,626043,0.063915,0.08004,0.084677,0.263924,,,,,,...,0.056515,0.283947,,,,,0.147926,0.163969,0.15613,0.277193
111887,626054,,,,,0.050041,0.071638,0.090383,0.31772,,...,0.077625,0.19918,,,,,,,,


In [13]:
%%time
    
# Save to csv; index=False because the ids are already in the "id_19" column
df_tidy.to_csv(f".\\results_extracted\\KMRS2019_extracted_s2-rgbn.csv", index=False)

Wall time: 19.5 s


# 7. Extracted S1-slc for 2018 (DONE)

Remove unneeded columns. There are some NaN values that will be dealt with in monthly interpolation.

In [14]:
%%time
import pandas as pd

s1_pth = f".\\csvs\\bZV2018_d96tm_clean_ids_slc.csv"

df1 = pd.read_csv(s1_pth)

Wall time: 6.91 s


In [27]:
# First rows: id_18, eight attribute columns, then the S1 values
df1.head(2)

Unnamed: 0,id_18,GERK_PID,RABA_ID,POVR_GERK_,POLJINA_ID,SIFRA_KMRS,OBRAZEC,POVR_ar,INTERSECT,SIG_ASC_VH_04_01,...,COH_DES_VH_10_22,COH_DES_VV_10_22,SIG_ASC_VH_10_28,SIG_ASC_VV_10_28,SIG_DES_VH_10_28,SIG_DES_VV_10_28,COH_ASC_VH_10_28,COH_ASC_VV_10_28,COH_DES_VH_10_28,COH_DES_VV_10_28
0,1,4896286,1100,1187.8658,3280115,405,GP,11.878907,0,0.021503,...,0.243379,0.296769,0.018704,0.183421,0.027247,0.176039,0.63972,0.718057,0.285368,0.596231
1,6,1083878,1100,7093.8082,3094837,5,GP,34.573176,1,0.015046,...,0.23471,0.273542,0.029935,0.222554,0.030716,0.36091,0.526789,0.756344,0.432395,0.496262


In [29]:
# Drop unnecessary columns
df_tidy = df1.drop(columns=df1.iloc[:, 1:9])

df_tidy.tail(3)

Unnamed: 0,id_18,SIG_ASC_VH_04_01,SIG_ASC_VV_04_01,SIG_DES_VH_04_01,SIG_DES_VV_04_01,COH_ASC_VH_04_01,COH_ASC_VV_04_01,COH_DES_VH_04_01,COH_DES_VV_04_01,SIG_ASC_VH_04_07,...,COH_DES_VH_10_22,COH_DES_VV_10_22,SIG_ASC_VH_10_28,SIG_ASC_VV_10_28,SIG_DES_VH_10_28,SIG_DES_VV_10_28,COH_ASC_VH_10_28,COH_ASC_VV_10_28,COH_DES_VH_10_28,COH_DES_VV_10_28
114625,680694,0.015966,0.055539,0.079303,0.316503,0.422478,0.352962,0.34554,0.331329,0.019539,...,0.242983,0.352986,0.021054,0.040198,0.093014,1.096049,0.321002,0.232757,0.422077,0.376429
114626,680695,0.01209,0.073179,0.024907,0.161575,0.40008,0.641991,0.505537,0.762077,0.014889,...,0.301089,0.37563,0.016575,0.073304,0.023659,0.09927,0.314066,0.465852,0.377464,0.456217
114627,680696,0.01267,0.077836,0.020482,0.078194,0.292865,0.471435,0.308208,0.293562,0.014387,...,0.335093,0.39781,0.018259,0.072972,0.030438,0.098792,0.333229,0.424934,0.307876,0.416933


In [30]:
%%time

# Save to csv; index=False because the ids are already in the "id_18" column
df_tidy.to_csv(f".\\results_extracted\\ZV2018_extracted_s1-slc.csv", index=False)

Wall time: 55.5 s


# 8. Extracted S1-slc for 2019 (DONE)

Remove unneeded columns. There are some NaN values that will be dealt with in monthly interpolation.

In [1]:
%%time
import pandas as pd

s1_pth = f".\\csvs\\bKMRS2019_d96tm_clean_ids_slc.csv"

df1 = pd.read_csv(s1_pth)

Wall time: 14 s


In [2]:
# First rows: id_19, three attribute columns, then the S1 values
df1.head(2)

Unnamed: 0,id_19,GERK_PID,POLJINA_ID,SIFRA_KMRS,SIG_ASC_VH_04_01,SIG_ASC_VV_04_01,SIG_DES_VH_04_01,SIG_DES_VV_04_01,COH_ASC_VH_04_01,COH_ASC_VV_04_01,...,COH_DES_VH_10_22,COH_DES_VV_10_22,SIG_ASC_VH_10_28,SIG_ASC_VV_10_28,SIG_DES_VH_10_28,SIG_DES_VV_10_28,COH_ASC_VH_10_28,COH_ASC_VV_10_28,COH_DES_VH_10_28,COH_DES_VV_10_28
0,9,3366021,4088919,100,0.020135,0.089681,0.025359,0.113961,0.207774,0.317133,...,0.258645,0.318927,0.034445,0.104209,0.024351,0.119893,0.323609,0.297022,0.271154,0.241363
1,12,3365931,4088922,100,0.025754,0.110617,0.029655,0.08108,0.278651,0.323784,...,0.375712,0.288501,0.023414,0.083092,0.029539,0.109738,0.396411,0.314191,0.281515,0.288947


In [4]:
# Drop unnecessary columns
df_tidy = df1.drop(columns=df1.iloc[:, 1:4])

df_tidy.tail(3)

Unnamed: 0,id_19,SIG_ASC_VH_04_01,SIG_ASC_VV_04_01,SIG_DES_VH_04_01,SIG_DES_VV_04_01,COH_ASC_VH_04_01,COH_ASC_VV_04_01,COH_DES_VH_04_01,COH_DES_VV_04_01,SIG_ASC_VH_04_07,...,COH_DES_VH_10_22,COH_DES_VV_10_22,SIG_ASC_VH_10_28,SIG_ASC_VV_10_28,SIG_DES_VH_10_28,SIG_DES_VV_10_28,COH_ASC_VH_10_28,COH_ASC_VV_10_28,COH_DES_VH_10_28,COH_DES_VV_10_28
111885,626042,0.022681,0.121705,0.011598,0.070266,0.271903,0.423134,0.262441,0.391141,0.053226,...,0.28668,0.35237,0.038557,0.164237,0.023545,0.084672,0.272378,0.357639,0.297521,0.326979
111886,626043,0.053472,0.234808,0.022507,0.059021,0.19025,0.535585,0.262182,0.270252,0.058037,...,0.288292,0.326793,0.061846,0.269675,0.018982,0.060183,0.319101,0.346832,0.256712,0.321693
111887,626054,0.013006,0.072728,0.007915,0.038693,0.350579,0.418133,0.279014,0.421297,0.015509,...,0.339224,0.439304,0.030884,0.207175,0.027955,0.094833,0.377051,0.531453,0.315086,0.367979


In [5]:
%%time

# Save to csv; index=False because the ids are already in the "id_19" column
df_tidy.to_csv(f".\\results_extracted\\KMRS2019_extracted_s1-slc.csv", index=False)

Wall time: 1min 13s
