In [2]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np


In [3]:
# Load the merged SOC / reflectance CSV into a pandas DataFrame.
# The path is relative, so it is resolved against the current working directory.
ref_data = pd.read_csv('merged_soc_reflectance.csv')


In [4]:
# Report how many columns the loaded table has (second entry of its shape)
num_columns = ref_data.shape[1]
print(f"Number of columns: {num_columns}")

Number of columns: 236


In [13]:
# Load the Landcover CSV file containing POINTID and the land-cover code column
landcover_df = pd.read_csv('LandCover_LUCAS.csv')

# Name of the land-cover column that gets attached to ref_data
landcover_col = 'CLCplus_RASTER_2021_010m_03035'

# Attach the land-cover code to every row of ref_data (left join on POINTID).
# Any land-cover column left over from a previous run of this cell is dropped
# first: without that, re-running the cell makes pandas emit '_x'/'_y' suffixed
# copies of the column, and the positional indexing used in later cells
# (last column = land cover, columns 2..-1 = bands) would silently pick up the
# wrong columns. On a first run the drop is a no-op.
ref_data = pd.merge(
    ref_data.drop(columns=[landcover_col], errors='ignore'),
    landcover_df[['POINTID', landcover_col]],
    on='POINTID',
    how='left',
)

In [15]:
# Inspect the merged table (pandas abbreviates long/wide frames when printing)
print(ref_data)


     POINTID  SOC_Stock_Mg_ha    band1    band2    band3    band4    band5  \
0   41242442         0.023150  0.02943  0.01463  0.01976  0.02014  0.01884   
1   41262440         0.031018  0.03206  0.03845  0.03561  0.02911  0.03081   
2   41342434         0.036446  0.06394  0.05658  0.05853  0.06130  0.06725   
3   41362364         0.049764  0.04360  0.02226  0.02136  0.01985  0.02332   
4   41382456         0.062281  0.02666  0.00763  0.00941  0.00970  0.01331   
..       ...              ...      ...      ...      ...      ...      ...   
92  42322402         0.066613  0.04398  0.03977  0.03334  0.02603  0.02783   
93  42342404         0.048580  0.02744  0.02770  0.02695  0.01711  0.02048   
94  42362424         0.046636  0.03888  0.03568  0.03330  0.02559  0.03069   
95  42362424         0.046636  0.04044  0.03493  0.03549  0.03157  0.03476   
96  42582462         0.042827  0.03284  0.01659  0.00897  0.00407  0.01804   

      band6    band7    band8  ...  band226  band227  band228  

In [29]:
# SOC values: the second column (position 1), kept 2-D with shape (n_samples, 1)
soc_data = ref_data.iloc[:, 1].to_numpy().reshape(-1, 1)

# Landcover code: the last column, likewise shaped (n_samples, 1)
landcover_code = ref_data.iloc[:, -1].to_numpy().reshape(-1, 1)

In [30]:
# Select the reflectance bands: every column from position 2 up to (but not including) the last one.
# Positions 0 and 1 are skipped (POINTID and SOC in the table printed above), and the last column is
# left out because it is extracted separately as the Landcover code.
Prisma_reflectance_data = ref_data.iloc[:, 2:-1]


In [31]:
# Sanity check: shape of the full table and of each extracted piece, one per line
print(
    ref_data.shape,
    soc_data.shape,
    Prisma_reflectance_data.shape,
    landcover_code.shape,
    sep='\n',
)

(97, 237)
(97, 1)
(97, 234)
(97, 1)


In [32]:
# Min-max scale the reflectance bands: fit a scaler on the bands, then apply it to them
scaler_reflec = MinMaxScaler().fit(Prisma_reflectance_data)
reflec_scaled = scaler_reflec.transform(Prisma_reflectance_data)

# SOC gets its own scaler, fitted separately from the bands
scaler_SOC = MinMaxScaler().fit(soc_data)
SOC_scaled = scaler_SOC.transform(soc_data)

In [34]:
# Show the scaled reflectance matrix followed by the scaled SOC column
print(reflec_scaled, SOC_scaled, sep='\n')

[[0.01356267 0.00804431 0.01271872 ... 0.04042917 0.00730401 0.06257966]
 [0.0165526  0.03541796 0.03029429 ... 0.18426372 0.08294108 0.46800918]
 [0.05279553 0.05625273 0.05570957 ... 0.13497123 0.02402207 0.        ]
 ...
 [0.02430595 0.03223471 0.02773281 ... 0.18162028 0.03684467 0.        ]
 [0.02607944 0.03137282 0.03016123 ... 0.04851501 0.         0.        ]
 [0.01743935 0.01029672 0.00075403 ... 0.01104027 0.01298491 0.01159827]]
[[0.20052059]
 [0.2892537 ]
 [0.35047112]
 [0.50067518]
 [0.64183132]
 [0.52974244]
 [0.70446752]
 [0.65084101]
 [0.17158156]
 [0.57229542]
 [0.67213235]
 [0.12760688]
 [0.52442229]
 [0.53236567]
 [0.18386993]
 [0.49921895]
 [0.69166566]
 [0.2522281 ]
 [0.10181213]
 [0.30418983]
 [0.23818888]
 [0.2917797 ]
 [0.33883694]
 [0.19305892]
 [0.19995669]
 [0.31386321]
 [0.31861814]
 [0.36552118]
 [0.37387708]
 [0.37387708]
 [0.45051852]
 [0.38908212]
 [0.11885074]
 [0.45051852]
 [0.32691171]
 [0.50927163]
 [0.44369835]
 [0.65089115]
 [0.6471574 ]
 [0.647157

In [35]:
# Stack the pieces side by side: scaled reflectance bands first, then the scaled SOC column,
# and finally the Landcover code (which was not passed through a scaler) as the last column
combined_data_prisma = np.hstack([reflec_scaled, SOC_scaled, landcover_code])

In [36]:
# Wrap the combined array in a DataFrame; no column names are supplied, so pandas numbers them
normalized_reflectance_prisma = pd.DataFrame(data=combined_data_prisma)

# Write the table to CSV without the row index, then report its shape
normalized_reflectance_prisma.to_csv(path_or_buf='normalized_reflectance_prisma.csv', index=False)
print(normalized_reflectance_prisma.shape)

(97, 236)


In [37]:
# Inspect the combined table (columns carry integer labels because none were assigned)
print(normalized_reflectance_prisma)

         0         1         2         3         4         5         6    \
0   0.013563  0.008044  0.012719  0.017625  0.009432  0.013224  0.014725   
1   0.016553  0.035418  0.030294  0.027464  0.023017  0.029424  0.030755   
2   0.052796  0.056253  0.055710  0.062769  0.064376  0.063972  0.072894   
3   0.029672  0.016813  0.014493  0.017307  0.014516  0.011369  0.011339   
4   0.010414  0.000000  0.001242  0.006175  0.003155  0.000000  0.007132   
..       ...       ...       ...       ...       ...       ...       ...   
92  0.030104  0.036935  0.027777  0.024086  0.019635  0.028881  0.029146   
93  0.011300  0.023064  0.020691  0.014302  0.011293  0.018847  0.021610   
94  0.024306  0.032235  0.027733  0.023603  0.022881  0.035091  0.036211   
95  0.026079  0.031373  0.030161  0.030162  0.027501  0.043892  0.046841   
96  0.017439  0.010297  0.000754  0.000000  0.008524  0.007330  0.004477   

         7         8         9    ...       226       227       228       229  \
0   0.

In [38]:
# Print the (rows, columns) shape of the combined table
print(normalized_reflectance_prisma.shape)

(97, 236)


In [39]:
# Print the Landcover code column vector extracted from the last column of ref_data
print(landcover_code)

[[ 7.]
 [ 7.]
 [ 7.]
 [ 7.]
 [ 6.]
 [ 7.]
 [10.]
 [ 6.]
 [ 7.]
 [ 3.]
 [ 6.]
 [ 7.]
 [ 7.]
 [ 6.]
 [ 6.]
 [ 7.]
 [ 6.]
 [ 3.]
 [ 5.]
 [ 7.]
 [ 7.]
 [ 6.]
 [ 5.]
 [ 6.]
 [ 7.]
 [ 7.]
 [ 6.]
 [ 6.]
 [ 3.]
 [ 3.]
 [ 6.]
 [ 5.]
 [ 7.]
 [ 7.]
 [ 3.]
 [ 6.]
 [ 7.]
 [ 3.]
 [ 3.]
 [ 3.]
 [ 5.]
 [ 7.]
 [ 7.]
 [ 7.]
 [ 6.]
 [ 5.]
 [ 5.]
 [ 3.]
 [ 7.]
 [ 3.]
 [ 3.]
 [ 3.]
 [ 5.]
 [ 6.]
 [ 7.]
 [ 7.]
 [ 6.]
 [ 6.]
 [ 3.]
 [ 3.]
 [ 5.]
 [ 7.]
 [ 3.]
 [ 3.]
 [ 3.]
 [ 3.]
 [ 3.]
 [ 3.]
 [ 7.]
 [ 3.]
 [ 5.]
 [ 6.]
 [ 6.]
 [ 6.]
 [ 3.]
 [ 7.]
 [ 6.]
 [ 6.]
 [ 7.]
 [ 7.]
 [ 7.]
 [ 7.]
 [ 7.]
 [ 6.]
 [ 6.]
 [ 6.]
 [ 7.]
 [ 7.]
 [ 7.]
 [ 3.]
 [ 6.]
 [ 3.]
 [ 3.]
 [ 6.]
 [ 7.]
 [ 7.]
 [ 6.]]
