# Spectral Indices

*by Felix*

This notebook tests how the calculation of the spectral indices works. To actually calculate the indices for the data, please use `feature_engineering.py`.

We first need to import all the needed modules.

In [4]:
# Load libraries
import numpy as np
import pandas as pd
import os

Set the working directory.

In [5]:
# Project root as an absolute path on the author's machine. To run the notebook
# elsewhere, adjust this path or use the commented-out os.getcwd() alternative.
#path = os.getcwd()
path = '/Users/felixbehrendt/neuefische/Radiant-Earth-Spot-Crop/'
# Set the working directory and print it; relative paths used afterwards resolve from here
os.chdir(path)
print(f'Current Working directory: {path}')

Current Working directory: /Users/felixbehrendt/neuefische/Radiant-Earth-Spot-Crop/


Functions to calculate the spectral indices. They can also be found in `spectral_indices.py`.

In [6]:
# NDVI
def cal_NDVI(Band4: pd.Series, Band8: pd.Series) -> pd.Series:
    """Compute the NDVI element-wise from two Sentinel 2 bands.

    The index is ``(Band8 - Band4) / (Band8 + Band4)``. A zero denominator is
    not guarded against; the result then follows pandas' division rules.

    Args:
        Band4 (pd.Series): Band 4 of Sentinel 2
        Band8 (pd.Series): Band 8 of Sentinel 2

    Returns:
        pd.Series: Calculated NDVI
    """
    band_difference = Band8 - Band4
    band_sum = Band8 + Band4
    return band_difference / band_sum

# SIPI2
def cal_SIPI2(Band2: pd.Series, Band8: pd.Series, Band4:pd.Series) -> pd.Series:
    """Compute the SIPI2 element-wise from three Sentinel 2 bands.

    The index is ``(Band8 - Band2) / (Band8 - Band4)``. A zero denominator
    (Band8 equal to Band4) is not guarded against; the result then follows
    pandas' division rules.

    Args:
        Band2 (pd.Series):  Band 2 of Sentinel 2
        Band8 (pd.Series):  Band 8 of Sentinel 2
        Band4 (pd.Series):  Band 4 of Sentinel 2

    Returns:
        pd.Series: Calculated SIPI2
    """
    numerator = Band8 - Band2
    denominator = Band8 - Band4
    return numerator / denominator

# WET
def cal_WET(Band2: pd.Series, Band3: pd.Series, Band4:pd.Series, Band8: pd.Series, Band11: pd.Series, Band12:pd.Series) -> pd.Series:
    """Compute the WET index as a fixed linear combination of six Sentinel 2 bands.

    Bands 2, 3, 4 and 8 enter with positive weights, bands 11 and 12 with
    negative weights.

    Args:
        Band2 (pd.Series): Band 2 of Sentinel 2
        Band3 (pd.Series): Band 3 of Sentinel 2
        Band4 (pd.Series): Band 4 of Sentinel 2
        Band8 (pd.Series): Band 8 of Sentinel 2
        Band11 (pd.Series): Band 11 of Sentinel 2
        Band12 (pd.Series): Band 12 of Sentinel 2

    Returns:
        pd.Series: Calculated WET
    """
    # Terms are accumulated in a fixed left-to-right order so the
    # floating-point result does not depend on how the sum is grouped.
    positive_part = 0.1509 * Band2 + 0.1973 * Band3 + 0.3279 * Band4 + 0.3406 * Band8
    wetness = positive_part - 0.7112 * Band11 - 0.4572 * Band12
    return wetness

# PVR
def cal_PVR(Band3: pd.Series, Band4: pd.Series) -> pd.Series:
    """Compute the PVR element-wise from two Sentinel 2 bands.

    The index is ``(Band3 - Band4) / (Band3 + Band4)``. A zero denominator is
    not guarded against; the result then follows pandas' division rules.

    Args:
        Band3 (pd.Series): Band 3 of Sentinel 2
        Band4 (pd.Series): Band 4 of Sentinel 2

    Returns:
        pd.Series:  Calculated PVR
    """
    band_difference = Band3 - Band4
    band_sum = Band3 + Band4
    return band_difference / band_sum

Function from `feature_engineering.py` that adds the spectral indices to the dataset.

In [9]:
def cal_spectral_indices(df:pd.DataFrame) -> pd.DataFrame:
    """Takes the data and adds the spectral indices as additional features:
        * NDVI (from B04 and B08)
        * WET  (from B02, B03, B04, B08, B11 and B12)
        * PVR  (from B03 and B04)

    Note:
        The new columns are assigned on ``df`` itself, so the frame passed in
        is modified and the returned object is that same frame. Pass
        ``df.copy()`` if the original must stay untouched.

    Args:
        df (pd.DataFrame): Full Dataset; must provide the band columns
            B02, B03, B04, B08, B11 and B12.

    Returns:
        pd.DataFrame: Full Dataset with spectral indices
    """

    # Calculate the indices from the band columns
    df['NDVI'] = cal_NDVI(df.B04, df.B08)
    df['WET'] = cal_WET(df.B02, df.B03, df.B04, df.B08, df.B11, df.B12)
    df['PVR'] = cal_PVR(df.B03, df.B04)

    # NA values produced by the index calculations are deliberately left in
    # place (no fill with zero); handling them is up to the caller.
    # df = df.fillna(value=0)
    return df

Load dataset

In [32]:
# Read both CSV files (paths are relative to the working directory).
# NOTE(review): the file names suggest one set includes an "unknown" class and
# the other does not — confirm against the data preparation step.
df = pd.read_csv('data/data_with_unknown.csv')
df_wo = pd.read_csv('data/data_without_unknown.csv')
# Preview the first rows of the first dataset
df.head()

Unnamed: 0,field_id,date,label,B02,B03,B04,B08,B11,B12
0,1,2017-04-01,4,21.934084,29.180065,35.55466,62.490353,68.3971,46.04019
1,1,2017-04-11,4,14.844051,23.114147,30.607718,58.736336,73.43569,48.863342
2,1,2017-04-21,4,13.385852,21.596462,29.223473,57.065918,73.66881,49.313503
3,1,2017-05-01,4,15.408361,22.471062,29.371382,56.434082,71.05788,46.557877
4,1,2017-05-11,4,54.829582,65.73955,72.90675,95.67203,66.14791,58.643085


In [33]:
# Calculate the spectral indices for both datasets.
# cal_spectral_indices assigns the new columns on the frame it receives and
# returns that same frame, so df_spectral / df_spectral_wo refer to the same
# objects as df / df_wo.
df_spectral = cal_spectral_indices(df)
df_spectral_wo = cal_spectral_indices(df_wo)

In [36]:
def drop_na(df:pd.DataFrame, verbose:bool = False) -> pd.DataFrame:
    """Drop every row that contains at least one NA value.

    The input frame is not modified; a new frame is returned.

    Args:
        df (pd.DataFrame): Data with all features
        verbose (bool, optional): Print information about the loss of information (rows). Defaults to False.

    Returns:
        pd.DataFrame: Data without NA
    """
    df_wo_NA = df.dropna(axis = 0)

    # Print loss of information
    if verbose:
        rows_kept = df_wo_NA.shape[0]
        rows_total = df.shape[0]
        # An empty input has no rows to lose: report 100 % instead of
        # failing with a ZeroDivisionError.
        if rows_total:
            remaining = round((rows_kept / rows_total) * 100, 3)
        else:
            remaining = 100.0
        print(f'Rows without NA:               {rows_kept}')
        print(f'Rows of Origin:                {rows_total}')
        print(f'Precentage of remaining Data:  {remaining} %')

    return df_wo_NA

In [37]:
# Drop rows with NA values from both datasets; verbose=True prints how many
# rows remain compared to the original row count.
df_1 = drop_na(df_spectral, verbose=True)
df_2 = drop_na(df_spectral_wo, verbose=True)

Rows without NA:               4255238
Rows of Origin:                4301227
Precentage of remaining Data:  98.931 %
Rows without NA:               1472994
Rows of Origin:                1473025
Precentage of remaining Data:  99.998 %


In [38]:
# save data
#df_1.to_csv('data/spectral_wo_NA.csv', index = False)
#df_2.to_csv('data/spectral_wo_NA_02.csv', index=False)