In [2]:
import numpy as np
import pandas as pd

from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import HistGradientBoostingClassifier

In [61]:
def _read_band(csv_path, suffix):
    """Read one per-band CSV, drop its "index"/"culture" columns and suffix the rest."""
    band = pd.read_csv(csv_path, encoding="windows-1251")
    band = band.drop(columns=["index", "culture"])
    return band.add_suffix(suffix)


# NDVI is the only table that keeps "culture": it supplies the target labels.
ndvi_with_labels = pd.read_csv("data/train/NDVI.csv", encoding="windows-1251").drop(columns=["index"])
labels = ndvi_with_labels["culture"]
df_ndvi = ndvi_with_labels.drop(columns=["culture"])

# Raw band tables; the suffix keeps the per-date column names unique after the concat.
df_nir = _read_band("data/train/B8A.csv", "_nir")
df_swir = _read_band("data/train/B12.csv", "_swir")
df_red = _read_band("data/train/B04.csv", "_red")
df_VegRedEdge = _read_band("data/train/B05.csv", "_vegRedEdge")
df_blue = _read_band("data/train/B02.csv", "_blue")
df_green = _read_band("data/train/B03.csv", "_green")

# Column-wise join: NDVI columns first (unsuffixed), then each band in turn.
band_frames = [df_ndvi, df_nir, df_swir, df_red, df_VegRedEdge, df_blue, df_green]
data = pd.concat(band_frames, axis=1)

{'121',
 '121_blue',
 '121_green',
 '121_nir',
 '121_red',
 '121_swir',
 '121_vegRedEdge',
 '128',
 '128_blue',
 '128_green',
 '128_nir',
 '128_red',
 '128_swir',
 '128_vegRedEdge',
 '135',
 '135_blue',
 '135_green',
 '135_nir',
 '135_red',
 '135_swir',
 '135_vegRedEdge',
 '142',
 '142_blue',
 '142_green',
 '142_nir',
 '142_red',
 '142_swir',
 '142_vegRedEdge',
 '149',
 '149_blue',
 '149_green',
 '149_nir',
 '149_red',
 '149_swir',
 '149_vegRedEdge',
 '156',
 '156_blue',
 '156_green',
 '156_nir',
 '156_red',
 '156_swir',
 '156_vegRedEdge',
 '163',
 '163_blue',
 '163_green',
 '163_nir',
 '163_red',
 '163_swir',
 '163_vegRedEdge',
 '170',
 '170_blue',
 '170_green',
 '170_nir',
 '170_red',
 '170_swir',
 '170_vegRedEdge',
 '177',
 '177_blue',
 '177_green',
 '177_nir',
 '177_red',
 '177_swir',
 '177_vegRedEdge',
 '184',
 '184_blue',
 '184_green',
 '184_nir',
 '184_red',
 '184_swir',
 '184_vegRedEdge',
 '191',
 '191_blue',
 '191_green',
 '191_nir',
 '191_red',
 '191_swir',
 '191_vegRedEdge',

In [67]:
from numpy import ndarray


class VegetationIndexAdder(BaseEstimator, TransformerMixin):
    """Append per-date spectral indices computed from the raw band columns.

    For every day label in ``DAYS`` the transformer reads the columns
    ``"{day}_nir"``, ``"{day}_red"``, ``"{day}_blue"``, ``"{day}_swir"``,
    ``"{day}_vegRedEdge"`` and ``"{day}_green"`` (only those needed by the
    enabled indices) and adds one column per enabled index:

    ========  =======================================================
    column    formula
    ========  =======================================================
    _ndwi     (nir - swir) / (nir + swir)
    _arvi     (nir - rb) / (nir + rb),  rb = red - gamma * (blue - red)
    _sawi     (nir - red) / (nir + red + L) * (1 + L)   (SAVI)
    _gemi     eta * (1 - 0.25 * eta) - (red - 0.125) / (1 - red)
    _ndre     (nir - red_edge) / (nir + red_edge)
    _gndwi    (nir - green) / (nir + green)
    _evi      2.5 * (nir - red) / (nir + 6 * red - 7.5 * blue + 1)
    _msavi    (2 * nir + 1 - sqrt((2 * nir + 1)^2 - 8 * (nir - red))) / 2
    ========  =======================================================

    Parameters
    ----------
    ndwi, arvi, sawi, gemi, ndre, gndwi, evi, msavi : bool, default=True
        Whether to emit the corresponding ``"{day}_<name>"`` columns.

    Notes
    -----
    Missing values are replaced by the column mean of the frame passed to
    ``transform`` (nothing is learned in ``fit``). The input frame is never
    modified; a new frame is returned.
    """

    # Soil-brightness correction factor of SAVI. The formula needs a scalar L;
    # 0.5 is the conventional general-purpose value.
    SAVI_L = 0.5
    # Weight of the blue-band correction in ARVI's red/blue term.
    ARVI_GAMMA = 1.0

    def __init__(self, ndwi: bool = True, arvi: bool = True, sawi: bool = True, gemi: bool = True, ndre: bool = True, gndwi: bool = True, evi: bool = True, msavi: bool = True) -> None:
        super().__init__()

        # Day labels that prefix every band column ("121_nir", "121_red", ...).
        self.N_DAYS = 26
        self.DAYS = ['121', '128', '135', '142', '149', '156', '163', '170', '177', '184', '191', '198', '205',
                     '212', '219', '226', '233', '240', '247', '254', '261', '268', '275', '282', '289', '296']

        # Positional offsets of each band group (N_DAYS columns apiece).
        # transform() addresses columns by name, so these are not read here;
        # they are kept as attributes for any code that relies on them.
        self.NDVI_START = 0
        self.NIR_START = 26
        self.SWIR_START = 52
        self.RED_START = 78
        self.VEG_REG_EDGE_START = 104
        self.BLUE_START = 130
        self.GREEN_START = 156

        self.ndwi = ndwi
        self.arvi = arvi
        self.sawi = sawi
        self.gemi = gemi
        self.ndre = ndre
        self.gndwi = gndwi
        self.evi = evi
        self.msavi = msavi

    def fit(self, X, y=None):
        """No-op: the transformer learns nothing from the data. Returns ``self``."""
        return self

    def transform(self, X: pd.DataFrame, y=None) -> pd.DataFrame:
        """Return a mean-imputed copy of ``X`` with the enabled index columns appended.

        Parameters
        ----------
        X : pd.DataFrame
            Frame holding the ``"{day}_{band}"`` columns for every day in ``DAYS``.
        y : ignored

        Returns
        -------
        pd.DataFrame
            New frame: the imputed input columns followed, day by day, by the
            enabled index columns. ``X`` itself is left untouched.

        Raises
        ------
        KeyError
            If a band column required by an enabled index is missing from ``X``.
        """
        # fillna returns a new frame, so the caller's data is not modified.
        filled = X.fillna(X.mean(numeric_only=True))

        soil_l = self.SAVI_L
        features = {}
        for day in self.DAYS:

            def band(suffix: str, _day: str = day) -> pd.Series:
                return filled[f"{_day}_{suffix}"]

            if self.ndwi:
                nir, swir = band("nir"), band("swir")
                features[f"{day}_ndwi"] = (nir - swir) / (nir + swir)

            if self.arvi:
                nir, red, blue = band("nir"), band("red"), band("blue")
                # Red reflectance corrected with the blue band.
                rb = red - self.ARVI_GAMMA * (blue - red)
                features[f"{day}_arvi"] = (nir - rb) / (nir + rb)

            if self.sawi:
                nir, red = band("nir"), band("red")
                features[f"{day}_sawi"] = (nir - red) / (nir + red + soil_l) * (1 + soil_l)

            if self.gemi:
                nir, red = band("nir"), band("red")
                eta = (2 * (nir ** 2 - red ** 2) + 1.5 * nir + 0.5 * red) / (nir + red + 0.5)
                features[f"{day}_gemi"] = eta * (1 - 0.25 * eta) - ((red - 0.125) / (1 - red))

            if self.ndre:
                nir, red_edge = band("nir"), band("vegRedEdge")
                features[f"{day}_ndre"] = (nir - red_edge) / (nir + red_edge)

            if self.gndwi:
                nir, green = band("nir"), band("green")
                features[f"{day}_gndwi"] = (nir - green) / (nir + green)

            if self.evi:
                nir, red, blue = band("nir"), band("red"), band("blue")
                features[f"{day}_evi"] = 2.5 * (nir - red) / (nir + 6 * red - 7.5 * blue + 1)

            if self.msavi:
                nir, red = band("nir"), band("red")
                discriminant = (2 * nir + 1) ** 2 - 8 * (nir - red)
                # Clip so a slightly negative discriminant cannot produce NaN.
                features[f"{day}_msavi"] = (2 * nir + 1 - np.sqrt(discriminant.clip(lower=0))) / 2

        if not features:
            return filled

        # If X already carries index columns (e.g. from an earlier run), the
        # freshly computed ones replace them instead of creating duplicates.
        stale = [name for name in features if name in filled.columns]
        base = filled.drop(columns=stale)

        # A single concat avoids the frame fragmentation caused by inserting
        # a couple of hundred columns one at a time.
        return pd.concat([base, pd.concat(features, axis=1)], axis=1)
    
# Smoke check: build the index features and display the NDWI column for day 233.
adder = VegetationIndexAdder()
data_with_indices = adder.fit_transform(data)
data_with_indices["233_ndwi"]

0       0.434826
1       0.479277
2       0.426582
3       0.428022
4       0.208784
          ...   
7256    0.370345
7257    0.179785
7258    0.326885
7259    0.351977
7260    0.323720
Name: 233_ndwi, Length: 7261, dtype: float64