# Mean per month -> Transformation of date column to features

*by Felix*

This notebook explains the transformation of the date column into date-specific features. To actually calculate the indices for the data, please use `feature_engineering.py`.

We first need to import all the needed modules.

In [83]:
# Load libraries
import numpy as np
import pandas as pd
import os

Set the working directory.

In [84]:
# Alternative: keep the directory the notebook was started from
#path = os.getcwd()
# NOTE(review): hard-coded, machine-specific project root -- adjust before running elsewhere
path = '/Users/felixbehrendt/neuefische/Radiant-Earth-Spot-Crop/'
# Set the working directory (the relative 'data/...' paths are resolved against it) and print it
os.chdir(path)
print(f'Current Working directory: {path}')

Current Working directory: /Users/felixbehrendt/neuefische/Radiant-Earth-Spot-Crop/


In [85]:
# Load the two input datasets (paths are relative to the working directory)
# NOTE(review): the preview shows an 'Unnamed: 0' column, presumably a row index
# that was written to the CSV -- it is not excluded from the feature list in
# combine_feature_date; confirm whether it should be dropped (e.g. index_col=0)
df = pd.read_csv('data/spectral_wo_NA.csv')
df_wo = pd.read_csv('data/spectral_wo_NA_02.csv')
# Preview the first rows of the first dataset
df.head()

Unnamed: 0,field_id,date,label,B02,B03,B04,B08,B11,B12,NDVI,WET,PVR
0,1,2017-04-01,4,21.934084,29.180065,35.55466,62.490353,68.3971,46.04019,0.274728,-27.683925,-0.098473
1,1,2017-04-11,4,14.844051,23.114147,30.607718,58.736336,73.43569,48.863342,0.314835,-37.725527,-0.139488
2,1,2017-04-21,4,13.385852,21.596462,29.223473,57.065918,73.66881,49.313503,0.322664,-39.639456,-0.150079
3,1,2017-05-01,4,15.408361,22.471062,29.371382,56.434082,71.05788,46.557877,0.315396,-36.211639,-0.133102
4,1,2017-05-11,4,54.829582,65.73955,72.90675,95.67203,66.14791,58.643085,0.135042,3.880202,-0.051694


In [86]:
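# Feature engineering helpers: parse the date, derive the month, average per field and month, and pivot the months into the feature names (no "days since beginning of growing season" feature is computed here)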
def calculate_mean_month_field(df:pd.DataFrame) -> pd.DataFrame:
  """Calculate the mean of every column for each field_id and month.

  The input DataFrame is left untouched; all work happens on a copy.

  Args:
      df (pd.DataFrame): Data with all features. Must contain a 'field_id'
          column and a 'date' column that pd.to_datetime can parse.

  Returns:
      pd.DataFrame: One row per (field_id, month) holding the mean of all
          remaining columns. 'month' is the calendar month as a string
          (e.g. '4'), so it can later be joined into column names.
  """
  # Work on a copy so the caller's DataFrame is not modified as a side effect
  data = df.copy()

  # Change datatype str to datetime of the time column
  data['date'] = pd.to_datetime(data['date'])

  # Month of the year, stored as str for later column naming
  data['month'] = data['date'].dt.month.astype(str)

  # Calculate the mean for each month for each field_id
  return data.groupby(by=['field_id', 'month']).mean().reset_index()

def combine_feature_date(df:pd.DataFrame) -> pd.DataFrame:
  """Combine every feature with the month, giving one row per field.

  Each column other than 'field_id', 'month' and 'label' is treated as a
  feature and pivoted so that it becomes one column per month, named
  '<feature>_<month>'. The label is joined back as an integer column.

  Args:
      df (pd.DataFrame): Data with the mean per month and per field. Must
          contain 'field_id', 'month' (str) and 'label', with a single row
          per (field_id, month) pair.

  Returns:
      pd.DataFrame: The '<feature>_<month>' columns followed by 'field_id'
          and 'label', with one row per field_id.
  """
  key_columns = ('field_id', 'month', 'label')

  # Keep the input column order; a set difference would yield an arbitrary
  # feature order and therefore a different column order between runs
  features = [col for col in df.columns if col not in key_columns]

  # Pivot for each month over field id --> combine feature and time
  df_res = df.pivot(index='field_id', columns='month', values=features).reset_index()

  # Flatten the two-level column names: ('B02', '4') -> 'B02_4'.
  # The index column ('field_id', '') becomes 'field_id_'.
  df_res.columns = ['_'.join(col).strip() for col in df_res.columns.values]

  # One label row per field: df holds one row per (field_id, month), so
  # without de-duplication the merge would repeat every field once per month
  labels = df[['field_id', 'label']].astype(int).drop_duplicates()

  # Join the labels back onto the pivoted features
  merge = df_res.merge(labels, left_on='field_id_', right_on='field_id')

  # Drop the helper key column left over from flattening
  return merge.drop('field_id_', axis=1)

def feat_engi_date(df:pd.DataFrame) -> pd.DataFrame:
  """Run the full date feature engineering as a two-step pipeline.

  Steps:
      1. calculate_mean_month_field: mean for each month and field_id
      2. combine_feature_date: combine the month with each feature

  Args:
      df (pd.DataFrame): initial dataset

  Returns:
      pd.DataFrame: transformed dataset
  """
  # Step 1: aggregate to one row per field and month
  monthly_means = calculate_mean_month_field(df)

  # Step 2: move the month into the feature column names
  return combine_feature_date(monthly_means)


In [87]:
# Transform data: run the full date feature engineering on both loaded datasets
df_after_FE = feat_engi_date(df)
df_after_FE_wo = feat_engi_date(df_wo)


In [89]:
# Save data
#df_after_FE.to_csv('data/data_afterFE.csv', index=False)
#df_after_FE_wo.to_csv('data/data_afterFE_wo.csv', index=False)
