<a href="https://colab.research.google.com/github/wurDevTim/Workshop_P4P/blob/main/correcting_meassurement_time.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Correcting for meassurement time
workshop part 2


## setup
Next to python, R code can also be used in colab notebooks.
Here we call R from python instead with rpy2 to enable us to use python code as well.
An R cell is marked with '%%R' at the top of the cell.

In [4]:
%load_ext rpy2.ipython

In [5]:
# indicate that you're running R code
%%R

# Install packages
install.packages("LMMsolver")


(as ‘lib’ is unspecified)







	‘/tmp/RtmpEbqQZs/downloaded_packages’



In [6]:
# Import
%%R
library(LMMsolver)

In [7]:
# Mount google drive - not found an R alternative, using python instead.
from google.colab import drive
from os import path

datafolder = "/content/drive/My Drive/P4P_workshop_data"
# Check if the data folder is mounted correctly
if not path.exists(datafolder):
  drive.mount('/content/drive')

!ls "$datafolder"

Mounted at /content/drive
 cropreporter_traits.csv  'Enza Mandy Boon'   Lucia   npec_tomato


In [8]:
# Function which uses the LMM solver to compute the spline.
%%R

compute_spline <- function(df, plant_identifier, trait_list)
{
  ### Fit 1D spline per plant
  for (i in c(1:length(unique(df[[plant_identifier]])))){
    plant_id = unique(df[[plant_identifier]])[i]
    one_plant <- df[df[[plant_identifier]] == plant_id,]
    datenum = one_plant[['datenum']]
    preddates <- data.frame(datenum = min(one_plant$date):max(one_plant$date))
    # Each day has 24*60*60 = 86400 hours
    preddates <- preddates * 86400

    # Fit 1D spline per trait
    for (trait in trait_list){
      # Nan values will be removed, but they do cause warnings.
      m1 <- LMMsolve(fixed = as.formula(paste(trait, "~", 1)),
                     spline = ~spl1D(x = datenum, nseg = 20),
                     data = one_plant)
      summary(m1)
      prediction <- obtainSmoothTrend(m1, newdata = preddates,
                              includeIntercept = T)
      # Rename ypred column
      names(prediction)[names(prediction) == 'ypred'] <- trait

      # Combine results
      if (trait == trait_list[1]){
        plant_predictions <- prediction
      } else {
        plant_predictions <- merge(plant_predictions, prediction, by='datenum')
      }
    }
    plant_predictions[[plant_identifier]] = plant_id
    if (i == 1){
      all_predictions <- plant_predictions
    } else {

      all_predictions <- rbind(all_predictions, plant_predictions)
    }
  }
  return(all_predictions)
}

# Example
In this example the cropreporter data from Lucia is used, which has been analysed beforehand.

Note: Systems like the cropreporter in NPEC use the local time. If your unlucky your experiment contains both winter & summer time. In this case we would advice to switch to UTC.

In [13]:
# Load the data
%%R
df <- read.csv('/content/drive/My Drive/P4P_workshop_data/cropreporter_traits.csv', sep = ",")
head(df)

                                                                                                                                                                                                               filename
1     D:/temp/p4p/Lucia\\PMD_PML_(Control_vs_Drought_one_genotype)\\PMD_protocol\\20230708\\NPEC52.20230605.BD22.CE3027.Control.2\\CropReporter\\163111171\\data\\HDR_90_NPEC52.20230605.BD22.CE3027.Control.2_1368.INF
2       D:/temp/p4p/Lucia\\PMD_PML_(Control_vs_Drought_one_genotype)\\PMD_protocol\\20230708\\NPEC52.20230605.BD4.CE3027.Control.1\\CropReporter\\124326957\\data\\HDR_90_NPEC52.20230605.BD4.CE3027.Control.1_1368.INF
3     D:/temp/p4p/Lucia\\PMD_PML_(Control_vs_Drought_one_genotype)\\PMD_protocol\\20230708\\NPEC52.20230605.BE11.CE3027.Control.3\\CropReporter\\142623408\\data\\HDR_90_NPEC52.20230605.BE11.CE3027.Control.3_1368.INF
4     D:/temp/p4p/Lucia\\PMD_PML_(Control_vs_Drought_one_genotype)\\PMD_protocol\\20230708\\NPEC52.20230605.BE29.CE3027.Control.4\\CropR

In [None]:
# Converting to datetime object
df[['Datetime']] <- as.POSIXct(df[['Datetime']], format = "%Y-%m-%d %H:%M:%OS")

# Datenum stored as integer, exact datetime of measurement
df[['datenum']] <- as.integer(df[['DateTime']])

# Date is multiplied with 86400 to get value at 00:00:00 of each day
df[['date']] <- as.numeric(as.Date(df[['DateTime']]))

In [14]:
# All columns
colnames(df)

NameError: ignored

In [None]:
# List of the columns to interpolate
trait_list <- list('MeanChlorophyll', 'MeanNdvi', 'MeanEgreen', 'MeanPsri', 'MeanAri', 'MeanMari', 'mean_yii')
