<a href="https://colab.research.google.com/github/Le119/Ontario-Lake-Fish-Classification/blob/Yihan/RNN_in_R.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Load the processed data

In [9]:
# Restore the objects stored in the .Rdata file into the session, then list
# every object that is now available.
load(file = "/content/processed_AnalysisData_no200.Rdata")
ls()

Clean data as Jessi did

In [10]:
# libraries
library(dplyr)
library(tidyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




### 3 species, LT, LWT, SMB

In [11]:
# Number of rows (pings) for every species code / fish combination.
processed_data_no200 %>%
  group_by(spCode, fishNum) %>%
  count()

spCode,fishNum,n
<dbl>,<chr>,<int>
81,LT001,133
81,LT003,1213
81,LT004,970
81,LT005,581
81,LT006,306
81,LT007,638
81,LT009,649
81,LT010,129
81,LT011,508
81,LT012,360


## Remove individuals with missing transducers


In [12]:
# Keep only rows that have a value in F100; rows where F100 is NA are dropped.
# `!is.na()` replaces the comparison against `F`, which is an ordinary
# variable that can be reassigned (unlike `FALSE`).
processed_data_no200 <- processed_data_no200 %>%
  filter(!is.na(F100))

In [13]:
# LWF23018 contributed only two pings, so that individual is dropped as well.
processed_data_no200 <- filter(processed_data_no200, fishNum != "LWF23018")

In [14]:
# remove the individual with -9x10^38 TS
# NOTE(review): the row is dropped by position (36830), so this only removes
# the intended row when the preceding filters have run in exactly this order
# on the same input. Selecting the row by its value would be more robust -
# TODO confirm which column holds the extreme TS value.
processed_data_no200 <-processed_data_no200[-36830,]

## LT

In [15]:
# LT019 and LT23008 were dead for the entire recording, so both are removed.
processed_data_no200 <- processed_data_no200 %>%
  filter(fishNum != "LT019", fishNum != "LT23008")

In [16]:
# LT015 spent most of the pinging very shallow and on quadrant before it
# appeared to settle at the correct depth. Rows of LT015 whose
# Target_true_depth exceeds 15.5 are dropped; all other rows are kept.
processed_data_no200 <- processed_data_no200[
  !with(processed_data_no200, fishNum == "LT015" & Target_true_depth > 15.5),
]

In [17]:
# LT018 had a rough attachment and shows two rhythmic changes in depth.
# Dropping its rows with Target_true_depth > 15.5 is meant to remove both the
# stretch where the fish may have been dragged and the rough first part of the
# time series.
processed_data_no200 <- processed_data_no200[
  !with(processed_data_no200, fishNum == "LT018" & Target_true_depth > 15.5),
]

In [18]:
# LT23018 had a rough attachment and was barely alive, yet "surprisingly okay"
# on return. The data give no clear point at which it became "okay", so the
# whole fish is removed.
processed_data_no200 <- filter(processed_data_no200, fishNum != "LT23018")

In [19]:
# LT013 was almost dead on release and dead on retrieval - remove all of it.
processed_data_no200 <- filter(processed_data_no200, fishNum != "LT013")

In [20]:
# LT23001 has barely any salvageable pings - remove all of it.
processed_data_no200 <- filter(processed_data_no200, fishNum != "LT23001")

In [21]:
## Cleaned data: ping count for each remaining LT fish (spCode 81)
processed_data_no200 %>%
  filter(spCode == 81) %>%
  group_by(fishNum) %>%
  count()
# Recorded at the time: 21 fish, 22,792 pings
# NOTE(review): the per-fish length summary printed further down shows a
# 20-row tibble, so this count may be out of date - re-check.

fishNum,n
<chr>,<int>
LT009,649
LT010,129
LT011,508
LT012,360
LT014,2052
LT015,172
LT016,1877
LT017,799
LT018,25
LT021,325


In [22]:
## Looking at cleaned LT data: mean totalLength per fish, printing up to 21 rows
processed_data_no200 %>%
  filter(spCode == 81) %>%
  group_by(fishNum) %>%
  summarise(TL = mean(totalLength)) %>%
  print(n = 21)

[90m# A tibble: 20 × 2[39m
   fishNum    TL
   [3m[90m<chr>[39m[23m   [3m[90m<dbl>[39m[23m
[90m 1[39m LT009     521
[90m 2[39m LT010     532
[90m 3[39m LT011     590
[90m 4[39m LT012     555
[90m 5[39m LT014     539
[90m 6[39m LT015     499
[90m 7[39m LT016     503
[90m 8[39m LT017     472
[90m 9[39m LT018     486
[90m10[39m LT021     522
[90m11[39m LT23002   538
[90m12[39m LT23003   630
[90m13[39m LT23004   563
[90m14[39m LT23005   496
[90m15[39m LT23007   470
[90m16[39m LT23009   467
[90m17[39m LT23010   565
[90m18[39m LT23011   479
[90m19[39m LT23012   566
[90m20[39m LT23013   390


## LWT

In [23]:
# LWF23004 and LWF23014 were both pretty much dead the whole time.
processed_data_no200 <- processed_data_no200 %>%
  filter(fishNum != "LWF23004", fishNum != "LWF23014")

In [24]:
# LWF23006 and LWF23008 both "swam" upside down for most of the recording.
processed_data_no200 <- processed_data_no200 %>%
  filter(fishNum != "LWF23006", fishNum != "LWF23008")

In [25]:
install.packages("lubridate")
library(lubridate)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)


Attaching package: ‘lubridate’


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union




In [26]:
# Build the time-filtered copy of fish LWF011.
#
# The ping time is parsed and shifted back by 4 hours, and only pings in these
# shifted-time windows are kept: any minute of hour 19, or hour 20 with
# minute <= 9 or minute >= 13 (hour 20, minutes 10-12 are dropped).
#
# The shifted time is held in a temporary column rather than written over
# Ping_time. That keeps these rows in the same Ping_time representation as
# every other fish when they are rbind()-ed back into processed_data_no200,
# and stops the later "%H:%M:%S" parse and 4-hour shift from being applied to
# an already-converted value.
# NOTE(review): assumes Ping_time is stored as "%H:%M:%S" text in the source
# table - confirm.
LWF011 <- processed_data_no200 %>%
  filter(fishNum == "LWF011") %>%
  mutate(
    ping_shifted = as.POSIXct(Ping_time, format = "%H:%M:%S") - (60 * 60 * 4),
    hour = hour(ping_shifted),
    minute = minute(ping_shifted)
  ) %>%
  filter(hour == 19 | (hour == 20 & minute <= 9) | (hour == 20 & minute >= 13)) %>%
  select(-ping_shifted, -hour, -minute)

In [27]:
# Take every LWF011 row out of the main table (the time-filtered copy is held
# in the LWF011 object).
processed_data_no200 <- filter(processed_data_no200, fishNum != "LWF011")

In [28]:
# Append the time-filtered LWF011 rows to the end of the main table.
processed_data_no200 <- rbind(
  processed_data_no200,
  LWF011
)

Look at the cleaned processed_data

In [32]:
# Display the cleaned data frame, then print the names of all of its columns.
processed_data_no200
column_names <- colnames(processed_data_no200)
print(column_names)

dateTimeSample,fishNum,spCode,totalLength,forkLength,weight,girth,dorsoLatHeight,clipTag,sex,⋯,F165.5,F166,F166.5,F167,F167.5,F168,F168.5,F169,F169.5,F170
<dttm>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-33.31732,-33.48272,-33.98954,-34.71069,-35.47591,-36.11846,-36.57586,-36.90389,-37.16360,-37.37286
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-47.93437,-55.27752,-49.75377,-43.36868,-39.93659,-38.22149,-37.75096,-38.21294,-39.35730,-41.04031
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-43.51204,-44.28880,-45.48378,-46.41323,-46.20443,-46.10777,-47.79635,-51.34248,-52.21717,-51.22738
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-43.60107,-42.57205,-42.36388,-42.79280,-43.24235,-43.47555,-44.25610,-46.26738,-49.37243,-51.95134
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-37.35113,-36.52720,-36.12024,-36.28219,-36.87755,-37.58748,-38.33421,-38.65026,-37.10683,-34.48625
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-33.32777,-32.72380,-32.66965,-32.60390,-32.43916,-32.96681,-35.13315,-39.78136,-46.17297,-44.42698
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-43.06297,-44.89117,-46.83981,-48.26073,-48.73601,-46.82269,-44.71505,-44.26200,-45.54696,-48.31731
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-42.60981,-44.27151,-48.55686,-57.49871,-48.56347,-43.80375,-41.95260,-41.66501,-42.73266,-45.96766
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-42.17858,-46.13763,-49.22816,-42.07251,-35.43637,-31.48083,-29.72450,-29.86794,-31.79124,-35.66754
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-33.40930,-35.03419,-39.22272,-50.58940,-48.95248,-44.55936,-43.09744,-38.97563,-36.36449,-36.25474


  [1] "dateTimeSample"                "fishNum"                      
  [3] "spCode"                        "totalLength"                  
  [5] "forkLength"                    "weight"                       
  [7] "girth"                         "dorsoLatHeight"               
  [9] "clipTag"                       "sex"                          
 [11] "mat"                           "airbladderTotalLength"        
 [13] "airBladderWidth"               "Region_name"                  
 [15] "FishTrack"                     "MaxTSdiff"                    
 [17] "Ping_time"                     "deltaRange"                   
 [19] "deltaMinAng"                   "deltaMajAng"                  
 [21] "aspectAngle"                   "Target_range"                 
 [23] "Angle_minor_axis"              "Angle_major_axis"             
 [25] "Distance_minor_axis"           "Distance_major_axis"          
 [27] "StandDev_Angles_Minor_Axis"    "StandDev_Angles_Major_Axis"   
 [29] "Target_true_d

In [50]:
# Species label of every row, stored as a factor of the species codes.
y <- as.factor(pull(processed_data_no200, spCode))

In [53]:
# Predictor vector: the TS_mean column of the cleaned data.
X <- pull(processed_data_no200, TS_mean)

In [54]:
# Add a parsed ping time, filter rows on the gap to the following row, and
# attach a per-fish mean of TS_mean to every remaining row.
p1_processed_data_no200 <- processed_data_no200 %>%
  # Parse Ping_time as "%H:%M:%S" and shift it back by 4 hours.
  mutate(Ping_time_POSIXct = as.POSIXct(Ping_time, format = "%H:%M:%S") - (60 * 60 * 4)) %>%
  # Keeps rows where (this row's time - next row's time) < 2.
  # NOTE(review): the difference is current minus next, so it is negative
  # whenever the next ping is later and then always passes; the difftime
  # units are chosen automatically; the last row (no next row) gives NA and
  # is dropped. lead() also runs before group_by(), so unless the input is
  # already grouped it compares across fish boundaries - confirm intent.
  filter(Ping_time_POSIXct - lead(Ping_time_POSIXct) < 2) %>%
  group_by(fishNum) %>%
  # NOTE(review): despite the "_2sec" name, this is the mean of TS_mean over
  # all remaining rows of the fish, repeated on each row - confirm intent.
  mutate(TS_mean_2sec = mean(TS_mean, na.rm = TRUE)) %>%
  ungroup()

In [56]:
# Split the prepared data by species code: 91 -> LWT, 81 -> LT.
p1_processed_data_no200_LWT <- filter(p1_processed_data_no200, spCode == 91)
p1_processed_data_no200_LT <- filter(p1_processed_data_no200, spCode == 81)

In [64]:
# Install the required packages
install.packages("keras")
install.packages("tensorflow")

# Load the installed packages
library(keras)
library(tensorflow)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘RcppTOML’, ‘tfautograph’, ‘reticulate’, ‘tensorflow’, ‘tfruns’


Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)



RNN Implementation

In [None]:
# prompt: create (x, y) for the input to RNN, x should be the TS_mean_2sec, y should be the species, the RNN is to conduct binary classification, also add the implementation of RNN

# Binary classification needs examples of both classes, so the LT (spCode 81)
# and LWT (spCode 91) rows are stacked instead of training on LWT alone.
rnn_data <- bind_rows(p1_processed_data_no200_LT, p1_processed_data_no200_LWT)

# binary_crossentropy with a sigmoid output expects 0/1 targets, not raw
# species codes: 1 = LWT (spCode 91), 0 = LT (spCode 81).
rnn_labels <- as.numeric(rnn_data$spCode == 91)

# Reshape the feature to (samples, timesteps = 1, features = 1).
# A plain vector has no dim(), so the sample count comes from length().
rnn_feature <- rnn_data$TS_mean_2sec
input_data <- array_reshape(rnn_feature, c(length(rnn_feature), 1, 1))

# Define the model: two stacked LSTM layers followed by a single sigmoid unit
rnn <- keras_model_sequential()
rnn <- rnn %>%
  layer_lstm(units = 128, return_sequences = TRUE, input_shape = c(1, 1)) %>%
  layer_lstm(units = 128) %>%
  layer_dense(units = 1, activation = "sigmoid")

# Compile the model
rnn %>%
  compile(
    optimizer = "adam",
    loss = "binary_crossentropy",
    metrics = c("accuracy")
  )

# Train the model
rnn %>%
  fit(
    x = input_data,
    y = rnn_labels,
    epochs = 100,
    batch_size = 32
  )

# Evaluate the model
# NOTE(review): this scores the same rows the model was trained on, so it is
# not an estimate of out-of-sample performance; a held-out split (ideally by
# fish) is still needed.
rnn %>%
  evaluate(
    x = input_data,
    y = rnn_labels
  )



### Previous methods to calculate time difference

In [None]:
# Rows whose species code is 81 (LT).
# NOTE(review): this reads `processed_data`, not the cleaned
# `processed_data_no200`; no visible cell assigns it, so presumably it is
# another object restored by load() - confirm.
LT <- processed_data[processed_data$spCode == 81, ]

# Parse "%H:%M:%OS" time strings into POSIXct values.
# Strings that do not match the format come back as NA.
convert_to_datetime <- function(time_str) {
  as.POSIXct(x = time_str, format = "%H:%M:%OS")
}

# Time difference across each consecutive, non-overlapping block of 5 rows.
#
# For the blocks made of rows 1-5, 6-10, ... the result holds
# (time of the block's first row) - (time of the block's fifth row) in
# seconds, so it is negative when times increase down the table. A trailing
# block with fewer than 5 rows is ignored.
#
# data: data frame with a `Ping_time` column accepted by convert_to_datetime().
# Returns a numeric vector with one element per complete block; numeric(0)
# when `data` has fewer than 5 rows (including 0 rows, which previously
# raised an error from seq()).
calculate_time_difference_for_five_rows <- function(data) {
  n_blocks <- nrow(data) %/% 5L
  if (n_blocks == 0L) {
    return(numeric(0))
  }

  # Row index of the first ping in every complete block: 1, 6, 11, ...
  first_rows <- (seq_len(n_blocks) - 1L) * 5L + 1L

  block_start <- convert_to_datetime(data$Ping_time[first_rows])
  block_end <- convert_to_datetime(data$Ping_time[first_rows + 4L])

  as.numeric(difftime(block_start, block_end, units = "secs"))
}

# Time difference (seconds) across each 5-row block of the LT pings
time_differences <- calculate_time_difference_for_five_rows(LT)

# Running counter that steps up at every block spanning 5 seconds or more
groups <- cumsum(5 <= abs(time_differences))

# Run lengths of that counter: how many consecutive blocks share one value
lengths_of_regions <- rle(groups)[["lengths"]]

# Show the length of each region
print(lengths_of_regions)

# choose 5 ping for each region, doesn't have to be same time interval

In [None]:
# Rows whose species code is 91 (LWT).
# NOTE(review): this reads `processed_data`, not the cleaned
# `processed_data_no200`; no visible cell assigns it, so presumably it is
# another object restored by load() - confirm.
LWT <- processed_data[processed_data$spCode == 91, ]

# Parse "%H:%M:%OS" time strings into POSIXct values.
# Strings that do not match the format come back as NA.
convert_to_datetime <- function(time_str) {
  as.POSIXct(x = time_str, format = "%H:%M:%OS")
}

# Time difference across each consecutive, non-overlapping block of 5 rows.
#
# For the blocks made of rows 1-5, 6-10, ... the result holds
# (time of the block's first row) - (time of the block's fifth row) in
# seconds, so it is negative when times increase down the table. A trailing
# block with fewer than 5 rows is ignored.
#
# data: data frame with a `Ping_time` column accepted by convert_to_datetime().
# Returns a numeric vector with one element per complete block; numeric(0)
# when `data` has fewer than 5 rows (including 0 rows, which previously
# raised an error from seq()).
calculate_time_difference_for_five_rows <- function(data) {
  n_blocks <- nrow(data) %/% 5L
  if (n_blocks == 0L) {
    return(numeric(0))
  }

  # Row index of the first ping in every complete block: 1, 6, 11, ...
  first_rows <- (seq_len(n_blocks) - 1L) * 5L + 1L

  block_start <- convert_to_datetime(data$Ping_time[first_rows])
  block_end <- convert_to_datetime(data$Ping_time[first_rows + 4L])

  as.numeric(difftime(block_start, block_end, units = "secs"))
}

# Time difference (seconds) across each 5-row block of the LWT pings.
# This cell builds the LWT subset, so LWT is passed here; the previous code
# passed LT and therefore just repeated the LT result.
time_differences <- calculate_time_difference_for_five_rows(LWT)

# Running counter that steps up at every block spanning 5 seconds or more
groups <- cumsum(abs(time_differences) >= 5)


# Run-length encoding to calculate the length of each region
lengths_of_regions <- rle(groups)$lengths

# Print the lengths of each region
print(lengths_of_regions)

In [None]:
# Two-column view holding only the ping time and the ping number.
# NOTE(review): this reads `processed_data`, not the cleaned
# `processed_data_no200`; no visible cell assigns it, so presumably it is
# another object restored by load() - confirm.
Ping_time_number <- processed_data[, c("Ping_time", "pingNumber")]

# Parse "%H:%M:%OS" time strings into POSIXct values.
# Strings that do not match the format come back as NA.
convert_to_datetime <- function(time_str) {
  as.POSIXct(x = time_str, format = "%H:%M:%OS")
}

# Absolute time difference between two ping times, always in seconds.
# (Original goal noted here: compute the difference for every 5 pingNumber.)
#
# time_str1, time_str2: time strings accepted by convert_to_datetime().
# Returns a difftime object with units "secs". The units are fixed because
# difftime() otherwise picks secs/mins/hours automatically, so the bare
# number would not reliably be seconds.
calculate_time_difference <- function(time_str1, time_str2) {
  time1 <- convert_to_datetime(time_str1)
  time2 <- convert_to_datetime(time_str2)

  abs(difftime(time1, time2, units = "secs"))
}

# Report a time difference as a number of seconds.
# NOTE(review): no top-level `time_difference` is assigned in the visible
# cells (the name only exists as a local inside calculate_time_difference),
# so this presumably needs the result of a calculate_time_difference(...)
# call assigned first - TODO confirm.
print(paste("Time difference:", as.numeric(time_difference), "seconds"))
