<a href="https://colab.research.google.com/github/Le119/Ontario-Lake-Fish-Classification/blob/Yihan/Input%20data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Load the Processed data

In [1]:
load("/content/processed_AnalysisData_no200.Rdata")
ls()

Clean data as Jessi did

In [2]:
# libraries
library(dplyr)
library(tidyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




### 3 species, LT, LWT, SMB

In [3]:
processed_data_no200%>%group_by(spCode,fishNum)%>%count()

spCode,fishNum,n
<dbl>,<chr>,<int>
81,LT001,133
81,LT003,1213
81,LT004,970
81,LT005,581
81,LT006,306
81,LT007,638
81,LT009,649
81,LT010,129
81,LT011,508
81,LT012,360


## Remove individuals with missing transducers


In [4]:
processed_data_no200 <-processed_data_no200 %>%filter(is.na(F100)==F)

In [5]:
# also remove individual LWF23018 (only two pings)
processed_data_no200 <-processed_data_no200 %>%filter(fishNum!="LWF23018")

In [6]:
# remove the individual with -9x10^38 TS
processed_data_no200 <-processed_data_no200[-36830,]

## LT

In [7]:
# LT019 and LT23008 were dead the whole time. Remove.
processed_data_no200 <-processed_data_no200 %>%filter(fishNum!="LT019")
processed_data_no200 <-processed_data_no200 %>%filter(fishNum!="LT23008")

In [8]:
# LT015 was at a very shallow depth and on quadrant for the majority of pinging, then seemed to move to the correct depth. Keep only those pings
processed_data_no200 <- processed_data_no200[!(processed_data_no200$fishNum == "LT015" & processed_data_no200$Target_true_depth > 15.5 ) ,]

In [9]:
# LT018 was rough on attachment too, also has two rythmic changes in depth - going to remove the times the fish was above 15.5m as this will get rid of the time that fish was being dragged potentially and the first part of the timeseries where the fish was rough
processed_data_no200 <- processed_data_no200[!(processed_data_no200$fishNum == "LT018" & processed_data_no200$Target_true_depth > 15.5 ) ,]

In [10]:
# LT23018 was rough on attachment, barely alive, but "suprisingly okay" coming back - no clear indication of when it got "okay" in the data so remove whole fish
processed_data_no200 <-processed_data_no200 %>%filter(fishNum!="LT23018")

In [11]:
# LT013 was almost dead on release as well as dead on retrival - remove all.
processed_data_no200 <-processed_data_no200 %>%filter(fishNum!="LT013")

In [12]:
# LT23001 looks like there are barely any salvagable pings - remove all
processed_data_no200 <-processed_data_no200%>%filter(fishNum!="LT23001")

In [13]:
## Cleaned data:
processed_data_no200 %>%filter(spCode==81)%>%group_by(fishNum)%>%count()
# 21 fish, 22,792 pings

fishNum,n
<chr>,<int>
LT009,649
LT010,129
LT011,508
LT012,360
LT014,2052
LT015,172
LT016,1877
LT017,799
LT018,25
LT021,325


In [14]:
## Looking at cleaned LT data
print(processed_data_no200%>%filter(spCode==81)%>%group_by(fishNum)%>%summarise(TL=mean(totalLength)),n=21)

[90m# A tibble: 20 × 2[39m
   fishNum    TL
   [3m[90m<chr>[39m[23m   [3m[90m<dbl>[39m[23m
[90m 1[39m LT009     521
[90m 2[39m LT010     532
[90m 3[39m LT011     590
[90m 4[39m LT012     555
[90m 5[39m LT014     539
[90m 6[39m LT015     499
[90m 7[39m LT016     503
[90m 8[39m LT017     472
[90m 9[39m LT018     486
[90m10[39m LT021     522
[90m11[39m LT23002   538
[90m12[39m LT23003   630
[90m13[39m LT23004   563
[90m14[39m LT23005   496
[90m15[39m LT23007   470
[90m16[39m LT23009   467
[90m17[39m LT23010   565
[90m18[39m LT23011   479
[90m19[39m LT23012   566
[90m20[39m LT23013   390


## LWT

In [15]:
# 23004 and 23014 both pretty much dead the whole time
processed_data_no200<-processed_data_no200%>%filter(fishNum!="LWF23004")
processed_data_no200<-processed_data_no200%>%filter(fishNum!="LWF23014")

In [16]:
# 23006 and 23008 both "swam" upside down for the majority of the time
processed_data_no200<-processed_data_no200%>%filter(fishNum!="LWF23006")
processed_data_no200<-processed_data_no200%>%filter(fishNum!="LWF23008")

In [17]:
install.packages("lubridate")
library(lubridate)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)


Attaching package: ‘lubridate’


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union




In [18]:
LWF011<-processed_data_no200%>%
        filter(fishNum=="LWF011")%>%
        mutate(Ping_time=strptime(Ping_time,format = "%H:%M:%S")-(60*60*4), hour=hour(Ping_time), minute=minute(Ping_time))%>%
  filter(hour==19 | hour==20 & minute <= 9 | hour==20 & minute >= 13)%>%select(c(-hour,-minute))

In [19]:
processed_data_no200<-processed_data_no200%>%filter(fishNum!="LWF011")

In [20]:
processed_data_no200<-rbind(processed_data_no200,LWF011)

Look at the cleaned processed_data

In [21]:
processed_data_no200
column_names <- names(processed_data_no200)
# Print the column names
print(column_names)

dateTimeSample,fishNum,spCode,totalLength,forkLength,weight,girth,dorsoLatHeight,clipTag,sex,⋯,F165.5,F166,F166.5,F167,F167.5,F168,F168.5,F169,F169.5,F170
<dttm>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-33.31732,-33.48272,-33.98954,-34.71069,-35.47591,-36.11846,-36.57586,-36.90389,-37.16360,-37.37286
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-47.93437,-55.27752,-49.75377,-43.36868,-39.93659,-38.22149,-37.75096,-38.21294,-39.35730,-41.04031
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-43.51204,-44.28880,-45.48378,-46.41323,-46.20443,-46.10777,-47.79635,-51.34248,-52.21717,-51.22738
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-43.60107,-42.57205,-42.36388,-42.79280,-43.24235,-43.47555,-44.25610,-46.26738,-49.37243,-51.95134
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-37.35113,-36.52720,-36.12024,-36.28219,-36.87755,-37.58748,-38.33421,-38.65026,-37.10683,-34.48625
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-33.32777,-32.72380,-32.66965,-32.60390,-32.43916,-32.96681,-35.13315,-39.78136,-46.17297,-44.42698
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-43.06297,-44.89117,-46.83981,-48.26073,-48.73601,-46.82269,-44.71505,-44.26200,-45.54696,-48.31731
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-42.60981,-44.27151,-48.55686,-57.49871,-48.56347,-43.80375,-41.95260,-41.66501,-42.73266,-45.96766
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-42.17858,-46.13763,-49.22816,-42.07251,-35.43637,-31.48083,-29.72450,-29.86794,-31.79124,-35.66754
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-33.40930,-35.03419,-39.22272,-50.58940,-48.95248,-44.55936,-43.09744,-38.97563,-36.36449,-36.25474


  [1] "dateTimeSample"                "fishNum"                      
  [3] "spCode"                        "totalLength"                  
  [5] "forkLength"                    "weight"                       
  [7] "girth"                         "dorsoLatHeight"               
  [9] "clipTag"                       "sex"                          
 [11] "mat"                           "airbladderTotalLength"        
 [13] "airBladderWidth"               "Region_name"                  
 [15] "FishTrack"                     "MaxTSdiff"                    
 [17] "Ping_time"                     "deltaRange"                   
 [19] "deltaMinAng"                   "deltaMajAng"                  
 [21] "aspectAngle"                   "Target_range"                 
 [23] "Angle_minor_axis"              "Angle_major_axis"             
 [25] "Distance_minor_axis"           "Distance_major_axis"          
 [27] "StandDev_Angles_Minor_Axis"    "StandDev_Angles_Major_Axis"   
 [29] "Target_true_d

In [22]:
# labels for each species
y <- as.factor(processed_data_no200$spCode)

In [45]:
# target strength as X
X_170 <- processed_data_no200$F170 # for F170


convert_to_datetime <- function(time_str) {
  return(as.POSIXct(time_str, format = "%H:%M:%OS"))
}

# Modify the Ping_time column
processed_data_no200 <- processed_data_no200 %>%
  mutate(Ping_time = convert_to_datetime(Ping_time))

In [46]:
library(dplyr)

processed_data_no200_1sec <- processed_data_no200 %>%
  arrange(fishNum, Ping_time) %>%
  group_by(fishNum) %>%
  filter(!is.na(as.numeric(Ping_time))) %>%  # Exclude rows with non-numeric Ping_time
  mutate(Ping_time = as.numeric(Ping_time),  # Convert Ping_time to numeric
         diff = Ping_time - lag(Ping_time, default = first(Ping_time))) %>%
  mutate(region = cumsum(diff > 1)) %>%
  ungroup() %>%
  group_by(fishNum, region, .add = TRUE) %>%
  filter(all(!is.na(diff), diff <= 1))  # Exclude rows with NAs in diff and diff > 1

# Create data frames for each region
region_data_frames <- processed_data_no200_1sec %>%
  group_split(fishNum, region)

# Print data frames for each region
for (i in seq_along(region_data_frames)) {
  cat("Region", i, ":\n")
  print(region_data_frames[[i]])
  cat("\n")
}


“[1m[22m... is ignored in group_split(<grouped_df>), please use group_by(..., .add =
TRUE) %>% group_split()”


Region 1 :
[90m# A tibble: 2 × 304[39m
  dateTimeSample    fishNum spCode totalLength forkLength weight girth
  [3m[90m<dttm>[39m[23m            [3m[90m<chr>[39m[23m    [3m[90m<dbl>[39m[23m       [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m       [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m
[90m1[39m 26-07-20 [90m04:56:02[39m LT009       81         521 474          [4m1[24m132   236
[90m2[39m 26-07-20 [90m04:56:02[39m LT009       81         521 474          [4m1[24m132   236
[90m# ℹ 297 more variables: dorsoLatHeight <dbl>, clipTag <chr>, sex <dbl>,[39m
[90m#   mat <dbl>, airbladderTotalLength <dbl>, airBladderWidth <dbl>,[39m
[90m#   Region_name <chr>, FishTrack <chr>, MaxTSdiff <dbl>, Ping_time <dbl>,[39m
[90m#   deltaRange <dbl>, deltaMinAng <dbl>, deltaMajAng <dbl>, aspectAngle <dbl>,[39m
[90m#   Target_range <dbl>, Angle_minor_axis <dbl>, Angle_major_axis <dbl>,[39m
[90m#   Distance_minor_axis <dbl>, Distance_major_axis <dbl>,[39m
[

In [51]:
# create a dataframe, column1: the mean of the column F170 across the rows for each region, column2: the spCode, column 3 fishNum

library(dplyr)

# Assuming you want to store the results in a list
result_list <- list()

for (i in seq_along(region_data_frames)) {
  current_region_data <- region_data_frames[[i]]

  # Calculate the mean of the column F170 for each region
  mean_F170 <- current_region_data %>%
    summarise(mean_F170 = mean(F170, na.rm = TRUE))

  # Extract spCode and fishNum for each region
  spCode_fish <- current_region_data %>%
    select(spCode) %>%
    distinct()  # Assuming each region has the same spCode

  # Combine the results into a single dataframe
  region_result <- cbind(mean_F170, spCode_fish)

  # Append the result to the list
  result_list[[i]] <- region_result
}

# Print the list of dataframes
for (i in seq_along(result_list)) {
  cat("Region", i, ":\n")
  print(result_list[[i]])
  cat("\n")
}

Region 1 :
  mean_F170 spCode
1 -39.20659     81

Region 2 :
  mean_F170 spCode
1 -41.10722     81

Region 3 :
  mean_F170 spCode
1 -40.16063     81

Region 4 :
  mean_F170 spCode
1 -40.34681     81

Region 5 :
  mean_F170 spCode
1 -49.20419     81

Region 6 :
  mean_F170 spCode
1 -42.12254     81

Region 7 :
  mean_F170 spCode
1 -36.67415     81

Region 8 :
  mean_F170 spCode
1 -42.77672     81

Region 9 :
  mean_F170 spCode
1 -45.32131     81

Region 10 :
  mean_F170 spCode
1 -56.76095     81

Region 11 :
  mean_F170 spCode
1 -37.25862     81

Region 12 :
  mean_F170 spCode
1 -38.18824     81

Region 13 :
  mean_F170 spCode
1 -41.55708     81

Region 14 :
  mean_F170 spCode
1 -42.57233     81

Region 15 :
  mean_F170 spCode
1 -41.43381     81

Region 16 :
  mean_F170 spCode
1 -55.21315     81

Region 17 :
  mean_F170 spCode
1 -41.68797     81

Region 18 :
  mean_F170 spCode
1 -41.52655     81

Region 19 :
  mean_F170 spCode
1 -41.05113     81

Region 20 :
  mean_F170 spCode
1 -44.183

In [55]:
library(dplyr)

# Assuming you want to store the results in a list
result_list <- list()

for (i in seq_along(region_data_frames)) {
  current_region_data <- region_data_frames[[i]]

  # Remove specified columns
  processed_data_no_cols <- current_region_data %>%
    select(-fishNum, -forkLength, -FishTrack, -Fish_track_change_in_range, -Fish_track_change_in_depth)

  # Select columns that start with F followed by an integer
  selected_columns <- processed_data_no_cols %>%
    select(matches("^F\\d+$")) %>%
    mutate(across(everything(), as.numeric))

  # Calculate the mean of the selected columns for each region
  mean_frequencies <- processed_data_no_cols %>%
    summarise(across(starts_with("F"), mean, na.rm = TRUE))

  # Extract spCode for each region
  spCode <- processed_data_no_cols %>%
    distinct(spCode)

  # Combine the results into a single dataframe
  region_result <- cbind(mean_frequencies, spCode)

  # Append the result to the list
  result_list[[i]] <- region_result

  # Save the result as a CSV file (adjust the file path as needed)
  write.csv(region_result, file = paste0("region_", i, "_result.csv"), row.names = FALSE)
}

# Print the list of dataframes (optional)
for (i in seq_along(result_list)) {
  cat("Region", i, ":\n")
  print(result_list[[i]])
  cat("\n")
}


Region 1 :
        F45     F45.5       F46     F46.5      F47     F47.5       F48
1 -51.67066 -51.30251 -50.33475 -49.00289 -48.2418 -47.71139 -47.72193
      F48.5       F49     F49.5       F50    F50.5       F51     F51.5
1 -47.78742 -47.75396 -47.65781 -47.67734 -48.0689 -48.61567 -49.15467
        F52     F52.5       F53     F53.5       F54     F54.5       F55
1 -49.14249 -48.28736 -47.03906 -45.99224 -45.33652 -45.20277 -45.51906
      F55.5       F56     F56.5       F57     F57.5       F58     F58.5
1 -46.27012 -47.39508 -48.88245 -50.98391 -54.34656 -53.80165 -50.92308
        F59     F59.5       F60     F60.5       F61     F61.5       F62
1 -49.62775 -49.40131 -49.96326 -50.91331 -51.89288 -52.55486 -52.70153
      F62.5       F63     F63.5       F64     F64.5       F65     F65.5
1 -51.92748 -50.26576 -48.49261 -46.95183 -45.67965 -44.81952 -44.33795
        F66     F66.5       F67     F67.5       F68     F68.5       F69
1 -43.97944 -43.56897 -43.14101 -42.78285 -42.60082 -42.6

In [54]:
library(dplyr)

# Assuming you want to store the results in a list
result_list <- list()

for (i in seq_along(region_data_frames)) {
  current_region_data <- region_data_frames[[i]]

  # Remove specified columns
  processed_data_no_cols <- current_region_data %>%
    select(-fishNum, -forkLength, -FishTrack, -Fish_track_change_in_range, -Fish_track_change_in_depth)

  # Select columns that start with F followed by an integer
  selected_columns <- processed_data_no_cols %>%
    select(matches("^F\\d+$")) %>%
    mutate(across(everything(), as.numeric))

  # Calculate the mean of the selected columns for each region
  mean_frequencies <- processed_data_no_cols %>%
    summarise(across(starts_with("F"), mean, na.rm = TRUE))

  # Extract spCode for each region
  spCode <- processed_data_no_cols %>%
    distinct(spCode)

  # Combine the results into a single dataframe
  region_result <- cbind(mean_frequencies, spCode)

  # Append the result to the list
  result_list[[i]] <- region_result
}

# Print the list of dataframes
for (i in seq_along(result_list)) {
  cat("Region", i, ":\n")
  print(result_list[[i]])
  cat("\n")
}


# for each region, save the values of the frequencies and the spcode

Region 1 :
        F45     F45.5       F46     F46.5      F47     F47.5       F48
1 -51.67066 -51.30251 -50.33475 -49.00289 -48.2418 -47.71139 -47.72193
      F48.5       F49     F49.5       F50    F50.5       F51     F51.5
1 -47.78742 -47.75396 -47.65781 -47.67734 -48.0689 -48.61567 -49.15467
        F52     F52.5       F53     F53.5       F54     F54.5       F55
1 -49.14249 -48.28736 -47.03906 -45.99224 -45.33652 -45.20277 -45.51906
      F55.5       F56     F56.5       F57     F57.5       F58     F58.5
1 -46.27012 -47.39508 -48.88245 -50.98391 -54.34656 -53.80165 -50.92308
        F59     F59.5       F60     F60.5       F61     F61.5       F62
1 -49.62775 -49.40131 -49.96326 -50.91331 -51.89288 -52.55486 -52.70153
      F62.5       F63     F63.5       F64     F64.5       F65     F65.5
1 -51.92748 -50.26576 -48.49261 -46.95183 -45.67965 -44.81952 -44.33795
        F66     F66.5       F67     F67.5       F68     F68.5       F69
1 -43.97944 -43.56897 -43.14101 -42.78285 -42.60082 -42.6

In [52]:
# from F45 to F170, we have frequcies like F45, F45.5, F50,....F169.5, F170, I want to # create a dataframe, column1: the mean of the columns of the frequencies across the rows for each region, column2: the spCode
library(dplyr)

# Assuming you want to store the results in a list
result_list <- list()

for (i in seq_along(region_data_frames)) {
  current_region_data <- region_data_frames[[i]]

  # Select columns F45 to F170
  selected_columns <- select(current_region_data, starts_with("F")) %>% # start with "F" and also follow with integer
    select(F45:F170)

  # Calculate the mean of the selected columns for each region
  mean_frequencies <- current_region_data %>%
    summarise(across(starts_with("F"), mean, na.rm = TRUE))

  # Extract spCode for each region
  spCode <- current_region_data %>%
    distinct(spCode)

  # Combine the results into a single dataframe
  region_result <- cbind(mean_frequencies, spCode)

  # Append the result to the list
  result_list[[i]] <- region_result
}

# Print the list of dataframes
for (i in seq_along(result_list)) {
  cat("Region", i, ":\n")
  print(result_list[[i]])
  cat("\n")
}


[1m[22m[36mℹ[39m In argument: `across(starts_with("F"), mean, na.rm = TRUE)`.
[1m[22m[33m![39m The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
Supply arguments directly to `.fns` through an anonymous function instead.

  # Previously
  across(a:b, mean, na.rm = TRUE)

  # Now
  across(a:b, \(x) mean(x, na.rm = TRUE))
[1m[22m[36mℹ[39m In argument: `across(starts_with("F"), mean, na.rm = TRUE)`.
[33m![39m argument is not numeric or logical: returning NA
[1m[22m[36mℹ[39m In argument: `across(starts_with("F"), mean, na.rm = TRUE)`.
[33m![39m argument is not numeric or logical: returning NA
[1m[22m[36mℹ[39m In argument: `across(starts_with("F"), mean, na.rm = TRUE)`.
[33m![39m argument is not numeric or logical: returning NA
[1m[22m[36mℹ[39m In argument: `across(starts_with("F"), mean, na.rm = TRUE)`.
[33m![39m argument is not numeric or logical: returning NA
[1m[22m[36mℹ[39m In argument: `across(starts_with("F"), mean, na.rm = TRUE)`.


Region 1 :
  fishNum forkLength FishTrack Fish_track_change_in_range
1      NA         NA        NA                   0.163466
  Fish_track_change_in_depth       F45     F45.5       F46     F46.5      F47
1                    0.15712 -51.67066 -51.30251 -50.33475 -49.00289 -48.2418
      F47.5       F48     F48.5       F49     F49.5       F50    F50.5
1 -47.71139 -47.72193 -47.78742 -47.75396 -47.65781 -47.67734 -48.0689
        F51     F51.5       F52     F52.5       F53     F53.5       F54
1 -48.61567 -49.15467 -49.14249 -48.28736 -47.03906 -45.99224 -45.33652
      F54.5       F55     F55.5       F56     F56.5       F57     F57.5
1 -45.20277 -45.51906 -46.27012 -47.39508 -48.88245 -50.98391 -54.34656
        F58     F58.5       F59     F59.5       F60     F60.5       F61
1 -53.80165 -50.92308 -49.62775 -49.40131 -49.96326 -50.91331 -51.89288
      F61.5       F62     F62.5       F63     F63.5       F64     F64.5
1 -52.55486 -52.70153 -51.92748 -50.26576 -48.49261 -46.95183 -45.67965

In [35]:
length(processed_data_no200)

In [38]:
processed_data_no200

dateTimeSample,fishNum,spCode,totalLength,forkLength,weight,girth,dorsoLatHeight,clipTag,sex,⋯,F165.5,F166,F166.5,F167,F167.5,F168,F168.5,F169,F169.5,F170
<dttm>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-33.31732,-33.48272,-33.98954,-34.71069,-35.47591,-36.11846,-36.57586,-36.90389,-37.16360,-37.37286
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-47.93437,-55.27752,-49.75377,-43.36868,-39.93659,-38.22149,-37.75096,-38.21294,-39.35730,-41.04031
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-43.51204,-44.28880,-45.48378,-46.41323,-46.20443,-46.10777,-47.79635,-51.34248,-52.21717,-51.22738
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-43.60107,-42.57205,-42.36388,-42.79280,-43.24235,-43.47555,-44.25610,-46.26738,-49.37243,-51.95134
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-37.35113,-36.52720,-36.12024,-36.28219,-36.87755,-37.58748,-38.33421,-38.65026,-37.10683,-34.48625
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-33.32777,-32.72380,-32.66965,-32.60390,-32.43916,-32.96681,-35.13315,-39.78136,-46.17297,-44.42698
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-43.06297,-44.89117,-46.83981,-48.26073,-48.73601,-46.82269,-44.71505,-44.26200,-45.54696,-48.31731
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-42.60981,-44.27151,-48.55686,-57.49871,-48.56347,-43.80375,-41.95260,-41.66501,-42.73266,-45.96766
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-42.17858,-46.13763,-49.22816,-42.07251,-35.43637,-31.48083,-29.72450,-29.86794,-31.79124,-35.66754
26-07-20 04:56:02,LT009,81,521,474,1132,236,49,N,2,⋯,-33.40930,-35.03419,-39.22272,-50.58940,-48.95248,-44.55936,-43.09744,-38.97563,-36.36449,-36.25474


In [37]:
length(processed_data_no200_1sec)
processed_data_no200_1sec

dateTimeSample,fishNum,spCode,totalLength,forkLength,weight,girth,dorsoLatHeight,clipTag,sex,⋯,F166,F166.5,F167,F167.5,F168,F168.5,F169,F169.5,F170,diff
<dttm>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
27-07-20 04:56:02,LWF011,91,401,368,606,205,36,N,1,⋯,-42.44757,-42.40249,-43.17331,-44.99774,-47.82517,-50.62874,-51.45151,-50.22685,-48.55602,0
27-07-20 04:56:02,LWF011,91,401,368,606,205,36,N,1,⋯,-56.06518,-65.09759,-64.94845,-59.30429,-57.64217,-58.58257,-60.74228,-57.50928,-52.77672,0
27-07-20 04:56:02,LWF011,91,401,368,606,205,36,N,1,⋯,-50.20801,-50.67703,-51.04862,-49.47546,-46.97736,-45.76630,-46.41095,-49.09709,-54.39143,0
27-07-20 04:56:02,LWF011,91,401,368,606,205,36,N,1,⋯,-47.75819,-48.21456,-48.77318,-49.03195,-48.89273,-48.72180,-48.95091,-49.62287,-50.14970,0
27-07-20 04:56:02,LWF011,91,401,368,606,205,36,N,1,⋯,-50.56060,-53.64664,-54.23772,-49.70088,-47.17877,-46.75784,-47.68144,-49.00906,-49.27187,0
27-07-20 04:56:02,LWF011,91,401,368,606,205,36,N,1,⋯,-49.17374,-50.99528,-54.11531,-56.59575,-54.80433,-52.88545,-52.74622,-53.54684,-52.19888,0
27-07-20 04:56:02,LWF011,91,401,368,606,205,36,N,1,⋯,-45.16338,-45.97524,-46.59139,-45.98768,-44.94536,-44.83201,-46.10662,-48.67234,-51.57042,0
27-07-20 04:56:02,LWF011,91,401,368,606,205,36,N,1,⋯,-53.10778,-58.06483,-67.55455,-55.05850,-49.19299,-46.95838,-47.45299,-50.27478,-51.33034,0
27-07-20 04:56:02,LWF011,91,401,368,606,205,36,N,1,⋯,-49.40165,-48.38443,-48.37252,-49.78747,-52.93148,-57.39996,-60.01855,-57.30721,-52.57944,0
27-07-20 04:56:02,LWF011,91,401,368,606,205,36,N,1,⋯,-56.44255,-58.22694,-58.56167,-55.61999,-51.01642,-48.38071,-47.95422,-49.21250,-50.57501,0


In [None]:
# LWT
p1_processed_data_no200_LWT <- p1_processed_data_no200 %>%
  filter(spCode == 91)

# LT
p1_processed_data_no200_LT <- p1_processed_data_no200 %>%
  filter(spCode == 81)

In [None]:
# Install the required packages
install.packages("keras")
install.packages("tensorflow")

# Load the installed packages
library(keras)
library(tensorflow)

RNN Implementation

In [None]:
# prompt: create (x, y) for the input to RNN, x should be the TS_mean_2sec, y should be the species, the RNN is to conduct binary classification, also add the implementation of RNN

# Reshape the input data
input_data <- array_reshape(p1_processed_data_no200_LWT$TS_mean_2sec, c(dim(p1_processed_data_no200_LWT$TS_mean_2sec)[1], 1, 1))

# Define the model
rnn <- keras_model_sequential()
rnn <- rnn %>%
  layer_lstm(units = 128, return_sequences = TRUE, input_shape = c(1, 1)) %>%
  layer_lstm(units = 128) %>%
  layer_dense(units = 1, activation = 'sigmoid')

# Compile the model
rnn %>%
  compile(optimizer = 'adam',
          loss = 'binary_crossentropy',
          metrics = c('accuracy'))

# Train the model
rnn %>%
  fit(x = input_data,
      y = p1_processed_data_no200_LWT$spCode,
      epochs = 100,
      batch_size = 32)

# Evaluate the model
rnn %>%
  evaluate(x = input_data,
           y = p1_processed_data_no200_LWT$spCode)



### Previous methods to calculate time difference

In [None]:
LT <- processed_data[processed_data$spCode == 81, ]

# Function to convert time string to POSIXct
convert_to_datetime <- function(time_str) {
  return(as.POSIXct(time_str, format = "%H:%M:%OS"))
}

# Function to calculate the time difference for every 5 rows
calculate_time_difference_for_five_rows <- function(data) {
  time_differences <- numeric(0)

  for (i in seq(1, nrow(data), by = 5)) {
    if ((i + 4) <= nrow(data)) {
      time_str1 <- data$Ping_time[i]
      time_str2 <- data$Ping_time[i + 4]

      time1 <- convert_to_datetime(time_str1)
      time2 <- convert_to_datetime(time_str2)

      time_difference <- as.numeric(difftime(time1, time2, units = "secs"))
      time_differences <- c(time_differences, time_difference)
    }
  }

  return(time_differences)
}

# Calculate time differences for every 5 rows
time_differences <- calculate_time_difference_for_five_rows(LT)

groups <- cumsum(abs(time_differences) >= 5)


# Run-length encoding to calculate the length of each region
lengths_of_regions <- rle(groups)$lengths

# Print the lengths of each region
print(lengths_of_regions)

# choose 5 ping for each region, doesn't have to be same time interval

In [None]:
LWT <- processed_data[processed_data$spCode == 91, ]

# Function to convert time string to POSIXct
convert_to_datetime <- function(time_str) {
  return(as.POSIXct(time_str, format = "%H:%M:%OS"))
}

# Function to calculate the time difference for every 5 rows
calculate_time_difference_for_five_rows <- function(data) {
  time_differences <- numeric(0)

  for (i in seq(1, nrow(data), by = 5)) {
    if ((i + 4) <= nrow(data)) {
      time_str1 <- data$Ping_time[i]
      time_str2 <- data$Ping_time[i + 4]

      time1 <- convert_to_datetime(time_str1)
      time2 <- convert_to_datetime(time_str2)

      time_difference <- as.numeric(difftime(time1, time2, units = "secs"))
      time_differences <- c(time_differences, time_difference)
    }
  }

  return(time_differences)
}

# Calculate time differences for every 5 rows
time_differences <- calculate_time_difference_for_five_rows(LT)

groups <- cumsum(abs(time_differences) >= 5)


# Run-length encoding to calculate the length of each region
lengths_of_regions <- rle(groups)$lengths

# Print the lengths of each region
print(lengths_of_regions)

In [None]:
Ping_time_number <- processed_data[, c("Ping_time", "pingNumber")]

# Function to convert time string to POSIXct
convert_to_datetime <- function(time_str) {
  return(as.POSIXct(time_str, format = "%H:%M:%OS"))
}

# I wanna calculate the time difference for every 5 pingNumber
calculate_time_difference <- function(time_str1, time_str2) {

  time1 <- convert_to_datetime(time_str1)
  time2 <- convert_to_datetime(time_str2)


  time_difference <- abs(difftime(time1, time2))

  return(time_difference)
}

print(paste("Time difference:", as.numeric(time_difference), "seconds"))


### frequencies as the columns
### Take the average over the target strength across the frequencies

