-
Notifications
You must be signed in to change notification settings - Fork 1
/
6_Haul_QA
1254 lines (1174 loc) · 56.5 KB
/
6_Haul_QA
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
###################
# Version Control #
###################
# R version 3.2.2 (2015-08-14): Fire Safety
# platform x86_64-w64-mingw32 (64-bit)
###############
# Script Info #
###############
# This is Script 6 of X
# This Script follows the guidelines outlined in Section 4.1 HH Data - Haul Summary Information
# from: Moriarty and Greenstreet 2016
# The next step is to model missing data in the haul data of the surveys, and plot
# lots of graphs to check all the haul stuff is believable now!
# AUTHOR: Meadhbh Moriarty, 2016
# REVIEWED BY: Nicola Walker (Cefas) Jonas Hentati Sundberg (SLU)
########################
# Check all -9 are ~NA #
########################
# All -9 should be NA as -9 represents a missing value in DATRAS
# Function to replace multiple -9 across lots of data tables
# Work on a copy of hauls1 (built by an earlier script) so the raw table is preserved.
hauls<-hauls1
# replace_function() is defined elsewhere; presumably it replaces -9 with NA
# by reference (data.table semantics), since the result is not re-assigned
# -- TODO confirm. system.time() is only used to report how long it takes.
system.time(replace_function(hauls))
# Quick interactive sanity checks on the column set and value ranges.
names(hauls)
summary(hauls)
# restructure data table correctly
# Coerce all measurement columns to numeric in one pass, in place (data.table :=).
numCols <- c("SweepLngt", "HaulNo", "HaulDur", "ShootLat", "ShootLong", "HaulLat",
"HaulLong", "Depth", "Netopening", "Distance", "Warplngt",
"DoorSpread", "WingSpread", "GroundSpeed" , "SpeedWater")
hauls[, (numCols) := lapply(.SD, as.numeric), .SDcols = numCols]
##########################
# 4.1.1 Sample Location #
#########################
# check shoot and haul match ICES Stat Rec
summary(as.factor(hauls$Stratum))
# 37283 NA's
summary(as.factor(hauls$StatRec))
# 113 NA's
# ices<-read.csv("./Regional Boundaries Maps and Data/ICES_rectangles_statistics/Ices_rect_table.csv")
# names(ices)
# shoot longs that are NA should be -9 , silly DATRAS -9 thing messing stuff up here!
# Restore the DATRAS missing-value sentinel (-9) for ShootLong so that
# ices.rect2() below gets a numeric input rather than NA.
long_error<-hauls[is.na(hauls$ShootLong)]
list<-long_error$UniqueID
hauls$ShootLong[hauls$UniqueID%in%list]<--9
# check haul positions match ICES Stats
names(hauls)
# NOTE(review): `lat` is assigned but never used afterwards.
lat<-hauls$ShootLat
# ices.rect2() converts lon/lat to an ICES statistical rectangle code
# (presumably from the mapplots package -- confirm against script 1 imports).
hauls$check_StatRec<- ices.rect2(hauls$ShootLong, hauls$ShootLat)
summary(as.factor(hauls$check_StatRec))
summary(as.factor(hauls$Survey))
# NOTE(review): the next three assignments each overwrite `check`; only the
# last (rows with missing Stratum) survives for the summaries below.
check<-subset(hauls, !(hauls$check_StatRec%in%hauls$StatRec))
check<-hauls[which(hauls$check_StatRec!=hauls$StatRec),]
check<-subset(hauls, is.na(hauls$Stratum),)
summary(as.factor(check$Survey))
summary(as.factor(hauls$check_StatRec))
# use Check StatRec col = mistakes in actual Stat Rec.
# some mis-matches - use check_StatRec for correct Stat Rec
# need to assign stratum based on survey!
# New BaseMap
require(rgdal)
#load in basemap files for all maps
#set directory to working folder and insure subfolders are availible
europe<-readOGR("./Regional Boundaries Maps and Data/shapes//europe.dbf","europe")
contour_1000m<-readOGR("./Regional Boundaries Maps and Data/shapes//1000m.dbf","1000m")
NWW_boundary<-readOGR("./Regional Boundaries Maps and Data/shapes//NWW_boundary.dbf","NWW_boundary")
contour_200m<-readOGR(".//Regional Boundaries Maps and Data/shapes//200m.dbf","200m")
ospar<-readOGR("./Regional Boundaries Maps and Data/ospar_boundaries//OSPAR_inner_Boundary.dbf", "OSPAR_inner_Boundary")
spain<-readOGR("./Regional Boundaries Maps and Data/Sp-NGFS.WGS84//Sp_North.WGS84.dbf", "Sp_North.WGS84")
par(mai=c(0,0,0,0),bg='white' )
sco1<-readOGR("./Regional Boundaries Maps and Data/SWCQ1.WGS84//SWC_Q1.dbf", "SWC_Q1")
sco3<-readOGR("./Regional Boundaries Maps and Data/SWCQ4.WGS84//SWC_Q4.dbf", "SWC_Q4")
ices<-readOGR("./Regional Boundaries Maps and Data/ICES_rectangles_statistics/ICES_rectangles_statistics.dbf", "ICES_rectangles_statistics")
ire<-readOGR("./Regional Boundaries Maps and Data/IGFS.WGS84//IGFS.WGS84.dbf", "IGFS.WGS84")
ni<-readOGR("./Regional Boundaries Maps and Data/NI-IBTS.WGS84//NI_IBTS.WGS84.dbf", "NI_IBTS.WGS84")
evhoe<-readOGR("./Regional Boundaries Maps and Data/Fr-EVHOE.WGS84//EVHOE.WGS84.dbf", "EVHOE.WGS84")
rock<-readOGR("./Regional Boundaries Maps and Data/SWC-RockQ3.WGS84//SWC_Q3.dbf", "SWC_Q3")
summary(h$ShootLat)
summary(h$ShootLong)
#plot(contour_200m, add=TRUE, col="lightblue3")
#plot(contour_1000m, add=TRUE, col="lightblue4")
plot(NWW_boundary, border="white", xlim = c(-16,13), ylim = c(36, 62))
plot(europe,add=TRUE, xlim = c(-16, 13), ylim = c(36, 62), asp = 1, col=c('gray81'),
border='gray3')
# plot(contour_200m, add=TRUE, border="lightblue3")
# plot(contour_1000m, add=TRUE, border="lightblue4")
plot(ire, add=TRUE, lty=2, lwd=3, border="green")
plot(sco3, add=TRUE, lty=1, lwd=1, border="blue")
plot(evhoe, add=T, lty=1, lwd=1, border="red")
plot(ni, add=T, border="lightblue")
plot(rock, add=T, border="yellow")
dev.new()
plot(NWW_boundary, border="white", xlim = c(-16,13), ylim = c(36, 62))
plot(ices, add=TRUE)
plot(europe,add=TRUE, xlim = c(-16, 13), ylim = c(36, 62), asp = 1, col=c('gray81'),
border='gray3')
#text(ices$Centr_X, ices$Centr_Y, ices$ICESNAME)
plot(ospar, col="red", add=T, lwd=4)
check<-subset(hauls, !(hauls$check_StatRec%in%hauls$StatRec))
points(check$ShootLong, check$ShootLat, col="red")
dev.off()
# Station locations don't match because Spain uses Survey Strata - not a problem!
################
# 4.1.2 Depth #
################
# Estimate a depth for hauls with no recorded Depth, using NOAA bathymetry at
# the shoot position (hauls missing depth also lack haul-end positions, so the
# shoot position is the best available).
hauls$EstDepth<-hauls$Depth
# 767 observations
# All observations with no depth have no haul positional data
# estimate depth on shoot position is the best we can do
library(marmap)
summary(hauls$ShootLat)
summary(as.numeric(hauls$ShootLong))
# use NOAA website to get bathy map (0.5-minute resolution, study-area bbox)
papoue <- getNOAA.bathy(lon1 = -16, lon2 = 13,
                        lat1 = 36, lat2 = 62, resolution = .5)
summary(papoue)
# make a pretty map of all the stations
png(file="./Diagnostics/Diagnostic_plots/surveydepthmap.png")
blues <- c("lightsteelblue4", "lightsteelblue3",
           "lightsteelblue2", "lightsteelblue1")
greys <- c(grey(0.99), grey(0.95), grey(0.85))
plot(papoue, image = TRUE, land = FALSE, lwd = 0.03,
     bpal = list(c(0, max(papoue), greys),
                 c(min(papoue), 0, blues)))
plot(papoue, n = 1, lwd = 0.04, add = TRUE)
# One colour per survey (13 surveys); heat_hcl presumably from colorspace.
cols<-heat_hcl(13, c = c(80, 30), l=c(30,90), power=c(1/5, 1.5))
hauls$Survey<-(as.factor(hauls$Survey))
points(hauls$ShootLong, hauls$ShootLat, pch=19,
       cex=0.3, col=cols[hauls$Survey])
dev.off()
# Look up bathymetric depth at every shoot position.
hauls$ShootLong<-as.numeric(hauls$ShootLong)
NOAA_Depth<-get.depth(papoue, x=hauls$ShootLong, y=hauls$ShootLat, locator=FALSE)
# One-off positional fix for a known bad HaulLong record.
hauls$HaulLong[hauls$UniqueID=="BTS-VIIa/2006/3/COR/80/BT4A"]<--5.368430
# marmap depths are negative below sea level; flip sign to metres of depth.
hauls$EstDepth<-(NOAA_Depth$depth*-1)
# DepthNew = recorded depth where available, bathymetric estimate otherwise.
hauls$DepthNew<-hauls$Depth
hauls$DepthNew[is.na(hauls$Depth)]<-hauls$EstDepth[is.na(hauls$Depth)]
summary(hauls$DepthNew)
# make a graph of all the difference between estimated and recorded depths
png(file = "./Diagnostics/Diagnostic_plots/depth_differences-09-29-2016.png", bg = "transparent")
# BUG FIX: original had `pc=19` (typo); the plotting-character argument is `pch`.
plot(hauls$Depth, hauls$EstDepth, pch=19, xlab="Recorded Depth (m)",
     ylab="Estimated point depth (m)", ylim=c(0,800))
abline(a=0, b=1, col="red")
dev.off()
write.csv(papoue, "./Diagnostics/Diagnostic_plots/Bathy_map_09-29-2016.csv")
# add some diagnostics to check if the differences in depth are acceptable
png(file = "./Diagnostics/Diagnostic_plots/map_depth_differences.png", bg = "transparent")
plot(papoue, image = TRUE, land = FALSE, lwd = 0.03,
     bpal = list(c(0, max(papoue), greys),
                 c(min(papoue), 0, blues)))
plot(papoue, n = 1, lwd = 0.04, add = TRUE)
cols<-heat_hcl(13, c = c(80, 30), l=c(30,90), power=c(1/5, 1.5))
hauls$Survey<-(as.factor(hauls$Survey))
# Absolute depth discrepancy (equivalent to the original sqrt((a-b)^2)).
hauls$Diff_dep<-abs(hauls$Depth-hauls$EstDepth)
summary(hauls$Diff_dep)
# Circle AREA proportional to the discrepancy, hence sqrt(x/pi) radius.
radius<-sqrt(hauls$Diff_dep/pi)
symbols(hauls$ShootLong, hauls$ShootLat, circles=radius, inches=0.2, fg="white",
        bg=cols[hauls$Survey], xlab="Longitude", ylab="Latitude", add=TRUE)
# Hand-drawn legend: reference circles for 1 m to 2000 m discrepancies.
legend.text<-c(1,10,100,1000, 2000)
x<-c(9,9,9,9,9)
y<-c(50,49.8, 49.5, 48.5,47.5)
symbols(x,y, circles=sqrt(legend.text/pi), inches=0.2, fg="black",
        bg="grey", add=TRUE)
text(9.9, 50, "1m")
text(9.9, 49.8, "10m")
text(9.9, 49.5, "100m")
text(9.9, 48.5, "1000m")
text(9.9, 47.5, "2000m")
dev.off()
# Boxplot of depth discrepancy by survey (Survey is a factor, so plot() boxplots).
png(file = "./Diagnostics/Diagnostic_plots/box_plot_depth_differences_survey_2-8-16.png", bg = "transparent")
plot(hauls$Survey,hauls$Diff_dep, col=cols[hauls$Survey], pch=19)
dev.off()
######################
# 4.1.3 Sweep Length #
######################
# Impute missing/invalid sweep lengths using the survey manuals' "recommended
# sweeps" rules (generally 60 m in <76 m of water, 110 m in deeper water).
# The rules below are applied per survey/country/quarter/year and are strictly
# order-dependent: each assignment only fills cells still flagged "-9".
summary(as.factor(hauls$SweepLngt))
# Sweep Length Values are sometimes incorrect or missing,
# delete incorrect values
hauls$EstSweepLngt<-hauls$SweepLngt
# NOTE: assigning the string "-9" coerces EstSweepLngt to character; all later
# comparisons against "-9" and the category step below rely on this.
hauls$EstSweepLngt[hauls$SweepLngt>121&hauls$Gear=="GOV"]<-"-9"
hauls$EstSweepLngt[is.na(hauls$SweepLngt)]<-"-9"
summary(as.factor(hauls$EstSweepLngt))
# find out extent of issue
sweepsummary<-ddply(hauls, c("Survey","Country", "Year", "Quarter", "EstSweepLngt"),
summarise, N=length(EstSweepLngt))
summary(as.factor(hauls$Survey))
# BTS Surveys don't record a sweep - should be NA
# ROT surveys - no sweep - should be NA
# NCT surveys - no sweeps
hauls$EstSweepLngt[hauls$Survey=="BTS"]<-NA
hauls$EstSweepLngt[hauls$Survey=="BTS-VIIa"]<-NA
hauls$EstSweepLngt[hauls$Survey=="NIGFS"]<-NA
hauls$EstSweepLngt[hauls$Survey=="PT-IBTS"]<-NA
# in 1983/1984 no country recorded sweep, but in 1985+ countries
# reported using "Recommended Sweeps" so gonna apply the 60/110 rule to these
# years
# NS-IBTS Q1 1983: 60 m sweeps shallower than 76 m, 110 m otherwise.
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"& hauls$Quarter=="1" &
hauls$Year=="1983" & hauls$DepthNew<76]<-60
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"& hauls$Quarter=="1" &
hauls$Year=="1983" &hauls$DepthNew>75]<-110
# NS-IBTS Q1 1984: same 60/110 depth rule.
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"& hauls$Quarter=="1" &
hauls$Year=="1984" &hauls$DepthNew<76]<-60
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"& hauls$Quarter=="1" &
hauls$Year=="1984" & hauls$DepthNew>75]<-110
# Denmark: Apply standard as in Manual
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"& hauls$Quarter=="1" &
hauls$Country=="DEN" & hauls$DepthNew<76]<-60
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"& hauls$Quarter=="1" &
hauls$Country=="DEN" & hauls$DepthNew>75]<-110
# Denmark Q3: flat 60 m before 2005; 60/110 depth rule from 2005 onwards.
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"& hauls$Year<2005 &
hauls$Quarter=="3" & hauls$Country=="DEN"]<-60
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"& hauls$Year>2004 &
hauls$Quarter=="3" & hauls$Country=="DEN"&
hauls$DepthNew<76]<-60
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"& hauls$Quarter=="3"
& hauls$Country=="DEN" & hauls$Year>2004 &
hauls$DepthNew>75]<-110
# Germany next - Q1 and Q 3 rules differ
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"& hauls$Quarter=="1" &
hauls$Country=="GFR" & hauls$DepthNew<76]<-60
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"& hauls$Quarter=="1" &
hauls$Country=="GFR" & hauls$DepthNew>75]<-110
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"& hauls$Year<2004 &
hauls$Quarter=="3" & hauls$Country=="GFR"]<-60
#Netherlands next only 2 years missing data in quarter 1
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"&
hauls$Quarter=="1" & hauls$Country=="NED"]<-60
#Norway next
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"&
hauls$Quarter=="3" & hauls$Country=="NOR"]<-60
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"& hauls$Quarter=="1" &
hauls$Country=="NOR" & hauls$DepthNew<76]<-60
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"& hauls$Quarter=="1" &
hauls$Country=="NOR" & hauls$DepthNew>75]<-110
# Sweden
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"&
hauls$Quarter=="3" & hauls$Country=="SWE"]<-60
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"& hauls$Quarter=="1" &
hauls$Country=="SWE" & hauls$DepthNew<76]<-60
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"& hauls$Quarter=="1" &
hauls$Country=="SWE" & hauls$DepthNew>75]<-110
# Scotland
hauls$EstSweepLngt[hauls$Survey=="NS-IBTS"&
hauls$EstSweepLngt=="-9"&
hauls$Quarter=="1" & hauls$Country=="SCO"]<-60
# SWC Survey
hauls$EstSweepLngt[hauls$Survey=="SWC-IBTS"&
hauls$EstSweepLngt=="-9"& hauls$Year<2011]<-60
# Rockall
hauls$EstSweepLngt[hauls$Survey=="ROCKALL"&
hauls$EstSweepLngt=="-9"& hauls$Year<2011]<-60
# Check all new sweeps
sweepsummary<-ddply(hauls, c("Survey","Country", "Year", "Quarter", "EstSweepLngt"),
summarise, N=length(EstSweepLngt))
#Assign cat of short or long to sweep length
summary(as.factor(hauls$EstSweepLngt))
hauls$EstSweepCat<-hauls$EstSweepLngt
# NOTE(review): EstSweepLngt is character here, so `<61` compares via numeric
# coercion of each element -- confirm no non-numeric strings remain at this point.
hauls$EstSweepCat[hauls$EstSweepLngt<61]<-"short"
hauls$EstSweepCat[hauls$EstSweepLngt==97|hauls$EstSweepLngt==110|
hauls$EstSweepLngt==100|hauls$EstSweepLngt==120]<-"long"
sweepcatsummary<-ddply(hauls, c("Survey","Country", "Year", "Quarter", "EstSweepCat"),
summarise, N=length(EstSweepCat))
#######################
# 4.1.4 Haul Duration #
#######################
# Check range >66 and <13 not within bounds
# Only the distribution is inspected here; durations outside 13-66 minutes
# are treated as out of bounds in the distance checks further down.
summary(hauls$HaulDur)
######################
# 4.1.5 Ground Speed #
######################
### Draw some plots to look at data
# make a plot to look at the current groundspeed*time against distance
# if perfect we expect an intercept of 0 and a slope of 1
# (GroundSpeed is knots; *1852/60 converts to metres per minute)
png(file = "./Diagnostics/Diagnostic_plots/distance_speed_time_differences_29-09-2016.png", bg = "transparent")
par(xpd=FALSE)
plot(hauls$GroundSpeed*1852/60*hauls$HaulDur, hauls$Distance,
     pch=19, col="black", cex=0.5, xlab="Speed X Time", ylab="Distance")
abline(a=0, b=1, col="lightgrey", lwd=2)
dev.off()
# Speed against duration; reference lines at the accepted 2-6 knot envelope.
png(file="groundspeed_diagnostics.png", bg="transparent")
plot(hauls$HaulDur, hauls$GroundSpeed, pch=19, xlab="Time", ylab="Speed (knots)")
abline(h=6, col="red")
abline(h=2, col="red")
dev.off()
# Change the confidence interval fill color
png(file = "./Diagnostics/Diagnostic_plots/distance_speed_time_differences_with_CI_29-09-2016.png", bg = "transparent")
# Reference columns directly inside aes() rather than hauls$ (the data argument
# already supplies them; behaviour is identical for this plot).
p1<-ggplot(hauls, aes(x=GroundSpeed*1852/60*HaulDur,
                      y=Distance)) +
  geom_point(shape=18, color="black")+
  geom_smooth(method=lm, linetype="dashed",
              color="lightgrey", fill="darkgrey", se=TRUE, fullrange=FALSE, level=.95)
# BUG FIX: inside a sourced script a ggplot object is not auto-printed, so the
# png device would have captured nothing; print() is required to render it.
print(p1 + scale_color_grey()+theme_classic())
dev.off()
# check ground speed
png(file="./Diagnostics/Diagnostic_plots/groundspeed_boxplot_29-09-2016.png", bg="transparent")
plot(hauls$Survey, hauls$GroundSpeed, pch=19, xlab="Survey",
     ylab="GroundSpeed (knots)", col="lightgrey")
dev.off()
# outliers in Groundspeed found
check<-hauls[hauls$GroundSpeed<3|hauls$GroundSpeed>5, ]
png(file="./Diagnostics/Diagnostic_plots/groundspeed_distance_comparision_29-09-2016.png", bg="transparent")
plot(check$GroundSpeed*check$HaulDur*1852/60, check$Distance, pch=19, col="black")
dev.off()
# In NS-IBTS 1995 it seems some of the french ground speeds are in the Speed Water Column
# and the Speed water is in the Ground Speed Col. Swap them back by UniqueID.
hauls$GroundSpeed[hauls$UniqueID=="NS-IBTS/1995/1/THA/2/GOV"]<-4
hauls$SpeedWater[hauls$UniqueID=="NS-IBTS/1995/1/THA/2/GOV"]<-2
hauls$GroundSpeed[hauls$UniqueID=="NS-IBTS/1995/1/THA/3/GOV"]<-4
hauls$SpeedWater[hauls$UniqueID=="NS-IBTS/1995/1/THA/3/GOV"]<-2
hauls$GroundSpeed[hauls$UniqueID=="NS-IBTS/1995/1/THA/6/GOV"]<-4
hauls$GroundSpeed[hauls$UniqueID=="NS-IBTS/1995/1/THA/9/GOV"]<-3.8
hauls$SpeedWater[hauls$UniqueID=="NS-IBTS/1995/1/THA/9/GOV"]<-1.9
hauls$GroundSpeed[hauls$UniqueID=="NS-IBTS/1995/1/THA/11/GOV"]<-3.9
hauls$SpeedWater[hauls$UniqueID=="NS-IBTS/1995/1/THA/11/GOV"]<-1.9
# in three cases the ground speed is recorded as zero - change to NA
list<-c('SP_ARSA/1995/1/CDS/23/BAK', 'SP_ARSA/1995/1/CDS/25/BAK',
        'SP_ARSA/1995/1/CDS/30/BAK')
hauls$GroundSpeed[hauls$UniqueID%in%list]<-NA
# really fast ground speed check outlat long distance matches but in others it was way out
#check<-hauls[hauls$GroundSpeed<3|hauls$GroundSpeed>5
# GROUNDSPEED NOT RECORDED BUT DISTANCE AND DURATION RECORDED
# Back-calculate the speed implied by recorded distance and duration (knots).
hauls$Realised_groundspeed<-hauls$Distance/hauls$HaulDur/1852*60
# BUG FIX: original summarised hauls$Estimated_groundspeed, a column that is
# never created (it would silently print NULL); the column built above is
# Realised_groundspeed.
summary(hauls$Realised_groundspeed)
png(file="./Diagnostics/Diagnostic_plots/groundspeed_predictedVdistance_divided_by_time_29-09-2016.png", bg="transparent")
plot(hauls$Realised_groundspeed, hauls$GroundSpeed,
     pch=19, col="lightgrey",
     xlim=c(1,6), ylim=c(1,6))
abline(0,1, col="black")
dev.off()
# Recorded vs realised ground speed, with manually-highlighted outlier clusters.
# draw.circle is presumably from the plotrix package -- confirm script 1 imports.
png(file = "./Diagnostics/Diagnostic_plots/RecordedVRealisedGS_29-09-2016.png", bg = "transparent")
plot(hauls$GroundSpeed, hauls$Distance/hauls$HaulDur/1852*60,
pch=19, col="black", cex=1, xlab="Recorded Groundspeed (knots)",
ylab="Realised Groundspeed (knots)", xlim=c(0,9), ylim=c(0,9))
abline(a=0, b=1, col="lightgrey", lwd=2)
draw.circle(4,7.5, 1.2, border="red", lwd=2)
abline(h=6, col="lightgrey", lwd=1, lty=2)
abline(v=6, col="lightgrey", lwd=1, lty=2)
abline(h=2, col="lightgrey", lwd=1, lty=2)
abline(h=1, col="lightgrey", lwd=1, lty=2)
draw.circle(8.4, 8.4, 0.1, border="red", lty=4, lwd=2)
dev.off()
# no surprises here!
# using model with r squared of ~0.74 and good AIC score, which makes sense in
# mechanical terms
# Model 1: ground speed by Quarter x Ship x Gear interaction.
gs_model1<-lm(GroundSpeed~Quarter:Ship:Gear, data=hauls)
# pretty decent model here - problem is it won't work for all ships as I have some
# ships with no data at all on Ground speed to inform model
summary(hauls$GroundSpeed)
# Split the missing-speed hauls by whether their ship has ANY speed records
# (those can use model 1, which includes Ship); others need model 2.
needSpeed <- hauls[is.na(hauls$GroundSpeed),]
needSpeedShip <- unique(needSpeed$Ship)
withSpeedShip <- unique(hauls$Ship[!is.na(hauls$GroundSpeed)])
needSpeedGear<-unique(needSpeed$Gear)
# ID ships that have some missing GroundSpeed records
canSpeedShip <- needSpeedShip[needSpeedShip %in% withSpeedShip]
canSpeed <- hauls[hauls$Ship %in% canSpeedShip, ]
# Split the data into the two types: only a few missing and completely missing
# NOTE(review): canSpeedNA and canSpeedOK are not referenced again in this script.
canSpeedNA <- canSpeed[is.na(canSpeed$GroundSpeed) & is.na(canSpeed$Distance),]
canSpeedOK <- canSpeed[!is.na(canSpeed$GroundSpeed),]
# Model 2: fallback without Ship, for ships with no speed data at all.
gs_model2<-lm(GroundSpeed~Quarter:Gear, data=hauls)
AIC(gs_model1, gs_model2)
anova(gs_model1,gs_model2)
#predictedGS<-predict(gs_model2, hauls, allow.new.levels=F)
# allmissing observations
# ROT gear has no speed data ever; excluded from prediction (handled manually below).
canSpeed<-subset(canSpeed, !canSpeed$Gear=="ROT",)
canSpeed$predicted_groundspeed_gs1<-predict(gs_model1, canSpeed , allow.new.levels=T)
summary(canSpeed$predicted_groundspeed_gs1)
plot(canSpeed$predicted_groundspeed_gs1,canSpeed$GroundSpeed, pch=19, col='grey' )
gs5_dat<-subset(hauls, !hauls$Gear=="ROT",)
gs5_dat$predicted_groundspeed_gs2<-predict(gs_model2, gs5_dat , allow.new.levels=T)
summary(gs5_dat$predicted_groundspeed_gs2)
summary(canSpeed$predicted_groundspeed_gs1)
# attach predictions to the hauls dataset (matched back by UniqueID)
list<-canSpeed$UniqueID
hauls$predicted_groundspeed_gs1[hauls$UniqueID%in%list]<-canSpeed$predicted_groundspeed_gs1[hauls$UniqueID%in%list]
list<-gs5_dat$UniqueID
hauls$predicted_groundspeed_gs2[hauls$UniqueID%in%list]<-gs5_dat$predicted_groundspeed_gs2[hauls$UniqueID%in%list]
# Build GroundSpeed_Used as a fill-in cascade; each step only fills cells that
# are still NA, and GroundSpeed_Quality_Code records which source won.
# If real data available use that (but blank out speeds outside 2-6 knots).
hauls$GroundSpeed_Used<-hauls$GroundSpeed
hauls$GroundSpeed_Used[hauls$GroundSpeed_Used<2]<-NA
hauls$GroundSpeed_Used[hauls$GroundSpeed_Used>6]<-NA
hauls$GroundSpeed_Used[hauls$UniqueID=="IE-IGFS/2015/4/CEXP/100/GOV"]<-NA
hauls$GroundSpeed_Quality_Code[!is.na(hauls$GroundSpeed_Used)]<-"Recorded_Groundspeed"
# If gear is ROT then no data ever available - assume a manual 4 knots
hauls$GroundSpeed_Quality_Code[hauls$Gear=="ROT"]<-"Manual_Speed"
hauls$GroundSpeed_Used[hauls$Gear=="ROT"]<-4
# If ground speed available for Ship and Gear - use that (model 1)
hauls$GroundSpeed_Quality_Code[is.na(hauls$GroundSpeed_Used)]<-"model_1"
hauls$GroundSpeed_Used[is.na(hauls$GroundSpeed_Used)]<-hauls$predicted_groundspeed_gs1[is.na(hauls$GroundSpeed_Used)]
# if ground speed available for Gear (model 2)
summary(hauls$predicted_groundspeed_gs2)
hauls$GroundSpeed_Quality_Code[is.na(hauls$GroundSpeed_Used)]<-"model_2"
hauls$GroundSpeed_Used[is.na(hauls$GroundSpeed_Used)]<-hauls$predicted_groundspeed_gs2[is.na(hauls$GroundSpeed_Used)]
# If no model fits use Manual speed
hauls$GroundSpeed_Quality_Code[is.na(hauls$GroundSpeed_Used)]<-"Manual_Speed"
hauls$GroundSpeed_Used[is.na(hauls$GroundSpeed_Used)]<-4
summary(hauls$GroundSpeed_Used)
summary(as.factor(hauls$GroundSpeed_Quality_Code))
# Convert knots to metres per minute for the distance calculations below.
hauls[, GroundSpeed_meters_per_min := GroundSpeed_Used * 1852 / 60]
summary(hauls$GroundSpeed_meters_per_min)
summary(hauls$GroundSpeed_Used[hauls$Country=="SPA"])
# Spain within expected bounds
summary(hauls$GroundSpeed_Used[hauls$Country=="POR"])
# Portugal within expected bounds
summary(hauls$GroundSpeed_Used[hauls$Gear=="BT7"])
summary(hauls$GroundSpeed_Used[hauls$Gear=="BT8"])
summary(hauls$GroundSpeed_Used[hauls$Gear=="BT4A"])
summary(hauls$GroundSpeed_Used[hauls$Gear=="GOV"])
summary(hauls$GroundSpeed_Used[hauls$Gear=="ROT"])
#get equation for gs1
formula(gs_model1)
summary(gs_model1)
coeffs=(coefficients(gs_model1))
formula(gs_model2)
coeffs=(coefficients(gs_model2))
# draw a graph showing predicted v actual ground speeds.
# NOTE(review): predict(gs_model1) returns fitted values for the rows lm kept
# (non-NA GroundSpeed), assumed to align with `gs` below -- confirm lengths match.
png(file="groundspeed_predictedVactual_29-09-2016.png", bg="transparent")
predict<-predict(gs_model1)
gs<-subset(hauls, !is.na(hauls$GroundSpeed),)
plot(predict, gs$GroundSpeed, xlab="Predicted GroundSpeed (knots)",
ylab="Recorded Groundspeed (knots)", pch=19)
predict1<-predict(gs_model2)
points(predict1, gs$GroundSpeed, pch=19, col="lightgrey")
abline(0,1, col="red")
dev.off()
########################
# 4.1.6 Towed Distance #
########################
## Calculate haversine distance with shoot and haul coordinates ##
# earth.dist presumably returns km (fossil package convention); *1000 gives metres
# -- TODO confirm against the package loaded in script 1.
hauls[, LatLongDistance := earth.dist(long1 = ShootLong,
lat1 = ShootLat,
long2 = HaulLong,
lat2 = HaulLat) * 1000]
## RAW DISTANCE ##
# Preference order for newDist: raw recorded Distance, then haversine,
# then speed x time; qualityDistance records which source was used.
hauls[!is.na(Distance), c("newDist", "qualityDistance") :=
list(Distance, "rawDistance")]
## HAVERSINE DISTANCE ##
# if haul and shoot coordinates are the same (i.e., equal to zero) then leave as NA
# Also ignore hauls with funny implied speed.
# Bounds 61.73 and 185.2 m/min correspond to 2 and 6 knots (x * 1852 / 60).
hauls[is.na(Distance) & !is.na(LatLongDistance) & LatLongDistance > 1 &
LatLongDistance/HaulDur>61.73 & LatLongDistance/HaulDur<185.2 ,
c("newDist", "qualityDistance") :=
list(LatLongDistance, "LatLongDistance")]
## DURATION X SPEED ##
# HaulDur x GroundSpeed
hauls[, SpeedTimeDist := hauls$GroundSpeed_meters_per_min*HaulDur]
hauls[ !is.na(SpeedTimeDist) &is.na(newDist)&
SpeedTimeDist/HaulDur>61.73 & SpeedTimeDist/HaulDur<185.2,
c("newDist", "qualityDistance") :=
list( SpeedTimeDist, "SpeedHaulDur")]
# Hauls that don't have raw distance, shoot/haul coordinates, or GroundSpeed can be estimated
# using linear models to predict GroundSpeed. Two different types of missing data are found:
# 1) ships that have no GroundSpeed records and we need to estimate from other ships, and
# 2) ships that have only a few missing GroundSpeed records and we can use ship as a factor in the lm
# Model for estimating Ground Speed has been reviewed based on feedback
# from the IBTS working Group (V.T)
summary(as.numeric(hauls$newDist))
# check new distances relationships
png(file="distance_speed_time_differences.png", bg = "transparent")
par(xpd=FALSE)
plot(hauls$SpeedTimeDist, hauls$newDist,
pch=19, col="black", cex=0.5, xlab="Speed X Time", ylab="Distance")
abline(a=0, b=1, col="lightgrey", lwd=2)
dev.off()
# Check distances within bounds (red dashed lines at 0.5x and 1.5x)
png(file="distance_speed_time_differences_bounds.png", bg = "transparent")
par(xpd=FALSE)
plot(hauls$SpeedTimeDist, hauls$newDist,
pch=19, col="black", cex=0.5, xlab="Speed X Time", ylab="Distance")
abline(a=0, b=1, col="lightgrey", lwd=2)
abline(a=0, b=.5, col="red", lwd=2, lty=2)
abline(a=0, b=1.5, col="red", lwd=2, lty=2)
dev.off()
############################
# Refine newDist: flag hauls whose distance implies an implausible speed and
# progressively replace them with better-supported estimates.
plot(hauls$HaulDur, hauls$newDist, pch=19, cex=0.5, xlab="Time", ylab="Distance")
# perfect speed - 4 knots, bounds 2-6 knots
x<-c(13:66)
points(x, x*4*1852/60, type="l", col="red", lwd=3)
points(x, x*2*1852/60, type="l", col="red", lty=2, lwd=2)
points(x, x*6*1852/60, type="l", col="red", lty=2, lwd=2)
# Set up extra check col: distance implied by a flat manual 4-knot speed.
hauls$manual_speed_distance<-hauls$HaulDur*4*1852/60
# distance checks required as some of them are really odd.
# A zero haversine means shoot == haul position: treat as missing.
hauls$LatLongDistance[hauls$LatLongDistance==0]<-NA
outside_bounds<-subset(hauls, hauls$newDist/hauls$manual_speed_distance>1.50|
                         hauls$newDist/hauls$manual_speed_distance<.5, )
points(outside_bounds$HaulDur,outside_bounds$newDist, col="red", pch=19)
# only 127 to check :)
# if haversine is available use that
list<-outside_bounds$UniqueID
hauls$newDist[hauls$UniqueID%in%list]<-hauls$LatLongDistance[hauls$UniqueID%in%list]
hauls$qualityDistance[hauls$UniqueID%in%list]<-"LatLongDistance"
# recheck
outside_bounds<-subset(hauls, hauls$newDist/hauls$manual_speed_distance>1.50|
                         hauls$newDist/hauls$manual_speed_distance<.5, )
points(outside_bounds$HaulDur,outside_bounds$newDist, col="red", pch=19)
# still not gone, use speed*time
list<-outside_bounds$UniqueID
hauls$newDist[hauls$UniqueID%in%list]<-hauls$SpeedTimeDist[hauls$UniqueID%in%list]
hauls$qualityDistance[hauls$UniqueID%in%list]<-"SpeedHaulDur"
# recheck # all within bounds now
# next refine estimates
# is distance used within 20% of lat long distance
# (variable name normalised from the original's accidental double underscore)
check_dist_haversine_match<-subset(hauls, hauls$newDist/hauls$LatLongDistance>1.2|
                                     hauls$newDist/hauls$LatLongDistance<.8,)
# BUG FIX: original called points() on the undefined name check_haversine_match.
points(check_dist_haversine_match$HaulDur,check_dist_haversine_match$newDist, col="red", pch=19)
# so 1433 points more than 20% away - all rest are fine
check_dist_speed_match<-subset(check_dist_haversine_match,
                               check_dist_haversine_match$newDist/check_dist_haversine_match$SpeedTimeDist>1.2|
                               check_dist_haversine_match$newDist/check_dist_haversine_match$SpeedTimeDist<.8,)
# 121 not okay with speed
# BUG FIX: original called points() on the undefined name check_speed_match.
points(check_dist_speed_match$HaulDur,check_dist_speed_match$newDist, col="yellow", pch=19)
check_lat_speed_match<-subset(check_dist_speed_match,
                              check_dist_speed_match$LatLongDistance/check_dist_speed_match$SpeedTimeDist<1.2&
                              check_dist_speed_match$LatLongDistance/check_dist_speed_match$SpeedTimeDist>.8,)
# so 45 of the haversine and speed X time are within 20% - use the lat long rather than the
# recorded distance
list<-check_lat_speed_match$UniqueID
hauls$newDist[hauls$UniqueID%in%list]<-hauls$LatLongDistance[hauls$UniqueID%in%list]
hauls$qualityDistance[hauls$UniqueID%in%list]<-"LatLongDistance"
# So whats left: neither haversine nor speed x time agree
check_no_match<-subset(check_dist_speed_match,
                       check_dist_speed_match$LatLongDistance/check_dist_speed_match$SpeedTimeDist>1.2|
                       check_dist_speed_match$LatLongDistance/check_dist_speed_match$SpeedTimeDist<.8,)
points(check_no_match$HaulDur,check_no_match$newDist, col="green", pch=19)
# Does value lie within +/- 25% of Man Speed
check_within_bounds<-subset(check_no_match,
                            check_no_match$newDist/check_no_match$manual_speed_distance>1.25|
                            check_no_match$newDist/check_no_match$manual_speed_distance<.75, )
# check all distances available?
summary(hauls$newDist)
# problem with some lat long distances not available
# Use manual speed
hauls$qualityDistance[is.na(hauls$newDist)]<-"SpeedHaulDur"
hauls$newDist[is.na(hauls$newDist)]<-hauls$SpeedTimeDist[is.na(hauls$newDist)]
# all values within accepted bounds.
png(file="distances_cleaned_29-09-2016.png", bg = "transparent")
plot(hauls$HaulDur, hauls$newDist, pch=19, cex=0.5, xlab="Time", ylab="Distance")
# perfect speed - 4 knots, bounds 2-6 knots
x<-c(13:66)
points(x, x*4*1852/60, type="l", col="red", lwd=3)
points(x, x*2*1852/60, type="l", col="red", lty=2, lwd=2)
points(x, x*6*1852/60, type="l", col="red", lty=2, lwd=2)
dev.off()
# all distances in newDist are within acceptable ranges; the best estimate is applied in
# each situation
write.csv(hauls, "Working_hauls_file_29_09_2016.csv")
# remove all the intermediate files
summary(as.factor(hauls$qualityDistance))
#############################################
# 4.1.7 Otter Trawl Net Geometry Parameters #
#############################################
#######################
# 4.1.7.1 Wing Spread #
#######################
###########################################
# 4.1.7.1.1 The GOV Otter Trawl (Model 1) #
###########################################
# remove outlier from irish data
summary(hauls$WingSpread[hauls$Country=="IRL"])
# 38 too big
hauls$WingSpread[hauls$Country=="IRL"&hauls$WingSpread==38]<-NA
#Centered data for model
str(hauls)
# Add Sweep Cats - See Est Sweep Cats above.
hauls[!is.na(Year), "Yearfac":=
as.factor(Year)]
hauls[!is.na(Quarter), "Qfac":=
as.factor(Quarter)]
hauls[!is.na(Ship), "Shipfac":=
as.factor(Ship)]
summary(as.factor(hauls$EstSweepCat[hauls$Gear=="GOV"]))
options(show.signif.stars = T)
library(lmerTest)
#Center data to give model stability:
# store the grand mean of each geometry variable (over non-NA rows) as a
# column, then create centred and log-centred versions for the models below.
hauls[!is.na(DepthNew), c("meanDepth") :=
mean(DepthNew)]
hauls[!is.na(WingSpread), c("meanWingSpread") :=
mean(WingSpread)]
hauls[!is.na(Netopening), c("meanNetopening") :=
mean(Netopening)]
hauls[!is.na(DoorSpread), c("meanDoorSpread"):=
mean(DoorSpread)]
# NOTE(review): DepthCenter subtracts meanDepth (computed from DepthNew)
# from the raw Depth column — confirm this mix of Depth/DepthNew is intended;
# it is recomputed from DepthNew later in the script anyway.
hauls[!is.na(Depth), c("DepthCenter") :=
Depth-meanDepth]
hauls[!is.na(DepthNew), c("LogDepthCenter") :=
log(DepthNew)-log(meanDepth)]
hauls[!is.na(WingSpread), c("WingSpreadCenter") :=
WingSpread-meanWingSpread]
hauls[!is.na(DoorSpread), c("DoorSpreadCenter") :=
DoorSpread-meanDoorSpread]
hauls[!is.na(WingSpread), c("LogWingSpreadCenter") :=
log(WingSpread)-log(meanWingSpread)]
hauls[!is.na(DoorSpread), c("LogDoorSpreadCenter") :=
log(DoorSpread)-log(meanDoorSpread)]
hauls[!is.na(Netopening), c("NetopeningCenter") :=
Netopening-meanNetopening]
hauls[!is.na(Netopening), c("LogNetopeningCenter") :=
log(Netopening)-log(meanNetopening)]
# Sweep length as a categorical model term.
hauls$SweepCat<-as.factor(hauls$SweepLngt)
summary(as.factor(hauls$Ship))
# Training set: GOV hauls with all three geometry measurements present.
train<-subset(hauls, Gear=="GOV"& (!is.na(WingSpread)) & (!is.na(DoorSpread))
&(!is.na(Netopening)),)
# Model 1: log wing spread ~ log-centred depth within sweep category,
# random intercept for Ship nested in sweep category (ML fit for comparison).
ws_model1<- lmer(log(WingSpread) ~ LogDepthCenter:EstSweepCat
+ (1|Ship:EstSweepCat),
data=train, REML=FALSE)
# Marginal/conditional R^2 for the mixed model (MuMIn).
r.squaredGLMM(ws_model1)
summary(train$Ship)
cols<-rainbow(6)
# Fitted values on the log scale; note `data` shadows utils::data here.
data<-predict(ws_model1)
summary(as.factor(train$Survey))
summary(train$Survey)
# 13 surveys present, so replace the 6-colour palette set above.
cols<-rainbow(13)
# Observed (squares) vs back-transformed fitted (circles) wing spread by depth,
# with lowess smoothers through each.
png(file="GOV_Wingspreads_model1_29-09-2016.png", bg="transparent")
plot(train$DepthNew, train$WingSpread, col=cols[as.factor(train$Survey)],
pch=15, xlab="Depth (m)", ylab="WingSpread(m)")
points(train$DepthNew, exp(data), pch=21, col=cols[as.factor(train$Survey)],
bg="black")
lines(lowess(train$WingSpread~train$DepthNew), col="black", lwd=2)
lines(lowess(exp(data)~train$DepthNew), col="red", lwd=3, lty=2)
legend(300, 40, levels(as.factor(hauls$Survey[hauls$Gear=="GOV"])),
col=cols, pch=15, ncol=3, cex=1, bty="o")
dev.off()
# set up user data using selected model
# subset all GOV gear
hauls$EstSweepCat<-as.factor(hauls$EstSweepCat)
hauls$Ship<-as.factor(hauls$Ship)
the_gov<-subset(hauls, Gear=="GOV")
summary(the_gov$WingSpread)
summary(the_gov$LogDoorSpreadCenter)
str(the_gov)
# 30442 obs
# Predict for every GOV haul; allow.new.levels=T lets ships unseen in the
# training data fall back to the population-level fit.
the_gov$mod1_wingspread_gov<-exp(predict(ws_model1, the_gov, allow.new.levels=T))
summary(the_gov$mod1_wingspread_gov)
# If real values are available use these
hauls$Use_WingSpread[!is.na(hauls$WingSpread)&
hauls$Gear=="GOV"]<-hauls$WingSpread[!is.na(hauls$WingSpread)&
hauls$Gear=="GOV"]
# NOTE(review): this quality label is set for ALL gears (no Gear filter);
# later sections overwrite it for ROT/SPA, so the net effect appears intended.
hauls$QualityWing[!is.na(hauls$WingSpread)]<-"raw_wingspread"
# Copy the model predictions back onto hauls, matched by UniqueID.
list<-the_gov$UniqueID
hauls$mod1_wingspread_gov[hauls$UniqueID%in%list]<-the_gov$mod1_wingspread_gov[the_gov$UniqueID%in%list]
# Fill only the gaps: model values where the raw wing spread is missing.
hauls$Use_WingSpread[is.na(hauls$WingSpread)&
hauls$Gear=="GOV"]<-hauls$mod1_wingspread_gov[is.na(hauls$WingSpread)&
hauls$Gear=="GOV"]
hauls$QualityWing[is.na(hauls$WingSpread)&!is.na(hauls$mod1_wingspread_gov)&
hauls$Gear=="GOV"]<-"mod1_wing_gov"
#check wing speads
summary((hauls$Use_WingSpread[hauls$Gear=="GOV"]))
summary(hauls$Use_WingSpread)
# get model outputs
formula(ws_model1)
summary(ws_model1)
coeffs=(coefficients(ws_model1))
#####################################
# 4.1.7.1.2 The ROT Trawl (Model 2) #
#####################################
summary(hauls$WingSpread[hauls$Gear=="ROT"])
# 2333 estimated values needed
# Matt sent me some trail wingspreads to help to model these data
# but data now available in DATRAS file 20 values available
# lets look at these data first
png(file = "wingspread_ROT_29-09-2016.png", bg = "transparent")
plot(hauls$DepthNew[hauls$Gear=="ROT"],hauls$WingSpread[hauls$Gear=="ROT"],
pch=19, xlab="Depth (m)",
ylab="Wingspread (m)" )
# Simple linear trend of wing spread on depth for reference.
abline(lm(hauls$WingSpread[!is.na(hauls$WingSpread)&hauls$Gear=="ROT"]~
hauls$DepthNew[!is.na(hauls$WingSpread)&hauls$Gear=="ROT"]),
col="lightgrey")
dev.off()
# given how data poor the situation is this must be kept very simple
ws_dat<-subset(hauls, !is.na(hauls$WingSpread) & hauls$Gear=="ROT", )
# need doorspread first
# Great we can keep this really simple and have a really strong
# Adjusted R sqd of 0.952
# has the best AIC score (-153.47.6)
# Coefficients below are hard-coded from an offline log-log fit of wing
# spread on door spread — TODO confirm against the fitted model object.
# NOTE(review): Use_DoorSpread is populated in section 4.1.8 below, so this
# line relies on the doorspread section having been run first.
hauls$mod2_wingspread_rot[hauls$Gear=="ROT"]<-exp(0.3798356+0.6489731*log(hauls$Use_DoorSpread[hauls$Gear=="ROT"]))
# set up user values for wingspread: raw measurements first, model fills gaps.
#RAW WINGSPREAD
hauls$Use_WingSpread[!is.na(hauls$WingSpread)&hauls$Gear=="ROT"]<-hauls$WingSpread[!is.na(hauls$WingSpread)&hauls$Gear=="ROT"]
hauls$QualityWing[!is.na(hauls$WingSpread)&hauls$Gear=="ROT"]<-"raw_wingspread"
hauls$Use_WingSpread[is.na(hauls$WingSpread)&hauls$Gear=="ROT"]<-hauls$mod2_wingspread_rot[is.na(hauls$WingSpread)&hauls$Gear=="ROT"]
hauls$QualityWing[is.na(hauls$WingSpread)&hauls$Gear=="ROT"]<-"model_wingspread_rot"
summary(hauls$mod2_wingspread_rot[hauls$Gear=="ROT"])
summary(hauls$Use_WingSpread[hauls$Gear=="ROT"])
#########################################
# 4.1.7.1.3 The BAK Trawl (Model 3 & 4) #
#########################################
# Set up the data set for model selection: Spanish hauls with complete
# gear-geometry measurements (door, wing, net opening).
spain<-hauls[!is.na(hauls$DoorSpread) & !is.na(hauls$WingSpread)
             & !is.na(hauls$Netopening)&hauls$Country=="SPA", ]
plot(spain$WingSpread, spain$DoorSpread, pch=19, col=cols[as.factor(spain$Gear)])
plot(spain$DepthNew, spain$WingSpread, pch=19,col=cols[as.factor(spain$Gear)])
# 80 observations in dataset.
# Fix: door_dat previously indexed `hauls` with a logical mask built from
# `spain` (mismatched lengths / wrong rows); both subsets must come from spain.
door_dat<-spain[!is.na(spain$DoorSpread),]
wing_dat<-spain[!is.na(spain$WingSpread),]
summary(wing_dat$DoorSpread)
# Model 3: log wing spread ~ log-centred door spread within sweep category.
model3<-lm(log(WingSpread) ~ LogDoorSpreadCenter:SweepCat,data=wing_dat)
summary(model3)
# Back-transformed fitted values; renamed from `predict`, which shadowed
# stats::predict.
pred_wing<-exp(predict(model3))
col1<-c( "darkred","darkgreen")
points(wing_dat$DepthNew, pred_wing, col=col1[as.factor(wing_dat$Gear)], pch=19)
# first I need all the centered and logged data for hauls
str(hauls)
# Get means.
# Fix: compute the per-variable means directly instead of hand-transcribing
# them (previously hard-coded as 13.82127 / 3.526697 / 59.74916, which
# silently goes stale whenever the data change).
hauls$meanDepth<-mean(hauls$DepthNew)
hauls$meanWingSpread[!is.na(hauls$WingSpread)]<-mean(hauls$WingSpread, na.rm=TRUE)
hauls$meanNetopening[!is.na(hauls$Netopening)]<-mean(hauls$Netopening, na.rm=TRUE)
hauls$meanDoorSpread[!is.na(hauls$DoorSpread)]<-mean(hauls$DoorSpread, na.rm=TRUE)
# Get Centered data
hauls$DepthCenter<-hauls$DepthNew-hauls$meanDepth
hauls$WingSpreadCenter<-hauls$WingSpread-hauls$meanWingSpread
hauls$DoorSpreadCenter<-hauls$DoorSpread-hauls$meanDoorSpread
# Fix: this line previously assigned the centred net opening to
# LogDoorSpreadCenter (which was then overwritten below); the intended
# target is NetopeningCenter.
hauls$NetopeningCenter<-hauls$Netopening-hauls$meanNetopening
# Log and Center the Data
hauls$LogDepthCenter<-log(hauls$DepthNew)-log(hauls$meanDepth)
hauls$LogWingSpreadCenter<-log(hauls$WingSpread)-log(hauls$meanWingSpread)
hauls$LogDoorSpreadCenter<-log(hauls$DoorSpread)-log(hauls$meanDoorSpread)
hauls$LogNetopeningCenter<-log(hauls$Netopening)-log(hauls$meanNetopening)
# Sweep length as a categorical model term.
hauls$SweepCat<-as.factor(hauls$SweepLngt)
summary(as.factor(hauls$Ship))
# model3: model3<-lm(log(WingSpread) ~ LogDoorSpreadCenter:SweepCat,data=alldat)
# Model 3 training data: Spanish hauls where door spread is available.
wing_dat<-subset(hauls, Country=="SPA"& !is.na(DoorSpread),)
model3<-lm(log(WingSpread) ~ LogDoorSpreadCenter:SweepCat,
data=wing_dat)
summary(model3)
wing_dat_len <- nrow(wing_dat)
# Back-transformed predictions; note allow.new.levels is a merMod argument
# and is silently ignored by predict.lm.
wing_dat$predict_lm_wing_dat<-exp(predict(model3, wing_dat, allow.new.levels=T))
summary(wing_dat$predict_lm_wing_dat)
plot(wing_dat$DepthNew,wing_dat$predict_lm_wing_dat,
pch=19, col=cols[as.factor(wing_dat$Gear)])
# model2: fm2<-lm(log(WingSpread) ~ LogDepthCenter:SweepCat:Gear,data=alldat)
# Model 4 training data: all Spanish hauls (depth-based fallback model).
wing_dat2<-subset(hauls, Country=="SPA",)
model4<-lm(log(WingSpread) ~ LogDepthCenter:SweepCat,data=wing_dat2)
summary(model4)
wing_dat_len <- nrow(wing_dat2)
wing_dat2$predict_lm_wing_dat<-exp(predict(model4, wing_dat2, allow.new.levels=T))
summary(wing_dat2$predict_lm_wing_dat)
plot(wing_dat2$DepthNew,wing_dat2$predict_lm_wing_dat,
pch=19, col=cols[as.factor(wing_dat2$Gear)])
list<-wing_dat2$UniqueID
# Record the selected model's formula and coefficients.
formula(model4)
summary(model4)
coeffs=(coefficients(model4))
# Predict results for Spain's wing spread data.
# Prefer raw measurements where available.
hauls$Use_WingSpread[!is.na(hauls$WingSpread)&hauls$Country=="SPA"]<-hauls$WingSpread[!is.na(hauls$WingSpread)&hauls$Country=="SPA"]
hauls$QualityWing[!is.na(hauls$WingSpread)&hauls$Country=="SPA"]<-"raw_wingspread"
# Model 3 (door-spread based) predictions, only for hauls in wing_dat.
# (Renamed `list` -> ids: `list` shadows base::list.)
ids<-wing_dat$UniqueID
hauls$mod1_wingspread_spa[hauls$UniqueID%in%ids]<-wing_dat$predict_lm_wing_dat[wing_dat$UniqueID%in%ids]
# Model 4 (depth based) predictions cover every Spanish haul.
# Fix: the RHS subscript previously used wing_dat instead of wing_dat2,
# producing a wrong-length index vector.
ids<-wing_dat2$UniqueID
hauls$mod2_wingspread_spa[hauls$Country=="SPA"]<-wing_dat2$predict_lm_wing_dat[wing_dat2$UniqueID%in%ids]
# Fill gaps with model 3 first, then fall back to model 4.
hauls$Use_WingSpread[is.na(hauls$WingSpread)&hauls$Country=="SPA"]<-hauls$mod1_wingspread_spa[is.na(hauls$WingSpread)&hauls$Country=="SPA"]
hauls$QualityWing[is.na(hauls$WingSpread)&!is.na(hauls$mod1_wingspread_spa)&
hauls$Country=="SPA"]<-"mod3_wingspread_spa"
hauls$Use_WingSpread[is.na(hauls$Use_WingSpread)&
hauls$Country=="SPA"]<-hauls$mod2_wingspread_spa[is.na(hauls$Use_WingSpread)&hauls$Country=="SPA"]
hauls$QualityWing[is.na(hauls$QualityWing)&
hauls$Country=="SPA"]<-"mod4_wingspread_spa"
summary(hauls$Use_WingSpread[hauls$Country=="SPA"])
summary(as.factor(hauls$QualityWing[hauls$Country=="SPA"]))
# Diagnostic plot: final Spanish wing spreads coloured by provenance.
png(file="WingSpread_data_spain_col_29-09-2016.png", bg="transparent")
cols<- rainbow(3)
plot(hauls$DepthNew[hauls$Country=="SPA"], hauls$Use_WingSpread[hauls$Country=="SPA"],
col=cols[as.factor(hauls$QualityWing[hauls$Country=="SPA"])], pch=19,
xlab="Depth (m)", ylab="WingSpread (m)")
legend(5, 35, levels(as.factor(hauls$QualityWing[hauls$Country=="SPA"])),
col=cols,
pch=15, ncol=1, cex=1, bty="n")
dev.off()
##########################
# 4.1.7.1.4 The NCT Gear #
##########################
# No geometry model for the NCT gear: apply the fixed mean wing spread of
# 15.1 m to both the raw and the "use" columns.
# which() keeps the subscript NA-free: a bare logical index containing NA
# (from a missing Gear value) errors in subassignment.
nct_rows<-which(hauls$Gear=="NCT")
hauls$WingSpread[nct_rows]<-15.1
hauls$Use_WingSpread[nct_rows]<-15.1
hauls$QualityWing[nct_rows]<-"mean_wingspread"
#######################
# 4.1.8.1 Door Spread #
#######################
###########################################
# 4.1.8.1.1 The GOV Otter Trawl (Model 1) #
###########################################
# For model selection set up a training data set: GOV hauls with all three
# geometry measurements present.
train<-subset(hauls, Gear=="GOV"& (!is.na(WingSpread)) & (!is.na(DoorSpread))
&(!is.na(Netopening)),)
# Model 1: log door spread ~ log-centred depth within sweep category,
# random intercept for Ship nested in sweep category (ML fit).
model1_ds<- lmer(log(DoorSpread) ~ LogDepthCenter:EstSweepCat
+ (1|Ship:EstSweepCat),
data=train, REML=FALSE)
summary(model1_ds)
# Marginal/conditional R^2 for the mixed model (MuMIn).
r.squaredGLMM(model1_ds)
cols<-rainbow(13)
data<-predict(model1_ds)
summary(as.factor(train$Survey))
# Observed (squares) vs back-transformed fitted (circles) door spread by depth.
png(file="GOV_Doorspreads_model1.png", bg="transparent")
# Fix: the y-axis label previously read "WingSpread(m)" — a copy-paste error
# from the wing spread plot; this panel shows door spread.
plot(train$DepthNew, train$DoorSpread, col=cols[as.factor(train$Survey)],
pch=15, xlab="Depth (m)", ylab="DoorSpread (m)")
points(train$DepthNew, exp(data), pch=21, col=cols[as.factor(train$Survey)],
bg="black")
lines(lowess(train$DoorSpread~train$DepthNew), col="black", lwd=2)
lines(lowess(exp(data)~train$DepthNew), col="lightgrey", lwd=3, lty=2)
legend(530, 80, levels(as.factor(hauls$Survey[hauls$Gear=="GOV"])),
col=cols, pch=15, ncol=2, cex=.9, bty="o")
dev.off()
# note - less complex linear model can't explain as much variance as mixed model
# the random effects are accounting for quite a lot of variance
# set up user data using selected model # subset all GOV gear
the_gov<-subset(hauls, Gear=="GOV")
# 30538 obs
# Predict for every GOV haul; allow.new.levels=T lets unseen ships fall back
# to the population-level fit.
the_gov$mod1_doorspread_gov<-exp(predict(model1_ds, the_gov, allow.new.levels=T))
summary(the_gov$mod1_doorspread_gov)
# If real values are available use these
hauls$Use_DoorSpread[!is.na(hauls$DoorSpread)&
hauls$Gear=="GOV"]<-hauls$DoorSpread[!is.na(hauls$DoorSpread)&
hauls$Gear=="GOV"]
# NOTE(review): quality label applied to ALL gears here (no Gear filter);
# later sections overwrite it for ROT/SPA.
hauls$QualityDoor[!is.na(hauls$DoorSpread)]<-"raw_doorspread"
# Copy the model predictions back onto hauls, matched by UniqueID.
list<-the_gov$UniqueID
hauls$mod1_doorspread_gov[hauls$UniqueID%in%list]<-the_gov$mod1_doorspread_gov[the_gov$UniqueID%in%list]
# Fill only the gaps: model values where the raw door spread is missing.
hauls$Use_DoorSpread[is.na(hauls$DoorSpread)&
hauls$Gear=="GOV"]<-hauls$mod1_doorspread_gov[is.na(hauls$DoorSpread)&
hauls$Gear=="GOV"]
hauls$QualityDoor[is.na(hauls$DoorSpread)&!is.na(hauls$mod1_doorspread_gov)&
hauls$Gear=="GOV"]<-"mod1_door_gov"
#check door spreads
summary((hauls$Use_DoorSpread[hauls$Gear=="GOV"]))
summary(as.factor(hauls$QualityDoor[hauls$Gear=="GOV"]))
summary(hauls$Use_DoorSpread)
summary(model1_ds)
coeffs=coefficients(model1_ds);coeffs
#####################################
# 4.1.8.1.2 The ROT Trawl (Model 2) #
#####################################
summary(hauls$DoorSpread[hauls$Gear=="ROT"])
# Doorspread can be sorted first
# DoorSpread only has 955 missing values to be estimated
png(file = "doorspread_ROT_29-09-2016.png", bg = "transparent")
plot(hauls$Depth[hauls$Gear=="ROT"], hauls$DoorSpread[hauls$Gear=="ROT"],
pch=19, xlab="Depth (m)",
ylab="Door Spread (m)")
dev.off()
# Exploratory fit with a 95% confidence band around the depth trend.
# (Uses raw Depth here, not the cleaned DepthNew — presumably deliberate
# for exploration; confirm.)
x<-hauls$Depth[hauls$Gear=="ROT"]
y<-hauls$DoorSpread[hauls$Gear=="ROT"]
plot(y~x,type="n")
m = lm(y~x)
# Current x-axis limits of the open plot, to span the prediction grid.
wx = par("usr")[1:2]
new.x = seq(wx[1],wx[2],len=100)
pred = predict(m, new=data.frame(x=new.x), interval="conf", level=.95)
lines(new.x,pred[,"fit"],lwd=2)
lines(new.x,pred[,"lwr"],lty=3)
lines(new.x,pred[,"upr"],lty=3)
points(x,y,pch=16,col="steelblue")
# raw data looks good - no worrying outliers
# Correlation screen of the candidate predictors for the ROT door spread model.
corrhaul_rot<-subset(hauls, Gear=="ROT",
select=c(Year, Depth, Distance,
DoorSpread))
summary(corrhaul_rot)
library(corrgram)
png(file = "corrhaul_ROT_29-09-2016.png", bg = "transparent")
corrgram(corrhaul_rot, order="PCA", lower.panel=panel.shade,
upper.panel=panel.pie, text.panel=panel.txt,
main="Hauls Data NI")
dev.off()
# the only variable available to estimate doorspread is depth for ROT ship
# no sweep, no changes in gear , no wing or netopening
model2<-lm(log(DoorSpread[hauls$Gear=="ROT"])~log(Depth[hauls$Gear=="ROT"]), data=hauls)
AIC(model2)
summary(model2)
coeff<-coefficients(model2); coeff
png(file = "logged_doorspread_ROT_29-09-2016.png", bg = "transparent")
plot(log(hauls$DepthNew[hauls$Gear=="ROT"]), log(hauls$DoorSpread[hauls$Gear=="ROT"]), pch=19, xlab="logged Depth (m)",
ylab="logged Doorspread (m)")
# Fix: draw the fitted line from the estimated coefficients instead of
# hard-coded copies of them (previously a=2.558093, b=0.261106), so the plot
# stays correct if the data change.
abline(a=coeff[1], b=coeff[2], col="red", lwd=2)
dev.off()
# Back-transform to predict door spread for every ROT haul.
# NOTE(review): the model is fitted on Depth but predictions use DepthNew —
# presumably DepthNew is the cleaned depth; confirm this is intended.
hauls$mod_doorspread_rot[hauls$Gear=="ROT"]<-exp(coeff[1]+coeff[2]*log(hauls$DepthNew[hauls$Gear=="ROT"]))
# set up user values for doorspread: raw measurements first, model fills gaps.
hauls$Use_DoorSpread[!is.na(hauls$DoorSpread)&
hauls$Gear=="ROT"]<-hauls$DoorSpread[!is.na(hauls$DoorSpread)&
hauls$Gear=="ROT"]
hauls$QualityDoor[!is.na(hauls$DoorSpread)&hauls$Gear=="ROT"]<-"raw_doorspread"
# Model estimates where the raw door spread is missing, with provenance label.
hauls$Use_DoorSpread[is.na(hauls$DoorSpread)&hauls$Gear=="ROT"]<-hauls$mod_doorspread_rot[is.na(hauls$DoorSpread)&hauls$Gear=="ROT"]
hauls$QualityDoor[is.na(hauls$DoorSpread)&hauls$Gear=="ROT"]<-"model_doorspread_rot"
# Sanity checks on the filled values and provenance tallies.
summary(hauls$mod_doorspread_rot[hauls$Gear=="ROT"])
summary(hauls$Use_DoorSpread[hauls$Gear=="ROT"])
summary(hauls$Use_DoorSpread)
summary(as.factor(hauls$QualityDoor))
#########################################
# 4.1.8.1.3 The BAK Trawl (Model 3 & 4) #
#########################################
# 2 models selected - model 3 and model 4
# model3: dm3<-lm(log(DoorSpread) ~ LogWingSpreadCenter:SweepCat:Gear,data=spain)
# Model 3 training data: Spanish hauls where wing spread is available.
door_dat<-subset(hauls, Country=="SPA"& !is.na(WingSpread),)
dm3<-lm(log(DoorSpread) ~ LogWingSpreadCenter:SweepCat,data=door_dat)
summary(dm3)
door_dat_len <- nrow(door_dat)
# Back-transformed predictions; note allow.new.levels is a merMod argument
# and is silently ignored by predict.lm.
door_dat$predict_lm_door_dat<-exp(predict(dm3, door_dat, allow.new.levels=T))
plot(door_dat$DepthNew,(door_dat$predict_lm_door_dat),
pch=19, col=cols[as.factor(door_dat$Gear)])
# model2: dm4<-lm(log(DoorSpread) ~ LogDepthCenter:SweepCat:Gear,data=spain)
# Model 4 training data: all Spanish hauls (depth-based fallback model).
door_dat2<-subset(hauls, Country=="SPA",)
dm4<-lm(log(DoorSpread) ~ LogDepthCenter:SweepCat, data=door_dat2)
summary(dm4)
door_dat_len <- nrow(door_dat2)
door_dat2$predict_lm_door_dat<-exp(predict(dm4, door_dat2, allow.new.levels=T))
summary(door_dat2$predict_lm_door_dat)
plot(door_dat2$DepthNew,door_dat2$predict_lm_door_dat,
pch=19, col=cols[as.factor(door_dat2$Gear)])
# Record the selected model's formula and coefficients.
formula(dm4)
summary(dm4)
coeffs=(coefficients(dm4))
coeffs
# Predict results for Spain's door spread data.
# NOTE(review): the next line resets Use_DoorSpread for EVERY haul, discarding
# the GOV and ROT model fills made above while leaving their QualityDoor
# labels in place — confirm whether this global reset is intentional.
hauls$Use_DoorSpread<-hauls$DoorSpread
hauls$QualityDoor[!is.na(hauls$DoorSpread)]<-"raw_doorspread"
# Model 4 (depth based) predictions cover every Spanish haul.
# Fix: the RHS subscript previously used door_dat instead of door_dat2,
# producing a wrong-length index vector.
list<-door_dat2$UniqueID
hauls$mod2_doorspread_spa[hauls$Country=="SPA"]<-door_dat2$predict_lm_door_dat[door_dat2$UniqueID%in%list]
# Model 3 (wing-spread based) predictions, only for hauls in door_dat.
list<-door_dat$UniqueID
hauls$mod1_doorspread_spa[hauls$UniqueID%in%list]<-door_dat$predict_lm_door_dat[door_dat$UniqueID%in%list]
# Fill missing door spreads with model 3 first.
hauls$Use_DoorSpread[is.na(hauls$DoorSpread)&
hauls$Country=="SPA"]<-hauls$mod1_doorspread_spa[is.na(hauls$DoorSpread)&
hauls$Country=="SPA"]
# NOTE(review): label spelled "doorpread" (sic) in the original; kept as-is
# because downstream code may match this exact string.
hauls$QualityDoor[is.na(hauls$DoorSpread)&!is.na(hauls$mod1_doorspread_spa)]<-"mod1_doorpread_spa"
hauls$Use_DoorSpread[is.na(hauls$Use_DoorSpread)&