In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from scipy import stats
from pathlib import Path
from datetime import datetime

# Geometry libraries
import geopandas as gpd
from shapely.geometry import Polygon, LineString, Point
import psycopg2

from sqlalchemy import create_engine

ROOT = Path.cwd()

In [2]:
DataDir = ROOT / "Data"

In [132]:
norm_temperatureDF = pd.read_csv(DataDir / "Normalized/norm_temperatureDF.csv")
norm_chlorophyllDF = pd.read_csv(DataDir / "Normalized/norm_chlorophyllDF.csv")
norm_aisDF = pd.read_csv(DataDir / "Normalized/norm_aisDF.csv")
norm_fishingDF = pd.read_csv(DataDir / "Normalized/norm_fishingDF.csv")

In [133]:
norm_temperatureDF.head()


Unnamed: 0,time,latitude,longitude,temperature,partOfTheDay,normLat,normLon
0,2020-01-01,-35.25,-69.75,288.169121,day,109.0,220.0
1,2020-01-01,-35.25,-69.25,288.169121,day,109.0,221.0
2,2020-01-01,-35.25,-68.75,288.169121,day,109.0,222.0
3,2020-01-01,-35.25,-68.25,288.169121,day,109.0,223.0
4,2020-01-01,-35.25,-67.75,288.169121,day,109.0,224.0


In [134]:
norm_chlorophyllDF.head()

Unnamed: 0,time,latitude,longitude,chlor_a,normLat,normLon
0,2020-01-01,-35.25,-69.75,1.04,109.0,220.0
1,2020-01-01,-35.25,-69.25,1.04,109.0,221.0
2,2020-01-01,-35.25,-68.75,1.04,109.0,222.0
3,2020-01-01,-35.25,-68.25,1.04,109.0,223.0
4,2020-01-01,-35.25,-67.75,1.04,109.0,224.0


In [7]:
norm_aisDF.head()

Unnamed: 0,BoatName,BoatID,Date,Latitude,Longitude,normLat,normLon
0,Mason,111,2020-01-01 08:00:00,-38.035,-57.52777,103.0,244.0
1,Mason,111,2020-01-01 12:00:00,-38.03527,-57.52777,103.0,244.0
2,Mason,111,2020-01-02 08:00:00,-38.035,-57.52777,103.0,244.0
3,Mason,111,2020-01-02 12:00:00,-38.035,-57.52777,103.0,244.0
4,Mason,111,2020-01-02 23:00:00,-38.035,-57.52777,103.0,244.0


In [8]:
norm_fishingDF.head()

Unnamed: 0,BoatName,BoatID,Trip,Day,Kg,Duration,Lines,Temperature,Latitude,Longitude,normLat,normLon
0,Mason,111,2020-1,2020-01-12,11492,12,60,14.5,-45.1,-62.0,89.0,236.0
1,Mason,111,2020-1,2020-01-13,10582,12,60,14.5,-45.033333,-62.1,89.0,235.0
2,Mason,111,2020-1,2020-01-18,21164,12,60,13.5,-44.833333,-63.816667,90.0,232.0
3,Mason,111,2020-1,2020-01-19,18980,12,60,14.0,-44.833333,-63.833333,90.0,232.0
4,Mason,111,2020-1,2020-01-20,16796,12,60,13.7,-44.866667,-63.816667,90.0,232.0


In [60]:
norm_tempDF_simply = norm_temperatureDF.groupby(by=["time","normLat","normLon"]).mean().reset_index()
norm_tempDF_simply

Unnamed: 0,time,normLat,normLon,latitude,longitude,temperature
0,2020-01-01,80.0,220.0,-49.75,-69.75,288.169121
1,2020-01-01,80.0,221.0,-49.75,-69.25,288.169121
2,2020-01-01,80.0,222.0,-49.75,-68.75,288.169121
3,2020-01-01,80.0,223.0,-49.75,-68.25,288.169121
4,2020-01-01,80.0,224.0,-49.75,-67.75,288.169121
...,...,...,...,...,...,...
218395,2020-06-30,109.0,255.0,-35.25,-52.25,288.169121
218396,2020-06-30,109.0,256.0,-35.25,-51.75,288.169121
218397,2020-06-30,109.0,257.0,-35.25,-51.25,288.169121
218398,2020-06-30,109.0,258.0,-35.25,-50.75,288.169121


## Convert the pandas dataframe to geoPandas dataframe
 

In [121]:
geoFishingDF = gpd.GeoDataFrame(norm_fishingDF, 
                                geometry=gpd.points_from_xy(norm_fishingDF.normLat, norm_fishingDF.normLon),
                                crs = "EPSG:3857")
geoFishingDF = geoFishingDF[["BoatName", "BoatID", "Day", "Kg", "Duration", "Lines", "Temperature", "geometry"]]
geoFishingDF.head() # 644 x 7

Unnamed: 0,BoatName,BoatID,Day,Kg,Duration,Lines,Temperature,geometry
0,Mason,111,2020-01-12,11492,12,60,14.5,POINT (89.000 236.000)
1,Mason,111,2020-01-13,10582,12,60,14.5,POINT (89.000 235.000)
2,Mason,111,2020-01-18,21164,12,60,13.5,POINT (90.000 232.000)
3,Mason,111,2020-01-19,18980,12,60,14.0,POINT (90.000 232.000)
4,Mason,111,2020-01-20,16796,12,60,13.7,POINT (90.000 232.000)


In [63]:
geoTemperatureDF = gpd.GeoDataFrame(norm_tempDF_simply, 
                                geometry=gpd.points_from_xy(norm_tempDF_simply.normLat, norm_tempDF_simply.normLon),
                                crs = "EPSG:3857")
geoTemperatureDF = geoTemperatureDF[["time", "temperature", "geometry"]]
geoTemperatureDF.head() # 218400 x 4

Unnamed: 0,time,temperature,geometry
0,2020-01-01,288.169121,POINT (80.000 220.000)
1,2020-01-01,288.169121,POINT (80.000 221.000)
2,2020-01-01,288.169121,POINT (80.000 222.000)
3,2020-01-01,288.169121,POINT (80.000 223.000)
4,2020-01-01,288.169121,POINT (80.000 224.000)


In [18]:
geoChlorophyllDF = gpd.GeoDataFrame(norm_chlorophyllDF, 
                                geometry=gpd.points_from_xy(norm_chlorophyllDF.normLat, norm_chlorophyllDF.normLon),
                                crs = "EPSG:3857")
geoChlorophyllDF = geoChlorophyllDF[["time", "chlor_a", "geometry"]]
geoChlorophyllDF.head() # 218400 x 3

Unnamed: 0,time,chlor_a,geometry
0,2020-01-01,1.04,POINT (109.000 220.000)
1,2020-01-01,1.04,POINT (109.000 221.000)
2,2020-01-01,1.04,POINT (109.000 222.000)
3,2020-01-01,1.04,POINT (109.000 223.000)
4,2020-01-01,1.04,POINT (109.000 224.000)


In [122]:
boatMap_Name_ID = geoFishingDF[["BoatName", "BoatID"]].drop_duplicates().reset_index(drop=True)
boatMap_Name_ID 

Unnamed: 0,BoatName,BoatID
0,Mason,111
1,Rey,112
2,Korbin,113
3,Armani,114
4,Rodney,115


In [135]:
geoFeatures = geoTemperatureDF.copy()
geoFeatures = geoFeatures.merge(geoChlorophyllDF, on=["time", "geometry"])
geoFeatures.head()

#for i,_ in geoFeatures.iterrows():

#    geoFeatures["chlor_a"] = geoChlorophyllDF[
#                                (geoChlorophyllDF["geometry"].intersects(geoFeatures['geometry'][i])) & 
#                                (geoFeatures["time"][i] == geoChlorophyllDF["time"])]["chlor_a"]

Unnamed: 0,time,temperature,geometry,chlor_a
0,2020-01-01,288.169121,POINT (80.000 220.000),1.04
1,2020-01-01,288.169121,POINT (80.000 221.000),1.04
2,2020-01-01,288.169121,POINT (80.000 222.000),1.04
3,2020-01-01,288.169121,POINT (80.000 223.000),1.04
4,2020-01-01,288.169121,POINT (80.000 224.000),1.04


In [123]:
geoFishingDF

Unnamed: 0,BoatName,BoatID,Day,Kg,Duration,Lines,Temperature,geometry
0,Mason,111,2020-01-12,11492,12,60,14.5,POINT (89.000 236.000)
1,Mason,111,2020-01-13,10582,12,60,14.5,POINT (89.000 235.000)
2,Mason,111,2020-01-18,21164,12,60,13.5,POINT (90.000 232.000)
3,Mason,111,2020-01-19,18980,12,60,14.0,POINT (90.000 232.000)
4,Mason,111,2020-01-20,16796,12,60,13.7,POINT (90.000 232.000)
...,...,...,...,...,...,...,...,...
639,Rodney,115,2020-06-21,21790,12,100,11.0,POINT (95.000 241.000)
640,Rodney,115,2020-06-22,20410,12,100,11.2,POINT (95.000 241.000)
641,Rodney,115,2020-06-25,23330,12,100,10.3,POINT (95.000 241.000)
642,Rodney,115,2020-06-26,8120,12,100,10.9,POINT (96.000 241.000)


In [124]:
geoFishingDF_in = geoFishingDF.copy()
#geoFishingDF_in = geoFeatures.merge(geoFeatures, left_on=["Day"], right_on=["time"])
geoFishingDF_in = geoFishingDF_in.sjoin(geoFeatures, how="inner", predicate = "intersects")
geoFishingDF_in

Unnamed: 0,BoatName,BoatID,Day,Kg,Duration,Lines,Temperature,geometry,index_right,time,temperature,chlor_a
0,Mason,111,2020-01-12,11492,12,60,14.5,POINT (89.000 236.000),51976,2020-02-13,288.169121,1.04
0,Mason,111,2020-01-12,11492,12,60,14.5,POINT (89.000 236.000),207976,2020-06-22,288.169121,1.04
0,Mason,111,2020-01-12,11492,12,60,14.5,POINT (89.000 236.000),72376,2020-03-01,288.169121,1.04
0,Mason,111,2020-01-12,11492,12,60,14.5,POINT (89.000 236.000),376,2020-01-01,288.169121,1.04
0,Mason,111,2020-01-12,11492,12,60,14.5,POINT (89.000 236.000),65176,2020-02-24,288.169121,1.04
...,...,...,...,...,...,...,...,...,...,...,...,...
631,Rodney,115,2020-06-13,30130,12,100,10.1,POINT (93.000 240.000),160140,2020-05-13,288.169121,1.04
633,Rodney,115,2020-06-15,5580,12,100,9.2,POINT (93.000 240.000),160140,2020-05-13,288.169121,1.04
629,Rodney,115,2020-06-06,17720,12,100,10.2,POINT (93.000 240.000),44940,2020-02-07,288.169121,1.04
631,Rodney,115,2020-06-13,30130,12,100,10.1,POINT (93.000 240.000),44940,2020-02-07,288.169121,1.04


In [125]:
geoFishingDF_in["temperature"] = geoFishingDF_in["temperature"] - 273.15
geoFishingDF_in

Unnamed: 0,BoatName,BoatID,Day,Kg,Duration,Lines,Temperature,geometry,index_right,time,temperature,chlor_a
0,Mason,111,2020-01-12,11492,12,60,14.5,POINT (89.000 236.000),51976,2020-02-13,15.019121,1.04
0,Mason,111,2020-01-12,11492,12,60,14.5,POINT (89.000 236.000),207976,2020-06-22,15.019121,1.04
0,Mason,111,2020-01-12,11492,12,60,14.5,POINT (89.000 236.000),72376,2020-03-01,15.019121,1.04
0,Mason,111,2020-01-12,11492,12,60,14.5,POINT (89.000 236.000),376,2020-01-01,15.019121,1.04
0,Mason,111,2020-01-12,11492,12,60,14.5,POINT (89.000 236.000),65176,2020-02-24,15.019121,1.04
...,...,...,...,...,...,...,...,...,...,...,...,...
631,Rodney,115,2020-06-13,30130,12,100,10.1,POINT (93.000 240.000),160140,2020-05-13,15.019121,1.04
633,Rodney,115,2020-06-15,5580,12,100,9.2,POINT (93.000 240.000),160140,2020-05-13,15.019121,1.04
629,Rodney,115,2020-06-06,17720,12,100,10.2,POINT (93.000 240.000),44940,2020-02-07,15.019121,1.04
631,Rodney,115,2020-06-13,30130,12,100,10.1,POINT (93.000 240.000),44940,2020-02-07,15.019121,1.04


In [131]:
geoFishingDF_filtered = geoFishingDF_in[geoFishingDF_in["Day"] == geoFishingDF_in["time"]]
geoFishingDF_filtered = geoFishingDF_filtered[["BoatName", "BoatID", "Day", "Kg", "Duration", "Temperature", "temperature", "chlor_a", "geometry"]]
geoFishingDF_filtered.sort_values(by=["BoatID", "Day"], inplace=True)
geoFishingDF_filtered.reset_index(drop=True, inplace=True)
geoFishingDF_filtered

Unnamed: 0,BoatName,BoatID,Day,Kg,Duration,Temperature,temperature,chlor_a,geometry
0,Mason,111,2020-01-12,11492,12,14.5,15.019121,1.04,POINT (89.000 236.000)
1,Mason,111,2020-01-13,10582,12,14.5,15.019121,1.04,POINT (89.000 235.000)
2,Mason,111,2020-01-18,21164,12,13.5,15.019121,1.04,POINT (90.000 232.000)
3,Mason,111,2020-01-19,18980,12,14.0,15.019121,1.04,POINT (90.000 232.000)
4,Mason,111,2020-01-20,16796,12,13.7,15.019121,1.04,POINT (90.000 232.000)
...,...,...,...,...,...,...,...,...,...
639,Rodney,115,2020-06-21,21790,12,11.0,15.019121,1.04,POINT (95.000 241.000)
640,Rodney,115,2020-06-22,20410,12,11.2,15.019121,1.04,POINT (95.000 241.000)
641,Rodney,115,2020-06-25,23330,12,10.3,15.019121,1.04,POINT (95.000 241.000)
642,Rodney,115,2020-06-26,8120,12,10.9,15.019121,1.04,POINT (96.000 241.000)


# fnrwiofnoiramfpoamfpoeam,fopam,fpoea,fea,

In [40]:
geoFishingDF

Unnamed: 0,BoatName,BoatID,Day,Kg,Duration,Lines,geometry
0,Mason,111,2020-01-12,11492,12,60,POINT (89.000 236.000)
1,Mason,111,2020-01-13,10582,12,60,POINT (89.000 235.000)
2,Mason,111,2020-01-18,21164,12,60,POINT (90.000 232.000)
3,Mason,111,2020-01-19,18980,12,60,POINT (90.000 232.000)
4,Mason,111,2020-01-20,16796,12,60,POINT (90.000 232.000)
...,...,...,...,...,...,...,...
639,Rodney,115,2020-06-21,21790,12,100,POINT (95.000 241.000)
640,Rodney,115,2020-06-22,20410,12,100,POINT (95.000 241.000)
641,Rodney,115,2020-06-25,23330,12,100,POINT (95.000 241.000)
642,Rodney,115,2020-06-26,8120,12,100,POINT (96.000 241.000)


In [None]:
geoFishingDF[""]
for i, _ in geoFishingDF

  geoTemperatureDF.groupby(by=["time", "geometry"]).mean()


TypeError: '<' not supported between instances of 'Point' and 'Point'

In [41]:
geoTemperatureDF.dissolve(by=["time"], aggfunc='mean')
#geoFishingDF.dissolve(by=["BoatID","Day"], aggfunc='mean')
#day = geoFishingDF["Day"][0]

#for i,_ in geoFishingDF.iterrows():
#    day = geoFishingDF["Day"][i]

Unnamed: 0,time,temperature,partOfTheDay,geometry
0,2020-01-01,288.169121,day,POINT (109.000 220.000)
1,2020-01-01,288.169121,day,POINT (109.000 221.000)
2,2020-01-01,288.169121,day,POINT (109.000 222.000)
3,2020-01-01,288.169121,day,POINT (109.000 223.000)
4,2020-01-01,288.169121,day,POINT (109.000 224.000)
...,...,...,...,...
436795,2020-06-30,288.169121,night,POINT (80.000 255.000)
436796,2020-06-30,288.169121,night,POINT (80.000 256.000)
436797,2020-06-30,288.169121,night,POINT (80.000 257.000)
436798,2020-06-30,288.169121,night,POINT (80.000 258.000)


In [44]:
geoTemperatureDF.dissolve(by=["time", "partOfTheDay"], aggfunc='mean')

KeyError: 'geometry'

In [33]:
geoDF = geoFishingDF.sjoin(geoTemperatureDF, how='inner', predicate='intersects')
geoDF.drop(["BoatID","index_right"], axis=1)

Unnamed: 0,BoatName,Day,Kg,Duration,Lines,geometry,time,temperature,partOfTheDay
0,Mason,2020-01-12,11492,12,60,POINT (89.000 236.000),2020-05-04,288.169121,night
0,Mason,2020-01-12,11492,12,60,POINT (89.000 236.000),2020-05-25,288.169121,day
0,Mason,2020-01-12,11492,12,60,POINT (89.000 236.000),2020-04-30,288.169121,night
0,Mason,2020-01-12,11492,12,60,POINT (89.000 236.000),2020-04-26,288.169121,day
0,Mason,2020-01-12,11492,12,60,POINT (89.000 236.000),2020-05-27,288.169121,night
...,...,...,...,...,...,...,...,...,...
631,Rodney,2020-06-13,30130,12,100,POINT (93.000 240.000),2020-02-16,288.169121,day
633,Rodney,2020-06-15,5580,12,100,POINT (93.000 240.000),2020-02-16,288.169121,day
629,Rodney,2020-06-06,17720,12,100,POINT (93.000 240.000),2020-04-23,288.169121,night
631,Rodney,2020-06-13,30130,12,100,POINT (93.000 240.000),2020-04-23,288.169121,night
