# 2. Data normalization/unification

#### Imports and constants

In [31]:
import pandas as pd
import numpy as np
import seaborn as sns
from scipy import stats
from pathlib import Path
from datetime import datetime

# Geometry libraries
import geopandas as gpd
from shapely.geometry import Polygon, LineString, Point


ROOT = Path.cwd()

#### Reading

In [2]:
DataDir = ROOT / "Data"
DataDir

WindowsPath('d:/00.Mis_Cosas/Universidad/Master/Q2/DMT/Project/Project2_Processing&Querying_TransportationData/Code/Data')

In [3]:
chlorophyllDF = pd.read_csv(DataDir / "clean_chlorophyll.csv")
temperatureDF = pd.read_csv(DataDir / "clean_temperature.csv")

In [4]:
chlorophyllDF

Unnamed: 0,time,latitude,longitude,chlor_a
0,2020-01-01,-35.25,-69.75,1.04
1,2020-01-01,-35.25,-69.25,1.04
2,2020-01-01,-35.25,-68.75,1.04
3,2020-01-01,-35.25,-68.25,1.04
4,2020-01-01,-35.25,-67.75,1.04
...,...,...,...,...
218395,2020-06-30,-49.75,-52.25,1.04
218396,2020-06-30,-49.75,-51.75,1.04
218397,2020-06-30,-49.75,-51.25,1.04
218398,2020-06-30,-49.75,-50.75,1.04


In [7]:
chlorophyllDF.dtypes

time          object
latitude     float64
longitude    float64
chlor_a      float64
dtype: object

In [5]:
temperatureDF

Unnamed: 0,time,latitude,longitude,temperature,partOfTheDay
0,2020-01-01,-35.25,-69.75,288.169121,day
1,2020-01-01,-35.25,-69.25,288.169121,day
2,2020-01-01,-35.25,-68.75,288.169121,day
3,2020-01-01,-35.25,-68.25,288.169121,day
4,2020-01-01,-35.25,-67.75,288.169121,day
...,...,...,...,...,...
436795,2020-06-30,-49.75,-52.25,288.169121,night
436796,2020-06-30,-49.75,-51.75,288.169121,night
436797,2020-06-30,-49.75,-51.25,288.169121,night
436798,2020-06-30,-49.75,-50.75,288.169121,night


In [6]:
temperatureDF.dtypes

time             object
latitude        float64
longitude       float64
temperature     float64
partOfTheDay     object
dtype: object

## 2A. Generate square geometries.
Temperature and chlorophyll data are already preprocessed (homogenized) to use the common resolution of 0.5º, meaning that each coordinate in these data sets is actually covering a square of 0.5º of size.

In [57]:
maxi_lat = max(chlorophyllDF['latitude'].max(), temperatureDF['latitude'].max())
mini_lat = min(chlorophyllDF['latitude'].min(), temperatureDF['latitude'].min())
print('latitude min and max: [{}, {}]'.format(mini_lat, maxi_lat))   

latitude min and max: [-49.75, -35.25]


In [58]:
maxi_lon = max(chlorophyllDF['longitude'].max(), temperatureDF['longitude'].max())
mini_lon = min(chlorophyllDF['longitude'].min(), temperatureDF['longitude'].min())
print('longitude min and max: [{}, {}]'.format(mini_lon, maxi_lon))   

longitude min and max: [-69.75, -50.25]


In [63]:
latitude_coord = np.arange(-50, -35, 0.5)
latitude_coord

array([-50. , -49.5, -49. , -48.5, -48. , -47.5, -47. , -46.5, -46. ,
       -45.5, -45. , -44.5, -44. , -43.5, -43. , -42.5, -42. , -41.5,
       -41. , -40.5, -40. , -39.5, -39. , -38.5, -38. , -37.5, -37. ,
       -36.5, -36. , -35.5])

In [69]:
longitude_coord = np.arange(-70, -50, 0.5)
longitude_coord

array([-70. , -69.5, -69. , -68.5, -68. , -67.5, -67. , -66.5, -66. ,
       -65.5, -65. , -64.5, -64. , -63.5, -63. , -62.5, -62. , -61.5,
       -61. , -60.5, -60. , -59.5, -59. , -58.5, -58. , -57.5, -57. ,
       -56.5, -56. , -55.5, -55. , -54.5, -54. , -53.5, -53. , -52.5,
       -52. , -51.5, -51. , -50.5])

In [24]:
lat = np.union1d(chlorophyllDF['latitude'].unique(), temperatureDF['latitude'].unique())
lon = np.union1d(chlorophyllDF['longitude'].unique(), temperatureDF['longitude'].unique())

In [36]:
lat

array([-49.75, -49.25, -48.75, -48.25, -47.75, -47.25, -46.75, -46.25,
       -45.75, -45.25, -44.75, -44.25, -43.75, -43.25, -42.75, -42.25,
       -41.75, -41.25, -40.75, -40.25, -39.75, -39.25, -38.75, -38.25,
       -37.75, -37.25, -36.75, -36.25, -35.75, -35.25])

In [25]:
len(lat)

30

In [33]:
lon

array([-69.75, -69.25, -68.75, -68.25, -67.75, -67.25, -66.75, -66.25,
       -65.75, -65.25, -64.75, -64.25, -63.75, -63.25, -62.75, -62.25,
       -61.75, -61.25, -60.75, -60.25, -59.75, -59.25, -58.75, -58.25,
       -57.75, -57.25, -56.75, -56.25, -55.75, -55.25, -54.75, -54.25,
       -53.75, -53.25, -52.75, -52.25, -51.75, -51.25, -50.75, -50.25])

In [29]:
len(lon)

40

In [35]:
print("{}, {}".format(lat[0], lon[0]))

-49.75, -69.75


In [46]:
mySquare = LineString([ (lat[0], lon[0]), (lat[0], lon[1]), (lat[1], lon[1]), (lat[1], lon[0]), (lat[0], lon[0])])
mySquare.bounds
#print("{}, {}".format(mySquare.bounds[0], mySquare.bounds[1]))


(-49.75, -69.75, -49.25, -69.25)