In [None]:
import pandas as pd
import matplotlib.pyplot as plt

import cartopy
cartopy.config['data_dir'] = './maps'
import cartopy.crs as ccrs
from cartopy.feature import NaturalEarthFeature
import cartopy.feature as feat

In [None]:
# Read .csv files

Let's dig a bit further into the metadata. The WMO platform code is a 5- or 7-digit code assigned to platforms that transmit over the Global Telecommunication System (GTS; for more information, see https://www.ocean-ops.org/dbcp/data/sharing.html). Ocean-OPS (https://www.ocean-ops.org/board) manages an extensive metadata catalogue of marine observation deployments, which can be explored through an API. 
Here we will find out how many data points have no associated WMO number, how many different platforms are in the dataset (grouped by platform type), and which devices they are. 

In [None]:
# Number of data points without WMO code
n_missing_wmo = df_emodnet['wmo_platform_code'].isnull().sum()
print("There are", str(n_missing_wmo), "datapoints without a WMO number associated")

# Retrieve device types: one array of distinct WMO codes per platform type
wmobytype=df_emodnet.wmo_platform_code.groupby(df_emodnet.EP_PLATFORM_TYPE).unique()
display(wmobytype)

# nunique() skips missing codes, so rows without a WMO number are not counted
# as a platform of their own.
n_platforms = df_emodnet['wmo_platform_code'].nunique()
print("The dataframe contains data from", str(len(wmobytype)), "platform types and at least",
      str(n_platforms),"different platforms")
print("\n")

# Seconds to wait for the Ocean-OPS API before giving up on a request
OCEANOPS_TIMEOUT_S = 30


def _wmo_as_text(code):
    """Return the WMO code as query text, without the '.0' of a float column."""
    if isinstance(code, float) and code.is_integer():
        return str(int(code))
    return str(code)


# Ask the Ocean-OPS API for the device model behind every WMO code
for platform_type, wmocodes in wmobytype.items():
    for wmocode in wmocodes:
        # unique() keeps NaN as a value; there is nothing to look up for it
        if pd.isnull(wmocode):
            continue
        wmo_text = _wmo_as_text(wmocode)
        url = ("https://www.ocean-ops.org/api/preview/data/platform?exp=[%22wmos.wmo=%27"
            +wmo_text+"%27%22]&include=[%22ref%22,%22wmos.wmo%22]&include=[%22ptfModel.name%22]")
        rdata = requests.get(url, timeout=OCEANOPS_TIMEOUT_S)
        rdata.raise_for_status()  # fail loudly instead of parsing an error page
        jdata = rdata.json()
        platforms = jdata.get('data') or []
        if not platforms:
            print("No Ocean-OPS record found for WMO platform code", wmo_text)
            continue
        model_name = (platforms[0].get('ptfModel') or {}).get('name', 'unknown')
        print("The platform of type", str(platform_type), "and WMO platform code of", wmo_text,
              "corresponds to a", model_name, "device")

In [None]:
# Map of the sampling locations of both datasets.
# The combined frame is not needed for the map itself; it is still built here
# so that `dflist` and `df` stay available exactly as before.
dflist=[df_ifremer,df_emodnet]
df=pd.concat(dflist)

# `ccrs` already is cartopy.crs, so the projection class is reached directly
proj=ccrs.Miller()
fig = plt.figure(dpi=200)
ax = fig.add_subplot(projection=proj)
ax.stock_img()
ax.coastlines()

# One scatter layer per dataset: (frame, colour, marker size, legend label).
# transform=PlateCarree declares the inputs as longitude/latitude values.
map_layers = (
    (df_ifremer, 'r', 5, 'BGC-Argo IFREMER'),
    (df_emodnet, 'b', 1, 'EMODNET'),
)
for layer_df, layer_colour, layer_size, layer_label in map_layers:
    ax.scatter(layer_df[vardict['lon']], layer_df[vardict['lat']],
        c=layer_colour, s=layer_size, label=layer_label,
        transform=ccrs.PlateCarree())

ax.set_title('Sampling locations')
ax.legend(loc='lower center')
plt.show()

In [None]:
# Date-pressure scatter plot of both datasets; the IFREMER points are coloured
# by the pH column, the EMODNET points are drawn in plain yellow.
# NOTE(review): the previous header announced a latitude-depth plot with an
# Atlantic mask, but no mask is applied here and the x axis is the date.

fig, ax = plt.subplots(figsize=(20,6), dpi=200)
sc=ax.scatter(pd.to_datetime(df_ifremer[vardict['datevec']]),
           df_ifremer[vardict['pres']],
           c=df_ifremer[vardict['ph']],
           s=5,label='BGC-Argo IFREMER')
ax.scatter(pd.to_datetime(df_emodnet[vardict['datevec']]),
           df_emodnet[vardict['pres']],
           c='y',
           s=1,label='EMODNET')
ax.tick_params(axis='x', labelrotation=45)
# Flip the y axis so that larger pressure values are drawn lower in the figure
ax.invert_yaxis()
# Label everything with the column names so the figure can be read on its own
ax.set_xlabel(vardict['datevec'])
ax.set_ylabel(vardict['pres'])
fig.colorbar(sc, ax=ax, label=vardict['ph'])
ax.legend(loc='lower center');

In [None]:
# Compare the pressure column of EMODNET with the one of the IFREMER
# comparison frame, plotted against the DataFrame index.
# NOTE: df_ifremer_comp is created in the "Check duplicates" cell further down;
# on a fresh kernel that cell has to run first, otherwise this one stops with
# a NameError.
print(df_emodnet.shape, df_ifremer_comp.shape)
ax=plt.axes()
ax.plot(df_emodnet[vardict['pres']], linewidth=5, label='EMODNET')
ax.plot(df_ifremer_comp[vardict['pres']], c='r', label='BGC-Argo IFREMER')
ax.set_xlabel('DataFrame index')
ax.set_ylabel(vardict['pres'])
ax.legend()

# First rows of the pressure and pH columns of both source frames
pres_ph_cols = [vardict['pres'], vardict['ph']]
print(df_emodnet[pres_ph_cols].head())
print(df_ifremer[pres_ph_cols].head())

In [None]:
# Check duplicates
# Use pres (decibar) from ifremer to compare the data points: that column takes
# over the shared pressure name, and the column that carried the shared name
# before is kept as 'pres_adjusted (decibar)'.
# rename() and round() both return new frames, so df_ifremer stays untouched.
df_ifremer_comp = (
    df_ifremer
    .rename(columns={'pres (decibar)': vardict['pres'],
                     vardict['pres']: 'pres_adjusted (decibar)'})
    # presumably 3 decimals matches the precision of the EMODNET pH values -- TODO confirm
    .round({vardict['ph']: 3})
)
print(df_ifremer_comp[[vardict['pres'],vardict['ph']]].iloc[0:5])
print()
dataframes=[df_emodnet,df_ifremer_comp]


df=pd.concat(dataframes)
print(df.shape)
print(df.columns)
# A row is a duplicate when position, date, pressure and pH all coincide.
dup_keys = [vardict['lat'],vardict['lon'],vardict['datevec'],vardict['pres'],vardict['ph']]
# keep=False flags every member of a duplicated group, not only the repeats
isdup=df.duplicated(subset=dup_keys, keep=False)
print(isdup.sum())

In [None]:
# Rows that were not flagged as duplicates in the combined frame
df_unique = df.loc[~isdup]
# Column means per data source. numeric_only=True restricts the mean to the
# numeric columns; without it pandas >= 2.0 raises a TypeError as soon as the
# frame holds text columns.
print(df_unique.groupby('SOURCE').mean(numeric_only=True))