In [None]:
from pydap.client import open_url
from datetime import datetime
import numpy as np
import pandas as pd
import time
from datetime import timedelta
import pyproj
import xarray as xr
from scipy.interpolate import LinearNDInterpolator
import matplotlib.pyplot as plt
import math
import pickle
from datetime import date
import os
import plotly.express as px
import cartopy.crs as ccrs
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import matplotlib
import cartopy as cart


def retrieve_microplastics_gt(data_dir='../microplast_gt'):
    """Load the gridded microplastic model CSVs into one long-format DataFrame.

    Every entry of ``data_dir`` is read as a header-less CSV. The first 181
    rows and 361 columns of each file are used: row ``r`` is assigned latitude
    ``90 - r`` and column ``c`` is assigned longitude ``c``. Values below 1 are
    replaced by NaN before the base-10 logarithm is taken.

    Parameters
    ----------
    data_dir : str, optional
        Directory holding the CSV grids. Defaults to ``'../microplast_gt'``.

    Returns
    -------
    pandas.DataFrame
        One row per grid cell (all latitudes of longitude 0, then longitude 1,
        ...) with columns ``sp_lon``, ``sp_lat`` and, for every readable file,
        ``<name>`` and ``<name>_log`` where ``<name>`` is the file name without
        its last four characters.

    Raises
    ------
    ValueError
        If a readable file holds fewer than 181 rows or 361 columns, or if no
        entry of ``data_dir`` could be read at all.
    """
    n_lat, n_lon = 181, 361
    lats = np.arange(90, 90 - n_lat, -1, dtype=float)  # 90, 89, ..., -90

    # Collect the columns in a dict and build the frame once at the end, so the
    # result does not depend on which file happens to be listed first.
    columns = {
        'sp_lon': np.repeat(np.arange(n_lon), n_lat),
        'sp_lat': np.tile(lats, n_lon),
    }
    n_loaded = 0
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(data_dir)):
        try:
            df = pd.read_csv(os.path.join(data_dir, filename), header=None)
        except (OSError, ValueError):
            # Best effort: report entries that cannot be read or parsed
            # (pandas parser errors derive from ValueError) and move on.
            print(filename)
            continue
        if df.shape[0] < n_lat or df.shape[1] < n_lon:
            raise ValueError(
                f'{filename}: expected a grid of at least {n_lat} x {n_lon}, '
                f'got {df.shape[0]} x {df.shape[1]}'
            )
        grid = df.iloc[:n_lat, :n_lon].to_numpy(dtype=float)
        grid = np.where(grid < 1, np.nan, grid)
        # Column-major flattening walks all latitudes of one longitude before
        # moving to the next longitude, matching sp_lon / sp_lat above.
        values = grid.ravel(order='F')
        name = filename[:-4]
        columns[name] = values
        columns[name + '_log'] = np.log10(values)
        n_loaded += 1

    if n_loaded == 0:
        raise ValueError(f'no readable CSV grid found in {data_dir!r}')
    return pd.DataFrame(columns)


def plot_wind(df, var1, var2, interpolated=False):
    """Scatter the magnitude of a two-component vector field on a map.

    Points are placed at ``df['sp_lon']`` / ``df['sp_lat']`` and coloured by
    ``sqrt(df[var1]**2 + df[var2]**2)``. The figure is written to
    ``wind_speed_inter.png`` when ``interpolated`` is true and to
    ``wind_speed.png`` otherwise, and is then shown.
    """
    # Map axes with coastlines and labelled grid lines
    ax = plt.axes(projection=ccrs.PlateCarree())
    ax.coastlines()
    ax.xaxis.set_major_formatter(LongitudeFormatter())
    ax.yaxis.set_major_formatter(LatitudeFormatter())
    ax.gridlines(draw_labels=True, alpha=0.5)

    # Magnitude of the (var1, var2) vector at every point
    magnitude = np.sqrt(df[var1] ** 2 + df[var2] ** 2)
    plt.scatter(df['sp_lon'], df['sp_lat'], c=list(magnitude), cmap='RdBu')

    colorbar = plt.colorbar(pad=0.15, orientation='horizontal')
    colorbar.ax.set_title('m/s')
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')

    output_name = 'wind_speed_inter.png' if interpolated else 'wind_speed.png'
    plt.savefig(output_name)
    plt.show()


def plot_var(df, var, lon_col=None, lat_col=None, filename='nbrcs-diff.svg'):
    """Scatter one column of ``df`` on a map, coloured by its value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the coordinates and the column to plot.
    var : str
        Name of the column used for the point colours and the plot title.
    lon_col, lat_col : str, optional
        Names of the longitude / latitude columns. When omitted, ``'lon'`` /
        ``'lat'`` are used if present in ``df``; otherwise the ``'sp_lon'`` /
        ``'sp_lat'`` names used elsewhere in this file are used.
    filename : str, optional
        Path the figure is saved to. Defaults to ``'nbrcs-diff.svg'``; pass a
        distinct name per call to avoid overwriting earlier figures.
    """
    # Resolve the coordinate columns so frames using either naming convention work
    if lon_col is None:
        lon_col = 'lon' if 'lon' in df.columns else 'sp_lon'
    if lat_col is None:
        lat_col = 'lat' if 'lat' in df.columns else 'sp_lat'

    # Settings for the plot
    ax = plt.axes(projection=ccrs.PlateCarree())
    ax.coastlines()
    # Land is drawn with a high zorder so it sits on top of the scatter points
    ax.add_feature(cart.feature.LAND, zorder=100, edgecolor='k')
    ax.gridlines(draw_labels=True, alpha=0.5)
    lon_formatter = LongitudeFormatter(zero_direction_label=True)
    lat_formatter = LatitudeFormatter()
    ax.xaxis.set_major_formatter(lon_formatter)
    ax.yaxis.set_major_formatter(lat_formatter)
    plt.scatter(df[lon_col], df[lat_col], c=list(df[var]))
    bar = plt.colorbar(pad=0.15, orientation='horizontal')
    plt.title(var)
    bar.ax.set_title('Power 10^')
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.savefig(filename)
    plt.show()

# SET AREA: this function is called when extracting CYGNSS, OSKAR and ERA5
def reduce_area_of_df(df, lat_min=20, lat_max=40, lon_min=220, lon_max=260):
    """Keep the rows of ``df`` inside two latitude bands and one longitude range.

    A row is kept when ``lat_min <= |sp_lat| <= lat_max`` and
    ``lon_min <= sp_lon <= lon_max`` (all bounds inclusive). With the defaults
    this selects latitudes 20..40 and -40..-20 at longitudes 220..260.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``sp_lat`` and ``sp_lon`` columns.
    lat_min, lat_max : number, optional
        Bounds of the absolute latitude band.
    lon_min, lon_max : number, optional
        Bounds of the longitude range.

    Returns
    -------
    pandas.DataFrame
        The selected rows with their original index labels: rows with
        non-negative latitude first, followed by rows with negative latitude.
    """
    lat = df['sp_lat']
    selected = (
        lat.abs().between(lat_min, lat_max)
        & df['sp_lon'].between(lon_min, lon_max)
    )
    # Northern rows are returned ahead of southern rows.
    df_northern = df[selected & (lat >= 0)]
    df_southern = df[selected & (lat < 0)]
    return pd.concat([df_northern, df_southern])

def open_anomalies_csvs(directory='../-40_-20_220_260'):
    """Read every file in ``directory`` as CSV and stack them into one frame.

    Parameters
    ----------
    directory : str, optional
        Folder holding the CSV files. Defaults to ``'../-40_-20_220_260'``.

    Returns
    -------
    pandas.DataFrame
        The rows of all files, concatenated in sorted file-name order, with a
        fresh 0..n-1 index so that row labels are unique.

    Raises
    ------
    ValueError
        If ``directory`` contains no files.
    """
    # os.listdir order is arbitrary; sort so the row order is reproducible.
    paths = [os.path.join(directory, name) for name in sorted(os.listdir(directory))]
    # Sub-directories (e.g. notebook checkpoint folders) cannot be parsed as CSV.
    df_list = [pd.read_csv(path) for path in paths if os.path.isfile(path)]
    if not df_list:
        raise ValueError(f'no files to read in {directory!r}')
    # ignore_index avoids duplicate row labels, which break label-aligned
    # operations such as boolean-mask indexing further down the notebook.
    return pd.concat(df_list, ignore_index=True)

In [None]:
# Load the model grids, dropping every grid cell that has a NaN in any column
mic_df = retrieve_microplastics_gt().dropna()

# Both interpolators are built over the same (lon, lat) sample points
_grid_points = list(zip(mic_df['sp_lon'], mic_df['sp_lat']))
interp_micro_mass = LinearNDInterpolator(_grid_points, mic_df['vansebillemodel_mass_log'])
interp_micro_abu = LinearNDInterpolator(_grid_points, mic_df['vansebillemodel_abundance_log'])

In [None]:
# mic_df stores its coordinates as 'sp_lon'/'sp_lat'; expose them under the
# 'lon'/'lat' names that plot_var reads, otherwise the call raises KeyError.
plot_var(
    mic_df.rename(columns={'sp_lon': 'lon', 'sp_lat': 'lat'}),
    'vansebillemodel_mass_log',
)

In [None]:
# Load the anomaly samples
df = open_anomalies_csvs()

# Interpolated model values (log10 mass / abundance) at every sample position
mass = interp_micro_mass(df['lon'], df['lat'])
abu = interp_micro_abu(df['lon'], df['lat'])

# delta: magnitude of the (wind - current) difference vector.
# Rows with a NaN in any column are dropped.
df = df.assign(
    delta=np.sqrt((df.wind_u10 - df.current_u) ** 2 + (df.wind_v10 - df.current_v) ** 2),
    micro_mass=mass,
    abundace=abu,
).dropna()
df

In [None]:
# Correlation matrix of the columns of df (pandas' default correlation method)
df.corr()

In [None]:
# Keep samples with wind speed in [3, 11] and MSS anomaly in (-0.2, 0.1).
# All conditions are evaluated on `df` itself and combined into a single mask,
# so the mask index always matches the frame it is applied to.
# (A previously tried selection on `delta`, 2 <= delta <= 6, is not applied.)
wind_speed = np.sqrt(df.wind_u10**2 + df.wind_v10**2)
keep = (
    (wind_speed >= 3)
    & (wind_speed <= 11)
    & (df.mss_anomaly_towards < 0.1)
    & (df.mss_anomaly_towards > -0.2)
)
df_modified = df[keep]
df_modified

In [None]:
# Pair each MSS anomaly with the interpolated microplastic value
bin_df = pd.DataFrame({'mss': df_modified.mss_anomaly_towards, 'micro': df_modified.micro_mass})

# 150 equally spaced edges between the smallest and largest anomaly -> 149 bins
mss_edges = np.linspace(bin_df.mss.min(), bin_df.mss.max(), 150)

# pd.cut intervals are left-open by default, which would leave the minimum
# sample outside every bin (code -1); include_lowest=True puts it in bin 0.
cut = pd.cut(bin_df['mss'], bins=mss_edges, include_lowest=True).cat.codes
bin_df['mss_bins'] = cut
bin_df

In [None]:
# Average microplastic value inside each MSS-anomaly bin
mean_micro_by_bin = bin_df.groupby('mss_bins', as_index=False)['micro'].mean()

# Step plot of the per-bin mean against the bin code
plt.step(mean_micro_by_bin['mss_bins'], mean_micro_by_bin['micro'], where='mid')
plt.xlabel('mss_bins')
plt.ylabel('micro')
plt.show()

In [None]:
# Select the needed columns by name (not by position) and work on an explicit
# copy, so the assignments below do not write into a slice of df_modified.
first_n_column = df_modified[['lon', 'lat', 'mss_anomaly_mostafa']].copy()

# Snap every sample to the nearest whole degree and average the anomaly per cell
first_n_column['lon'] = first_n_column['lon'].round(0).astype(int)
first_n_column['lat'] = first_n_column['lat'].round(0).astype(int)
first_n_column = first_n_column.groupby(['lon', 'lat'], as_index=False)['mss_anomaly_mostafa'].mean()

# Interpolated model values at the cell positions
mass = interp_micro_mass(first_n_column['lon'], first_n_column['lat'])
abu = interp_micro_abu(first_n_column['lon'], first_n_column['lat'])
first_n_column['micro_mass'] = mass
first_n_column['abundace'] = abu
first_n_column

In [None]:
# Map the gridded model value and the gridded MSS anomaly.
# NOTE(review): both calls use plot_var's default output file name, so the
# figure saved by the second call replaces the one saved by the first.
plot_var(first_n_column, 'micro_mass')
plot_var(first_n_column, 'mss_anomaly_mostafa')

In [None]:
from catboost import CatBoostRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import LinearRegression

# plot the dataset and the model's line of best fit
def plot_best_fit(X, y, model):
    """Fit ``model`` on ``(X, y)`` and draw its predictions over a scatter of the data.

    The prediction curve is evaluated on a grid from ``X.min()`` up to (but
    excluding) ``X.max()`` in steps of 0.01 and drawn in red. The plot title is
    the class name of ``model``.
    """
    # fit the model on all data
    model.fit(X, y)

    # scatter of the raw samples
    plt.scatter(X, y)

    # model predictions along an evenly spaced grid, passed as a column vector
    grid = np.arange(X.min(), X.max(), 0.01)
    predictions = model.predict(grid.reshape(-1, 1))
    plt.plot(grid, predictions, color='r')

    plt.title(type(model).__name__)
    plt.show()
# define the model; any estimator exposing fit/predict (e.g. LinearRegression())
# can be swapped in here
model = CatBoostRegressor(verbose = False)
plot_best_fit(np.array(df_modified.mss_anomaly_towards).reshape(-1, 1), df_modified.micro_mass, model)

In [None]:
# 2-D histogram of MSS anomaly against the interpolated (log10) microplastic mass
plot_df = pd.DataFrame({
    'mss_anomaly_towards': df_modified.mss_anomaly_towards,
    'micro_mass': df_modified.micro_mass,
})
# Optional restriction to a narrower anomaly band (disabled):
#plot_df = plot_df[plot_df.mss_anomaly_towards > -0.1227]
#plot_df = plot_df[plot_df.mss_anomaly_towards < -0.0478]
fig = px.density_heatmap(
    plot_df,
    x="mss_anomaly_towards",
    y="micro_mass",
    nbinsx=50,
    color_continuous_scale=px.colors.sequential.Blackbody,
)

fig.show()

In [None]:
# 2-D histogram of nbrcs against the wind speed (magnitude of the u/v components)
df_wind_plot = pd.DataFrame({
    'nbrcs': df_modified.nbrcs,
    'wind': np.sqrt(df_modified.wind_u10**2 + df_modified.wind_v10**2),
})
fig = px.density_heatmap(
    df_wind_plot,
    x="nbrcs",
    y="wind",
    color_continuous_scale=px.colors.sequential.Blackbody,
)
fig.show()