In [None]:
import os
from tqdm import tqdm
import pandas as pd
import numpy as np
from scipy import stats
from datetime import datetime, timedelta
from scipy.interpolate import LinearNDInterpolator
import plotly.express as px
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy as cart
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import matplotlib
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
from regions import PixCoord, PolygonPixelRegion
import xarray as xr

In [None]:
# Goes from monthly MSS anomaly observations to an all-time MSS anomaly with microplastics interpolated (1x1)
directory = "C:/Users/syversk/Desktop/mss_v3.0/monthly_mss_ano"
files = os.listdir(directory)
# One frame per file in the directory, in the order os.listdir returned them
df_list = [pd.read_csv(directory + "/" + file_name) for file_name in tqdm(files)]
# Go from monthly to all-time average per (lon, lat), then attach the microplastic columns
df = interp_microplastics(group_dataframe_temporally(pd.concat(df_list)))

In [None]:
# Display the frame built in the previous cell.
df

In [None]:
# Flatten the NetCDF dataset into a table and drop every row that has a missing value
ds = xr.open_dataset('C:/Users/syversk/Desktop/evans_ruf.nc')
df = ds.to_dataframe().reset_index().dropna()

# NOTE(review): the /4 and -37 presumably turn quarter-degree grid indices into
# whole-degree coordinates — confirm against the coordinate definition in the file.
df = df.assign(
    lon=df["lon"].apply(lambda value: round(value/4)),
    lat=df["lat"].apply(lambda value: round(value/4)-37),
)

# Average mss_anom over all rows that share a rounded cell, then add the microplastic columns
df = interp_microplastics(
    df.groupby(['lon', "lat"], as_index=False)[['mss_anom']].mean()
)


In [None]:
# Display the frame built in the previous cell.
df

In [None]:
# Give three of the anomaly columns descriptive names (modifies df in place).
# NOTE(review): the source names are among the columns selected by
# group_dataframe_temporally; a frame without them is left unchanged by this
# rename and the later cells that use the new names will then fail.
df.rename(columns = {'mss_ano_w_gdt_pasific':'mss_anomaly_new_regions_refitted', 'mss_ano_w_gdt_towards_cr':'mss_anomaly_old_regions_refitted',
                    'mss_ano_w_towards':'mss_anomaly_old_regions_refrence'}, inplace = True)

In [None]:
# Keep only rows whose refitted anomaly lies strictly between -0.3 and 0.1
df = df[
    (df["mss_anomaly_new_regions_refitted"] > -0.3)
    & (df["mss_anomaly_new_regions_refitted"] < 0.1)
]

In [None]:
# Pairwise correlation matrix of the columns of df after the range filter.
df.corr()

In [None]:
# Map one variable at a time; the commented-out lines are the alternatives.
# NOTE(review): the "peaks" column is only created in the next cell
# (df["peaks"] = ...), so this call needs that cell to have been run first.
#plot_var_2(df, "mss_anomaly_new_regions_refitted")
#plot_var_2(df, "mss_anomaly_old_regions_refitted")
#plot_var_2(df, "mss_anomaly_old_regions_refrence")
plot_var_2(df, "peaks")
#plot_var_2(df, "micro_mass")

In [None]:
# Look up the peak count for every row's (lat, lon); peak_interp is called once
# per row and yields NaN where the peaks table has no matching entry.
df["peaks"] = df.apply(lambda row: peak_interp(row.lat, row.lon), axis = 1)

In [None]:
# Correlation matrix again, now including the "peaks" column.
df.corr()

In [None]:
# Restrict to the box 20 <= lat <= 40, 170 <= lon <= 210 (bounds inclusive)
df = df[
    (df.lat >= 20) & (df.lat <= 40)
    & (df.lon >= 170) & (df.lon <= 210)
]

In [None]:
# Correlation matrix restricted to the lat/lon box selected above.
df.corr()

In [None]:
# Trim df to the value window of the given column that maximises its
# correlation with micro_mass, trying 100 candidate bounds.
# NOTE(review): "mss_anomaly_wind_gdt" is not one of the names produced by the
# rename cell above — confirm the column exists in df before running.
df = find_highest_corrolation_boundries(df, 100, "mss_anomaly_wind_gdt")

In [None]:
# Keep only rows whose refitted anomaly lies strictly between -0.3 and 1
df = df[
    (df["mss_anomaly_new_regions_refitted"] > -0.3)
    & (df["mss_anomaly_new_regions_refitted"] < 1)
]

In [None]:
# Keep only rows with a peak count strictly between 0 and 700
df = df[(df["peaks"] > 0) & (df["peaks"] < 700)]

In [None]:
# Drop rows flagged as outliers by reduce_based_on_sd (|z-score| of 3 or more in any column).
df = reduce_based_on_sd(df)

In [None]:
# Map the microplastic mass and the anomaly side by side for visual comparison.
# NOTE(review): "mss_anomaly_wind_gdt" is not one of the names produced by the
# rename cell — confirm the column exists in df.
plot_var_2(df, "micro_mass")
#plot_var_2(df, "peaks")
plot_var_2(df, "mss_anomaly_wind_gdt")

In [None]:
# 2-D density of detected peaks (x axis) against microplastic mass (y axis)
fig = px.density_heatmap(
    df,
    x="peaks",
    y="micro_mass",
    color_continuous_scale=px.colors.sequential.Blackbody,
)
fig.update_layout(
    font={"size": 25},
    legend_title="Legend Title",
    xaxis_title=r"$\text{Detected Peaks}$",
    yaxis_title=r"$\text{Microplastic mass, (g/km^2, log10 scale)}$",
)
fig.show()

In [None]:
# Mean microplastic mass per MSS-anomaly bin, drawn as a step curve
bin_df = pd.DataFrame({
    'mss': df.mss_anomaly_new_regions_refitted,
    'micro': df.micro_mass,
})
# 40 equally spaced edges between the observed min and max; every row is
# labelled with the left edge of the bin it falls into
bin_df['mss_ano'] = pd.cut(
    bin_df['mss'],
    bins=np.linspace(bin_df.mss.min(), bin_df.mss.max(), 40),
).apply(lambda interval: interval.left)
mean_micro_by_bin = bin_df.groupby(['mss_ano'], as_index=False)['micro'].mean()

plt.step(mean_micro_by_bin['mss_ano'], mean_micro_by_bin['micro'], where='mid')
plt.xlabel('MSS anomaly #3')
plt.ylabel('Microplastic mass, (g/km^2, log10 scale)')
plt.savefig("v3.0_results/" + "box_plot" + ".png", bbox_inches='tight')
plt.show()

In [None]:
#Average spatially
def make_data_grided(df):
    """Snap the ``lon`` and ``lat`` columns of ``df`` to whole numbers.

    Every value is passed through Python's built-in ``round`` and written back
    into ``df`` itself; the same frame object is returned.
    """
    for coord in ("lon", "lat"):
        df[coord] = df[coord].apply(lambda value: round(value))
    return df

def interp_microplastics(df, micro_path="C:/Users/syversk/Desktop/micro_df.csv"):
    """Add interpolated microplastic columns to ``df``.

    The reference table at ``micro_path`` must have the columns ``lon``,
    ``lat``, ``vansebillemodel_mass_log`` and ``vansebillemodel_abundance_log``.
    Both value columns are interpolated linearly at every (lon, lat) of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``lon`` and ``lat`` columns. It is modified in place: the
        columns ``micro_mass`` and ``abundace`` are added (or overwritten).
    micro_path : str, optional
        CSV file holding the reference table. Defaults to the location the
        notebook has always used.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object. Positions outside the convex hull of the
        reference points get NaN (LinearNDInterpolator's default fill value).
    """
    mic_df = pd.read_csv(micro_path)
    points = mic_df[['lon', 'lat']].to_numpy(dtype=float)
    # One interpolator with two value columns: the triangulation of the
    # reference points is built once instead of once per variable.
    values = mic_df[['vansebillemodel_mass_log', 'vansebillemodel_abundance_log']].to_numpy(dtype=float)
    interp_micro = LinearNDInterpolator(points, values)
    interpolated = interp_micro(df['lon'].to_numpy(), df['lat'].to_numpy())
    df['micro_mass'] = interpolated[:, 0]
    # The misspelled column name is kept as is because other code may read it.
    df['abundace'] = interpolated[:, 1]
    return df

#Average all temporally
def group_dataframe_temporally(df):
    """Average the eight MSS-anomaly columns over all rows sharing a (lon, lat).

    Returns a new frame with one row per (lon, lat) pair; ``lon`` and ``lat``
    stay ordinary columns because the grouping uses ``as_index=False``.
    """
    anomaly_columns = [
        'mss_ano_w_gdt_pasific', 'mss_ano_d_gdt_pasific',
        'mss_ano_w_gdt_towards_cr', 'mss_ano_d_gdt_towards_cr',
        'mss_ano_w_refitted', 'mss_ano_d_refitted',
        "mss_ano_w_towards", "mss_ano_d_towards",
    ]
    per_cell = df.groupby(['lon', 'lat'], as_index=False)
    return per_cell[anomaly_columns].mean()


def reduce_based_on_sd(df):
    """Drop rows that are more than three standard deviations out in any column.

    Z-scores are computed per numeric column. Missing values are ignored when
    the column mean and standard deviation are estimated
    (``nan_policy="omit"``); with the default policy a single NaN turns the
    whole column's z-scores into NaN and every row would be discarded.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter. Non-numeric columns are not used for the test but
        are kept in the result.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` whose absolute z-score is below 3 in every numeric
        column. A row with a missing value in a numeric column is dropped,
        because its NaN z-score does not pass the ``< 3`` comparison.
        ``df`` is returned unchanged when it has no rows or no numeric columns.
    """
    numeric = df.select_dtypes(include="number")
    if numeric.empty:
        return df
    z_scores = stats.zscore(numeric.to_numpy(dtype=float), axis=0, nan_policy="omit")
    within_three_sd = (np.abs(z_scores) < 3).all(axis=1)
    return df[within_three_sd]

def plot_var_2(df, var):
    """Scatter ``df[var]`` on a map centred on longitude 180, save it and show it.

    Variables whose name starts with "mic" or "pea" are drawn with the 'jet'
    colormap, all others with the reversed 'jet'. The figure is written to
    "v3.0_results/<var>.png" before it is displayed. The colorbar caption is
    the same fixed text for every variable.
    """
    jet = plt.get_cmap('jet')
    if var[0:3] in ("mic", "pea"):
        mycmap = jet
    else:
        mycmap = jet.reversed()

    # Map axes: Plate Carree projection with central longitude 180
    ax = plt.axes(projection=ccrs.PlateCarree(180))
    ax.coastlines()
    ax.add_feature(cart.feature.LAND, zorder=100, edgecolor='k')
    ax.gridlines(draw_labels=True, alpha=0.5)
    ax.xaxis.set_major_formatter(LongitudeFormatter(zero_direction_label=True))
    ax.yaxis.set_major_formatter(LatitudeFormatter())

    # Longitudes are shifted by 180 before being handed to scatter
    plt.scatter(df['lon'] + 180, df['lat'], c=list(df[var]), cmap=mycmap)
    bar = plt.colorbar(pad=0.15, orientation='horizontal')
    bar.ax.set_title('[g/km^2, log10 scale]')

    plt.title(var)
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.savefig("v3.0_results/" + var + ".png", bbox_inches='tight')
    plt.show()
    
def find_highest_corrolation_boundries(df, number_ints ,param = "mss_anomaly_delta_refitted"):
    """Trim ``df`` to the ``param`` window that correlates best with ``micro_mass``.

    ``number_ints`` equally spaced candidate bounds are taken between the
    minimum and maximum of ``df[param]``. Every (upper, lower) pair is tried;
    a pair only counts when strictly more than two thirds of the rows satisfy
    ``lower < df[param] < upper``. The pair with the largest absolute
    correlation between ``param`` and ``micro_mass`` wins.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``param`` and ``micro_mass``.
    number_ints : int
        Number of candidate bounds.
    param : str, optional
        Column whose value range is trimmed.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` inside the winning window. When no window both
        keeps enough rows and yields a non-zero, non-NaN correlation, ``df``
        is returned unfiltered (previously this raised UnboundLocalError).
    """
    candidates = np.linspace(df[param].min(), df[param].max(), number_ints).tolist()
    min_rows = len(df) * 2 / 3
    max_corr = 0
    max_int = None
    min_int = None
    for numb in candidates:
        below = df[df[param] < numb]
        # The lower bound can only remove more rows, so an upper bound that
        # already keeps too few can be skipped outright.
        if len(below) <= min_rows:
            continue
        for numb2 in candidates:
            window = below[below[param] > numb2]
            if len(window) <= min_rows:
                continue
            # Only this one pairwise correlation is needed, not the whole
            # correlation matrix of the frame.
            corr = window[param].corr(window["micro_mass"])
            if abs(corr) > abs(max_corr):
                max_corr = corr
                max_int = numb
                min_int = numb2
    if max_int is None:
        print("No window keeps more than 2/3 of the rows with a usable correlation; returning df unfiltered")
        return df
    print(max_corr, max_int, min_int)
    return df[(df[param] < max_int) & (df[param] > min_int)]

# Parsed peaks tables keyed by file path, so the CSV is read once rather than
# once per looked-up row.
_PEAKS_TABLE_CACHE = {}


def _load_peaks_table(peaks_path):
    """Return the peaks table stored at ``peaks_path``, reading it on first use only."""
    if peaks_path not in _PEAKS_TABLE_CACHE:
        _PEAKS_TABLE_CACHE[peaks_path] = pd.read_csv(peaks_path)
    return _PEAKS_TABLE_CACHE[peaks_path]


def peak_interp(lat, lon, peaks_path="C:/Users/syversk/Desktop/peaks_fixed/peaks_3_5_winds.csv"):
    """Look up ``num_peaks`` for an exact (lat, lon) match in the peaks table.

    Parameters
    ----------
    lat, lon : number
        Coordinates compared for equality against the table's ``lat`` and
        ``lon`` columns.
    peaks_path : str, optional
        CSV file with the columns ``lat``, ``lon`` and ``num_peaks``.

    Returns
    -------
    The ``num_peaks`` value of the first matching row, or ``numpy.nan`` when
    no row matches.

    Notes
    -----
    Only "no matching row" is mapped to NaN. A missing file or a missing
    column now raises instead of silently yielding NaN for every lookup.
    """
    df_andreas = _load_peaks_table(peaks_path)
    is_match = (df_andreas.lat == lat) & (df_andreas.lon == lon)
    matches = df_andreas.loc[is_match, "num_peaks"]
    if matches.empty:
        return np.nan
    return matches.iloc[0]

In [None]:
# Imported under an alias: a plain `import datetime` would rebind the name
# `datetime`, which the first cell imports as the class (from datetime import datetime).
import datetime as dt
# Reads every monthly MSS anomaly file, tags its rows with the month they belong to,
# and stacks them into one table with the microplastic columns added (no temporal averaging here).
directory = "C:/Users/syversk/Desktop/mss_v3.0/monthly_mss_ano"
files = os.listdir(directory)
df_list = []
for i in tqdm(range(len(files))):
    df_tmp = pd.read_csv(directory + "/" + files[i])
    # Characters 0-3 and 5-6 of the file name are parsed as year and month
    # NOTE(review): assumes names of the form "YYYY?MM..." — confirm for every file in the directory
    year = files[i][0:4]
    month = files[i][5:7]
    date = dt.date(year=int(year), month=int(month), day=1)
    df_tmp["date"] = date
    df_list.append(df_tmp)
df_all_months = pd.concat(df_list)
df_all_months = interp_microplastics(df_all_months)
df_all_months.rename(columns = {'mss_ano_w_gdt_pasific':'mss_anomaly_new_regions_refitted', 'mss_ano_w_gdt_towards_cr':'mss_anomaly_old_regions_refitted',
                    'mss_ano_w_towards':'mss_anomaly_old_regions_refrence'}, inplace = True)

In [None]:
# For each Pacific box keep only the date and the refitted anomaly, stored
# under a region-specific column name
south = (
    reduce_area_of_df_pacific(df_all_months, "south")
    [["date", "mss_anomaly_new_regions_refitted"]]
    .rename(columns={"mss_anomaly_new_regions_refitted": "South_Pasific"})
)
north = (
    reduce_area_of_df_pacific(df_all_months, "north")
    [["date", "mss_anomaly_new_regions_refitted"]]
    .rename(columns={"mss_anomaly_new_regions_refitted": "North_Pasific"})
)

In [None]:
def reduce_area_of_df_pacific(df_all_months, cr):
    """Select the rows inside the North or South Pacific box.

    Parameters
    ----------
    df_all_months : pandas.DataFrame
        Frame with ``lat`` and ``lon`` columns.
    cr : str
        ``"north"`` selects 20 <= lat <= 35; any other value selects
        -35 <= lat <= -20.

    Returns
    -------
    pandas.DataFrame
        The rows within the latitude band and with 150 <= lon <= 230
        (all bounds inclusive).
    """
    if cr == "north":
        lat_min, lat_max = 20, 35
    else:
        lat_min, lat_max = -35, -20
    lat = df_all_months.lat
    lon = df_all_months.lon
    # The lower longitude bound used to be written as `lon <= 150`, which
    # kept everything west of 150 and made the 230 bound redundant.
    in_box = (lat >= lat_min) & (lat <= lat_max) & (lon >= 150) & (lon <= 230)
    return df_all_months[in_box]


In [None]:
# Mean anomaly per date for each region; every row carries a value for only
# one of the two region columns, and the mean skips the missing ones
df = (
    pd.concat([south, north])
    .groupby(['date'], as_index=False)[['South_Pasific', "North_Pasific"]]
    .mean()
)
df.plot(x='date', y=["South_Pasific", 'North_Pasific'], figsize=(16,8), x_compat=True)

In [None]:
# Same per-date regional means as the previous cell, plotted at the default figure size.
# The region columns are named 'South_Pasific' / 'North_Pasific' when south and north
# are built; selecting 'south' / 'north' raised a KeyError.
df = pd.concat([south, north])
df = df.groupby(['date'], as_index=False)[['South_Pasific', "North_Pasific"]].mean()
df.plot(x='date', y=["North_Pasific", 'South_Pasific'])

In [None]:
# Quick check of the reference table: smallest longitude present in micro_df.csv
# (the same file interp_microplastics reads).
directory = "C:/Users/syversk/Desktop"
mic_df = pd.read_csv(directory + "/" + "micro_df.csv")
min(mic_df.lon)