# mounting drive for data access

In [1]:
from google.colab import drive  
drive._mount('/content/drive') 

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# installing libraries


In [2]:
!pip install cdsapi
!pip install terality



In [3]:
import cdsapi
import netCDF4 as nc
from netCDF4 import num2date
from netCDF4 import Dataset
import numpy as np
import os
import terality as te
import pandas as pd
import xarray as xr

In [4]:
!terality account configure --email seafishleo@gmail.com

Your Terality API key: tla_zEJlMW8d7vpwG9Kdo2OcWupmEUvHQkfDwTX48slntkXt7KCFyNDxGYxVwXXk2cAUrX
Are you sure you want to overwrite the existing Terality configuration? [y/N]: y
Terality account succesfully configured on this system.




# Read Data from Drive 

In [5]:
my_nc_file = '/content/drive/MyDrive/data/ERA5_monthly_north.csv'
north_data = te.read_csv(my_nc_file)
north_data.head(100)

Unnamed: 0,expver,latitude,longitude,time,t2m,siconc,asn,smlt,sp,tco3
0,1,90.0,-180.0,1979-01-01,244.70773,0.976897,0.879997,0.0,102661.984,0.007461
1,1,90.0,-180.0,1979-02-01,241.44653,0.987060,0.880002,0.0,101824.310,0.009453
2,1,90.0,-180.0,1979-03-01,246.76079,0.982024,0.879997,0.0,103086.290,0.010147
3,1,90.0,-180.0,1979-04-01,252.45631,0.990020,0.880002,0.0,102567.760,0.009882
4,1,90.0,-180.0,1979-05-01,265.00360,0.989471,0.880008,0.0,102258.340,0.008224
...,...,...,...,...,...,...,...,...,...,...
95,1,90.0,-180.0,1986-12-01,252.07503,0.955686,0.880002,0.0,101387.250,0.006347
96,1,90.0,-180.0,1987-01-01,248.04338,0.956327,0.880002,0.0,101365.360,0.008430
97,1,90.0,-180.0,1987-02-01,246.08621,0.971282,0.880002,0.0,102285.090,0.010711
98,1,90.0,-180.0,1987-03-01,244.12340,0.977477,0.880002,0.0,102404.234,0.010109


In [None]:
# USING XARRAY
#my_nc_file = '/content/drive/MyDrive/data/ERA5_monthly_north.nc'
#ds = xr.open_dataset(my_nc_file)
#north_data = ds.to_dataframe()
#north_data.head(100)

In [6]:
north_data.dtypes

expver         int64
latitude     float64
longitude    float64
time          object
t2m          float64
siconc       float64
asn          float64
smlt         float64
sp           float64
tco3         float64
dtype: object

In [7]:
len(north_data)

141177600

In [8]:
north_data.count()

expver       141177600
latitude     141177600
longitude    141177600
time         141177600
t2m           70588800
siconc        52533444
asn           70588800
smlt          70588800
sp            70588800
tco3          70588800
dtype: int64

# Rename  & Calculation

In [9]:
north_data__renamed = north_data.rename(columns={'t2m':'temperature_2m','siconic':'sea_ice_area_fraction','asn':'Snow_albedo','smlt':'snow_melt','sp':'surface_pressure','tco3':
'Total_column_ozone'})

In [13]:
north_data__renamed = north_data__renamed.drop(['expver','latitude','longitude'],axis=1)

In [14]:
north_avg = north_data__renamed.groupby('time').mean()

In [15]:
north_avg

Unnamed: 0_level_0,temperature_2m,siconc,Snow_albedo,snow_melt,surface_pressure,Total_column_ozone
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1979-01-01,248.048989,0.886943,0.842807,2.149000e-07,99971.627492,0.007569
1979-02-01,243.439591,0.900277,0.836100,7.621431e-07,99641.237045,0.009261
1979-03-01,249.055998,0.892728,0.834508,8.765569e-07,100240.042241,0.010040
1979-04-01,254.599298,0.901768,0.838258,3.177189e-05,100369.270964,0.009609
1979-05-01,265.495457,0.885108,0.842589,3.034634e-04,99839.279702,0.008514
...,...,...,...,...,...,...
2021-08-01,275.370898,0.495820,0.875406,6.368199e-05,99085.033604,0.006339
2021-09-01,270.590873,0.496488,0.869970,1.973691e-05,99353.440456,0.006108
2021-10-01,263.929047,0.641613,0.860190,2.083629e-05,99299.263246,0.006653
2021-11-01,255.514865,0.790987,0.853753,9.009999e-06,99306.969931,0.006793


In [18]:
north_avg.dtypes

temperature_2m        float64
siconc                float64
Snow_albedo           float64
snow_melt             float64
surface_pressure      float64
Total_column_ozone    float64
dtype: object

# Load Seaice Extent Dataframe

In [143]:
n_seaice_df = pd.read_csv('/content/drive/MyDrive/data/N_seaice_extent_daily_v3.0.csv')
n_seaice_df

Unnamed: 0,Year,Month,Day,Extent,Missing,Source Data
0,YYYY,MM,DD,10^6 sq km,10^6 sq km,Source data product web sites: http://nsidc.o...
1,1978,10,26,10.231,0.000,['/ecs/DP1/PM/NSIDC-0051.001/1978.10.26/nt_19...
2,1978,10,28,10.420,0.000,['/ecs/DP1/PM/NSIDC-0051.001/1978.10.28/nt_19...
3,1978,10,30,10.557,0.000,['/ecs/DP1/PM/NSIDC-0051.001/1978.10.30/nt_19...
4,1978,11,01,10.670,0.000,['/ecs/DP1/PM/NSIDC-0051.001/1978.11.01/nt_19...
...,...,...,...,...,...,...
14123,2022,01,04,13.436,0.000,['/ecs/DP1/PM/NSIDC-0081.001/2022.01.03/nt_202...
14124,2022,01,05,13.477,0.000,['/ecs/DP1/PM/NSIDC-0081.001/2022.01.04/nt_202...
14125,2022,01,06,13.537,0.000,['/ecs/DP1/PM/NSIDC-0081.001/2022.01.05/nt_202...
14126,2022,01,07,13.520,0.000,['/ecs/DP1/PM/NSIDC-0081.001/2022.01.06/nt_202...


In [144]:
n_seaice_df.drop([' Source Data','    Missing',' Day'], axis = 1, inplace = True)
n_seaice_df = n_seaice_df[n_seaice_df.Year != '1978']
n_seaice_df = n_seaice_df[n_seaice_df.Year != '2022']
n_seaice_df = n_seaice_df.iloc[1: , :]
n_seaice_df.reset_index(drop=True, inplace=True)
n_seaice_df

Unnamed: 0,Year,Month,Extent
0,1979,01,14.997
1,1979,01,14.922
2,1979,01,14.929
3,1979,01,14.968
4,1979,01,15.190
...,...,...,...
14080,2021,12,12.947
14081,2021,12,12.978
14082,2021,12,13.050
14083,2021,12,13.149


In [145]:
n_seaice_df.dtypes

Year           object
 Month         object
     Extent    object
dtype: object

In [146]:
n_seaice_df["time"] = n_seaice_df["Year"].astype(str) + '-' + n_seaice_df[" Month"].str.strip()+'-01'
n_seaice_df['     Extent'] = n_seaice_df['     Extent'].astype(float)
n_seaice_df.drop(['Year',' Month'], axis = 1, inplace = True)
n_seaice_df

Unnamed: 0,Extent,time
0,14.997,1979-01-01
1,14.922,1979-01-01
2,14.929,1979-01-01
3,14.968,1979-01-01
4,15.190,1979-01-01
...,...,...
14080,12.947,2021-12-01
14081,12.978,2021-12-01
14082,13.050,2021-12-01
14083,13.149,2021-12-01


In [147]:
n_seaice_avg = n_seaice_df.groupby('time').mean()
n_seaice_avg

Unnamed: 0_level_0,Extent
time,Unnamed: 1_level_1
1979-01-01,15.414000
1979-02-01,16.175286
1979-03-01,16.341938
1979-04-01,15.446800
1979-05-01,13.856867
...,...
2021-08-01,5.753871
2021-09-01,4.915367
2021-10-01,6.770258
2021-11-01,9.808500


# Join Two Dataframes

In [149]:
north_joined_df = north_avg.join(n_seaice_avg)
north_joined_df

INFO:terality:The result of te.dataframe.from_pandas was retrieved from cache [docs: https://docs.terality.com/getting-terality/user-guide/caching].


Unnamed: 0_level_0,temperature_2m,siconc,Snow_albedo,snow_melt,surface_pressure,Total_column_ozone,Extent
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1979-01-01,248.048989,0.886943,0.842807,2.149000e-07,99971.627492,0.007569,15.414000
1979-02-01,243.439591,0.900277,0.836100,7.621431e-07,99641.237045,0.009261,16.175286
1979-03-01,249.055998,0.892728,0.834508,8.765569e-07,100240.042241,0.010040,16.341938
1979-04-01,254.599298,0.901768,0.838258,3.177189e-05,100369.270964,0.009609,15.446800
1979-05-01,265.495457,0.885108,0.842589,3.034634e-04,99839.279702,0.008514,13.856867
...,...,...,...,...,...,...,...
2021-08-01,275.370898,0.495820,0.875406,6.368199e-05,99085.033604,0.006339,5.753871
2021-09-01,270.590873,0.496488,0.869970,1.973691e-05,99353.440456,0.006108,4.915367
2021-10-01,263.929047,0.641613,0.860190,2.083629e-05,99299.263246,0.006653,6.770258
2021-11-01,255.514865,0.790987,0.853753,9.009999e-06,99306.969931,0.006793,9.808500
