# Are the trends in the Caribbean low cloud record "real"?
### - Are they similar from station to station? (Open question: what do we mean by similarity?)
### - Are they related to trends in variables that are known to control cloudiness, e.g. SST and low-level stability?
### - Is the relationship sensible? (similar to the controls operating at other time scales)
### - Can we extrapolate from the trend?

### Let's begin by loading all the requisite libraries and data

In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt

## We begin by calculating the monthly percentage occurrences for St. Lucia.

In [5]:
# Absolute path of the St Lucia station file (NetCDF)
filename = '/Users/dessyb/Documents/GitHub/station_data/saint_lucia_1971-2020.nc'
stl = xr.open_dataset(filename).to_pandas()

# Combine the year/month/day/hour columns into a single timestamp column
# and make that column the index of the table
stl = stl.assign(
    Datetime=pd.to_datetime(stl[['year', 'month', 'day', 'hour']])
).set_index('Datetime')

# Remove the columns treated as redundant here: the date parts (now held by
# the index) and the station identification / elevation columns
stl = stl.drop(
    columns=['station', 'name', 'year', 'month', 'hour', 'day', 'elevation']
)

# Show the first rows as the cell output
stl.head()

Unnamed: 0_level_0,Unnamed: 0,lat,lon,ws,wd,n,slp,at,h,nh,cl,cm,ch,visibility,dwpt,precip
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1972-04-21 15:00:00,0,13.8,-61.0,72.0,90.0,2.0,10141.0,280.0,5.0,2.0,1.0,0.0,0.0,,,
1972-04-22 15:00:00,1,13.8,-61.0,51.0,80.0,5.0,10153.0,280.0,5.0,3.0,8.0,7.0,2.0,,,
1972-04-24 15:00:00,2,13.8,-61.0,51.0,90.0,7.0,10147.0,250.0,4.0,4.0,8.0,2.0,-1.0,,,
1972-05-17 12:00:00,3,13.8,-61.0,51.0,120.0,1.0,10131.0,280.0,5.0,1.0,1.0,0.0,0.0,,,
1972-05-19 15:00:00,4,13.8,-61.0,51.0,120.0,4.0,10136.0,300.0,4.0,4.0,8.0,0.0,1.0,,,


### Data Management and Quality Control

In [10]:
# Overwrite every longitude with the column minimum so the value is negative
# (presumably some records carry the wrong sign — TODO confirm)
stl['lon'] = stl['lon'].min()

# Quality control: wherever dwpt exceeds 320, every column of that record is
# replaced by NaN (the row itself stays in the table).
# NOTE(review): the threshold units are not stated here — confirm.
stl = stl.mask(stl['dwpt'] > 320)

### Calculating Monthly Percentage Occurrences

In [12]:
# One indicator column per low-cloud code 0-9: lcN is True where the reported
# `cl` value equals N.  The columns are assigned directly; building a source
# string and running it through exec() is unnecessary for a plain assignment.
lc_cols = [f'lc{code}' for code in range(10)]
for code, col in enumerate(lc_cols):
    # Echo each definition so the cell output still lists the columns created
    print(f"stl['{col}'] = (stl['cl'] =={code})")
    stl[col] = (stl['cl'] == code)

# Mean of every column within each (year, month) group.  For the boolean lcN
# columns this is the fraction of records in that month reporting code N.
# NOTE(review): a record whose `cl` is NaN (including rows blanked by the
# dew-point QC) compares False for every code, so it still counts in the
# denominator — confirm this is intended.
stl_momeans = stl.groupby([stl.index.year, stl.index.month]).mean()

# Keep only the ten low-cloud frequency columns
stl_momeancloud = stl_momeans[lc_cols]

# Trailing 3-row mean used as a seasonal smoother; the first two rows are NaN.
# NOTE(review): the window spans three consecutive *rows*, not three calendar
# months.  Months without observations are absent from the grouped index, so
# some windows cover more than one season — consider reindexing onto a
# complete monthly grid before smoothing.
stl_seasmeans = stl_momeancloud.rolling(3).mean()

stl_seasmeans.head(14)  # view the first 14 rows

stl['lc0'] = (stl['cl'] ==0)
stl['lc1'] = (stl['cl'] ==1)
stl['lc2'] = (stl['cl'] ==2)
stl['lc3'] = (stl['cl'] ==3)
stl['lc4'] = (stl['cl'] ==4)
stl['lc5'] = (stl['cl'] ==5)
stl['lc6'] = (stl['cl'] ==6)
stl['lc7'] = (stl['cl'] ==7)
stl['lc8'] = (stl['cl'] ==8)
stl['lc9'] = (stl['cl'] ==9)


Unnamed: 0_level_0,Unnamed: 1_level_0,lc0,lc1,lc2,lc3,lc4,lc5,lc6,lc7,lc8,lc9
Datetime,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1972,4,,,,,,,,,,
1972,5,,,,,,,,,,
1972,6,0.0,0.288889,0.333333,0.0,0.0,0.0,0.0,0.0,0.344444,0.0
1972,7,0.0,0.219444,0.458333,0.020833,0.0,0.020833,0.0,0.0,0.247222,0.0
1972,8,0.0,0.225,0.358333,0.020833,0.0,0.020833,0.0,0.0,0.341667,0.0
1972,9,0.0,0.264394,0.312879,0.020833,0.0,0.020833,0.0,0.0,0.381061,0.0
1972,10,0.0,0.356061,0.387879,0.0,0.0,0.0,0.0,0.0,0.256061,0.0
1972,12,0.0,0.239394,0.654545,0.0,0.0,0.0,0.0,0.0,0.106061,0.0
1973,2,0.0,0.17037,0.718519,0.0,0.0,0.0,0.0,0.0,0.111111,0.0
1973,3,0.0,0.074074,0.703704,0.0,0.0,0.0,0.0,0.0,0.185185,0.037037


## Having successfully calculated the monthly low cloud percentage occurrences for St. Lucia, let's do it for the other stations

In [31]:
def _station_low_cloud_stats(
    path,
    drop_cols=('station', 'name', 'year', 'month', 'hour', 'day', 'elevation'),
    mask_bad_dwpt=True,
):
    """Load one station file and compute its low-cloud type frequencies.

    Parameters
    ----------
    path : str
        Path of the station NetCDF file.
    drop_cols : sequence of str
        Columns removed after the datetime index has been built.
    mask_bad_dwpt : bool
        When True, every column of a record whose ``dwpt`` exceeds 320 is
        set to NaN.

    Returns
    -------
    tuple
        ``(obs, momeans, momeancloud, seasmeans)``:
        the indexed observation table including the ``lc0``..``lc9``
        indicator columns; the per-(year, month) mean of every column; the
        ``lc0``..``lc9`` subset of those means; and that subset smoothed
        with a trailing 3-row rolling mean.
    """
    obs = xr.open_dataset(path).to_pandas()

    # Build a timestamp from the date-part columns and make it the index
    obs['Datetime'] = pd.to_datetime(obs[['year', 'month', 'day', 'hour']])
    obs = obs.set_index('Datetime')

    # Drop the columns treated as redundant for this station
    obs = obs.drop(columns=list(drop_cols))

    # Overwrite every longitude with the column minimum so it is negative
    obs['lon'] = obs['lon'].min()

    if mask_bad_dwpt:
        # Blank the whole record (all columns -> NaN) when dwpt is absurd
        obs = obs.mask(obs['dwpt'] > 320)

    # Indicator column per low-cloud code: lcN is True where `cl` equals N.
    # Direct assignment replaces the former string-building + exec() loop.
    lc_cols = [f'lc{code}' for code in range(10)]
    for code, col in enumerate(lc_cols):
        obs[col] = (obs['cl'] == code)

    # Mean of every column per (year, month); for lcN this is the fraction
    # of records in that month reporting code N.
    # NOTE(review): records with NaN `cl` compare False for every code and
    # therefore still count in the denominator — confirm this is intended.
    momeans = obs.groupby([obs.index.year, obs.index.month]).mean()

    momeancloud = momeans[lc_cols]

    # Trailing 3-row mean as a seasonal smoother.
    # NOTE(review): the window is three rows, not three calendar months;
    # months missing from the grouped index make some windows longer.
    seasmeans = momeancloud.rolling(3).mean()

    return obs, momeans, momeancloud, seasmeans


# Puerto Vallarta (the file name on disk is spelled "puerto_vallerta")
filename = '/Users/dessyb/Documents/GitHub/station_data/puerto_vallerta_1971-2020.nc'
pvl, pvl_momeans, pvl_momeancloud, pvl_seasmeans = _station_low_cloud_stats(filename)

# Puerto Rico
filename = '/Users/dessyb/Documents/GitHub/station_data/puerto_rico_1971-2020.nc'
prc, prc_momeans, prc_momeancloud, prc_seasmeans = _station_low_cloud_stats(filename)

# Jamaica
filename = '/Users/dessyb/Documents/GitHub/station_data/jamaica_1971-2020.nc'
jam, jam_momeans, jam_momeancloud, jam_seasmeans = _station_low_cloud_stats(filename)

# Dominican Republic: the dew-point QC mask is not applied to this station.
# NOTE(review): confirm that skipping the mask here is deliberate.
filename = '/Users/dessyb/Documents/GitHub/station_data/dominican_republic_1971-2020.nc'
drc, drc_momeans, drc_momeancloud, drc_seasmeans = _station_low_cloud_stats(
    filename, mask_bad_dwpt=False
)

# Cuba
filename = '/Users/dessyb/Documents/GitHub/station_data/cuba_1971-2020.nc'
cub, cub_momeans, cub_momeancloud, cub_seasmeans = _station_low_cloud_stats(filename)

# Bermuda: 'name' is not in the list of dropped columns for this station.
# NOTE(review): presumably this file has no 'name' column — confirm.
filename = '/Users/dessyb/Documents/GitHub/station_data/bermuda_1971-2020.nc'
ber, ber_momeans, ber_momeancloud, ber_seasmeans = _station_low_cloud_stats(
    filename,
    drop_cols=('station', 'year', 'month', 'hour', 'day', 'elevation'),
)

# Bahamas
filename = '/Users/dessyb/Documents/GitHub/station_data/bahamas_1971-2020.nc'
bah, bah_momeans, bah_momeancloud, bah_seasmeans = _station_low_cloud_stats(filename)

# Barbados (record ends in 2015): the dew-point QC mask is not applied.
# NOTE(review): confirm that skipping the mask here is deliberate.
filename = '/Users/dessyb/Documents/GitHub/station_data/barbados_1971-2015.nc'
bar, bar_momeans, bar_momeancloud, bar_seasmeans = _station_low_cloud_stats(
    filename, mask_bad_dwpt=False
)

In [19]:
# Display the Bahamas rolling-mean low-cloud frequency table computed above
bah_seasmeans

Unnamed: 0_level_0,Unnamed: 1_level_0,lc0,lc1,lc2,lc3,lc4,lc5,lc6,lc7,lc8,lc9
Datetime,Datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1971,1,,,,,,,,,,
1971,2,,,,,,,,,,
1971,3,0.275892,0.101918,0.168790,0.006568,0.010086,0.291261,2.164502e-03,0.001642,0.129354,0.007399
1971,4,0.337760,0.118232,0.169631,0.009655,0.007300,0.280618,0.000000e+00,0.001642,0.061293,0.007399
1971,5,0.311165,0.116082,0.234861,0.032872,0.004844,0.217293,0.000000e+00,0.001642,0.031687,0.041427
...,...,...,...,...,...,...,...,...,...,...,...
2020,11,0.010322,0.172850,0.143137,0.006941,0.029042,0.019023,5.782412e-19,0.001727,0.325757,0.272141
2020,12,0.020793,0.172940,0.095447,0.005291,0.050824,0.127709,1.060606e-02,0.001727,0.312946,0.166395
2021,1,0.028833,0.140760,0.085750,0.008281,0.063347,0.226132,1.359560e-02,0.001495,0.326275,0.044800
2021,2,0.050738,0.225082,0.066162,0.006934,0.060655,0.237454,1.359560e-02,0.001495,0.251731,0.009944


Unnamed: 0,lc0,lc1,lc2,lc3,lc4,lc5,lc6,lc7,lc8,lc9
0,,,,,,,,,,
1,,,,,,,,,,
2,,,,,,,,,,
3,,,,,,,,,,
