In [1]:
#1. TIME SERIES PLOT FOR SELECTED STATIONS
import numpy as np
import netCDF4 as nc
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import os
import datetime

knmipath = '/nobackup/users/killaars/PhD_data/runs_may_2017/benchmarkrun/'
macbookpath = '/Users/killaars/PhD/runs_may_2017/benchmarkrun/'
path = knmipath  # active data root; assign macbookpath instead to use the other location

# Upper bound of the per-file sample slice. Kept under the name `x` at module
# level because the TM5_ESM_3hr cell further down slices with it as well.
x = 40000000

# Input files are named sample_coordinates_<period>.nc; the model output files
# for the same samples carry the same <period>.nc suffix.
sample_prefix = 'sample_coordinates_'
sample_dir = os.path.join(path, 'sample_flasks')

# Per-file arrays are gathered in lists and joined once after the loop;
# np.append inside the loop would re-copy all earlier data for every file.
fin_obs_parts = []
obs_num_parts = []
lat_parts = []
lon_parts = []
alt_parts = []
fmp_obs0_parts = []
fesm_obs_parts = []
timestamps = []

# Only the top level of sample_flasks is scanned: every path below is built
# relative to that directory, so a recursive walk could only produce wrong
# paths. os.listdir raises OSError if the directory does not exist.
for sample_name in sorted(os.listdir(sample_dir)):
    flask_in = os.path.join(sample_dir, sample_name)
    if not (sample_name.startswith(sample_prefix) and os.path.isfile(flask_in)):
        continue

    period = sample_name[len(sample_prefix):]
    TM5_MP_filename = 'flask_output_TM5_MP_' + period
    ECE_filename = 'flask_output_ECE_' + period
    flask_mp = os.path.join(path, 'TM5_offline', TM5_MP_filename)
    flask_esm = os.path.join(path, 'TM5_ESM', ECE_filename)

    # The with-statement closes all three files even if an open or a read fails.
    with nc.Dataset(flask_in, mode='r') as flask_in_fh, \
            nc.Dataset(flask_mp, mode='r') as flask_mp_fh, \
            nc.Dataset(flask_esm, mode='r') as flask_esm_fh:
        fin_obs_parts.append(flask_in_fh.variables['observed'][:x])
        fin_date = flask_in_fh.variables['date_components'][:x]
        obs_num_parts.append(flask_in_fh.variables['obs_num'][:x])
        lat_parts.append(flask_in_fh.variables['latitude'][:x])
        lon_parts.append(flask_in_fh.variables['longitude'][:x])
        alt_parts.append(flask_in_fh.variables['altitude'][:x])
        # The model output holds 10 members; only the first one (index 0) is used for now.
        fmp_obs0_parts.append(flask_mp_fh.variables['flask'][:x, 0])
        fesm_obs_parts.append(flask_esm_fh.variables['flask'][:x, 0])

    # Turn each row of date components (the first six columns are passed to
    # datetime.datetime as year, month, day, hour, minute, second) into a
    # pandas Timestamp.
    for components in fin_date:
        year, month, day, hour, minute, second = [int(c) for c in components[:6]]
        timestamps.append(pd.Timestamp(datetime.datetime(year, month, day, hour, minute, second)))

# Joining onto an empty float64 array keeps the results float-typed (including
# obs_num), exactly as the former np.append([], ...) accumulation did, and
# also covers the case where no file matched.
float_seed = np.array([])
fin_obs = np.concatenate([float_seed] + fin_obs_parts)
obs_num = np.concatenate([float_seed] + obs_num_parts)
lat = np.concatenate([float_seed] + lat_parts)
lon = np.concatenate([float_seed] + lon_parts)
alt = np.concatenate([float_seed] + alt_parts)
fmp_obs0 = np.concatenate([float_seed] + fmp_obs0_parts)
fesm_obs = np.concatenate([float_seed] + fesm_obs_parts)
time = np.array(timestamps, dtype=object)

# One row per sample, indexed by observation number. The three concentration
# columns are scaled by 1e6 (presumably mole fraction -> ppm; confirm units).
df = pd.DataFrame({'A_fin_obs' : fin_obs*1000000,
                   'B_fmp_obs' : fmp_obs0*1000000,
                   'C_fesm_obs': fesm_obs*1000000,
                   'D_alt'     : alt,
                   'E_lat'     : lat,
                   'F_lon'     : lon,
                   'G_date'    : time},index=obs_num)


# Station selections: each mask picks the samples inside an open 1-degree
# latitude/longitude box around the site (Cabauw additionally by altitude).
def _strictly_between(values, low, high):
    """Boolean mask of the entries with low < value < high."""
    return (values > low) & (values < high)

# Cabauw in the Netherlands
CBW = (_strictly_between(df.E_lat, 51, 52) & _strictly_between(df.F_lon, 4, 5)
       & _strictly_between(df.D_alt, 150, 200))
# Pallas in Finland
PAL = _strictly_between(df.E_lat, 67, 68) & _strictly_between(df.F_lon, 24, 25)
# Park Falls in Wisconsin (USA)
LEF = _strictly_between(df.E_lat, 45, 46) & _strictly_between(df.F_lon, -91, -90)
# Jungfraujoch in Switzerland
JFJ = _strictly_between(df.E_lat, 46, 47) & _strictly_between(df.F_lon, 7, 8)

# One panel per station on a shared time axis; every panel shows the
# observations and both model runs as semi-transparent markers.
f, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True)
size = 2
alpha = .3

panels = [(ax1, 'LEF', LEF),
          (ax2, 'CBW', CBW),
          (ax3, 'PAL', PAL),
          (ax4, 'JFJ', JFJ)]
traces = [('A_fin_obs', 'g', 'Observations'),
          ('B_fmp_obs', 'b', 'TM5-Offline'),
          ('C_fesm_obs', 'r', 'TM5-ESM')]

for ax, station, mask in panels:
    ax.set_title(station)
    for column, colour, label in traces:
        ax.plot_date(x=df.G_date[mask], y=df[column][mask],
                     color=colour, alpha=alpha, ms=size, label=label)

# A single legend, placed to the right of the second panel, serves all panels.
ax2.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()



In [32]:
#1. TIME SERIES PLOT FOR SELECTED STATIONS
import numpy as np
import netCDF4 as nc
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import os
import datetime

knmipath = '/nobackup/users/killaars/PhD_data/runs_may_2017/benchmarkrun/'
macbookpath = '/Users/killaars/PhD/runs_may_2017/benchmarkrun/'
path = knmipath  # active data root; assign macbookpath instead to use the other location

# Upper bound of the per-file sample slice. Kept under the name `x` at module
# level because the TM5_ESM_3hr cell further down slices with it as well.
x = 40000000

# Input files are named sample_coordinates_<period>.nc; the model output files
# for the same samples carry the same <period>.nc suffix.
sample_prefix = 'sample_coordinates_'
# This cell loads one period only. Use sample_prefix itself as the selection
# to load every period.
selected_prefix = 'sample_coordinates_2007110300_2007111000'
sample_dir = os.path.join(path, 'sample_flasks')

# Per-file arrays are gathered in lists and joined once after the loop;
# np.append inside the loop would re-copy all earlier data for every file.
fin_obs_parts = []
obs_num_parts = []
lat_parts = []
lon_parts = []
alt_parts = []
fmp_obs0_parts = []
fesm_obs_parts = []
timestamps = []

# Only the top level of sample_flasks is scanned: every path below is built
# relative to that directory, so a recursive walk could only produce wrong
# paths. os.listdir raises OSError if the directory does not exist.
for sample_name in sorted(os.listdir(sample_dir)):
    flask_in = os.path.join(sample_dir, sample_name)
    if not (sample_name.startswith(selected_prefix) and os.path.isfile(flask_in)):
        continue

    period = sample_name[len(sample_prefix):]
    TM5_MP_filename = 'flask_output_TM5_MP_' + period
    ECE_filename = 'flask_output_ECE_' + period
    flask_mp = os.path.join(path, 'TM5_offline', TM5_MP_filename)
    flask_esm = os.path.join(path, 'TM5_ESM', ECE_filename)

    # The with-statement closes all three files even if an open or a read fails.
    with nc.Dataset(flask_in, mode='r') as flask_in_fh, \
            nc.Dataset(flask_mp, mode='r') as flask_mp_fh, \
            nc.Dataset(flask_esm, mode='r') as flask_esm_fh:
        fin_obs_parts.append(flask_in_fh.variables['observed'][:x])
        fin_date = flask_in_fh.variables['date_components'][:x]
        obs_num_parts.append(flask_in_fh.variables['obs_num'][:x])
        lat_parts.append(flask_in_fh.variables['latitude'][:x])
        lon_parts.append(flask_in_fh.variables['longitude'][:x])
        alt_parts.append(flask_in_fh.variables['altitude'][:x])
        # The model output holds 10 members; only the first one (index 0) is used for now.
        fmp_obs0_parts.append(flask_mp_fh.variables['flask'][:x, 0])
        fesm_obs_parts.append(flask_esm_fh.variables['flask'][:x, 0])

    # Turn each row of date components (the first six columns are passed to
    # datetime.datetime as year, month, day, hour, minute, second) into a
    # pandas Timestamp.
    for components in fin_date:
        year, month, day, hour, minute, second = [int(c) for c in components[:6]]
        timestamps.append(pd.Timestamp(datetime.datetime(year, month, day, hour, minute, second)))

# Joining onto an empty float64 array keeps the results float-typed (including
# obs_num), exactly as the former np.append([], ...) accumulation did, and
# also covers the case where no file matched.
float_seed = np.array([])
fin_obs = np.concatenate([float_seed] + fin_obs_parts)
obs_num = np.concatenate([float_seed] + obs_num_parts)
lat = np.concatenate([float_seed] + lat_parts)
lon = np.concatenate([float_seed] + lon_parts)
alt = np.concatenate([float_seed] + alt_parts)
fmp_obs0 = np.concatenate([float_seed] + fmp_obs0_parts)
fesm_obs = np.concatenate([float_seed] + fesm_obs_parts)
time = np.array(timestamps, dtype=object)

# One row per sample, indexed by observation number. The three concentration
# columns are scaled by 1e6 (presumably mole fraction -> ppm; confirm units).
df = pd.DataFrame({'A_fin_obs' : fin_obs*1000000,
                   'B_fmp_obs' : fmp_obs0*1000000,
                   'C_fesm_obs': fesm_obs*1000000,
                   'D_alt'     : alt,
                   'E_lat'     : lat,
                   'F_lon'     : lon,
                   'G_date'    : time},index=obs_num)

In [38]:
# Sanity check: report how many samples were loaded, then display the frame.
loaded_shape = np.shape(obs_num)
print(loaded_shape)
df

(25427,)


Unnamed: 0,A_fin_obs,B_fmp_obs,C_fesm_obs,D_alt,E_lat,F_lon,G_date
1208.0,383.934996,389.757333,390.668749,195.0,82.450798,-62.507198,2007-11-07 18:45:00
1209.0,384.004990,389.706780,390.644534,195.0,82.450798,-62.507198,2007-11-07 19:42:00
4859831.0,383.589999,389.756082,390.668778,200.0,82.450798,-62.507198,2007-11-07 18:40:00
2418594.0,382.750004,391.714129,391.738693,200.0,82.450798,-62.507198,2007-11-03 00:00:00
2418595.0,382.450002,391.719048,391.740497,200.0,82.450798,-62.507198,2007-11-03 01:00:00
2418596.0,383.250008,391.526701,390.466972,200.0,82.450798,-62.507198,2007-11-04 09:00:00
2418597.0,383.120001,391.507609,390.480476,200.0,82.450798,-62.507198,2007-11-04 10:00:00
2418598.0,383.139995,391.477777,390.485744,200.0,82.450798,-62.507198,2007-11-04 11:00:00
2418599.0,383.230014,391.436392,390.485977,200.0,82.450798,-62.507198,2007-11-04 12:00:00
2418600.0,383.200007,391.382608,390.482281,200.0,82.450798,-62.507198,2007-11-04 13:00:00


In [40]:
# Load the TM5_ESM_3hr run: the sample input files and the matching ESM output
# files both live in the TM5_ESM_3hr directory. Relies on `path` and the slice
# bound `x` set by the loading cell above.
dir_3hr = os.path.join(path, 'TM5_ESM_3hr')

# Per-file arrays are gathered in lists and joined once after the loop;
# np.append inside the loop would re-copy all earlier data for every file.
fesm_3hr_parts = []
fin_3hr_parts = []
obs_num_3hr_parts = []

# Only the top level of the directory is scanned, because both file paths are
# built relative to it. os.listdir raises OSError if the directory is missing.
for sample_name in sorted(os.listdir(dir_3hr)):
    flask_3hr_in = os.path.join(dir_3hr, sample_name)
    if not (sample_name.startswith('sample_coordinates') and os.path.isfile(flask_3hr_in)):
        continue

    # Show the file being processed and the <period>.nc suffix derived from it.
    print(sample_name, sample_name[19:])
    ECE_filename = 'flask_output_ECE_' + sample_name[19:]
    flask_esm3hr = os.path.join(dir_3hr, ECE_filename)

    # The with-statement closes both files after reading, also on error;
    # previously the handles were never closed.
    with nc.Dataset(flask_esm3hr, mode='r') as flask_esm3hr_fh, \
            nc.Dataset(flask_3hr_in, mode='r') as flask_3hr_in_fh:
        # First member (index 0) of the model output only.
        fesm_3hr_parts.append(flask_esm3hr_fh.variables['flask'][:x, 0])
        fin_3hr_parts.append(flask_3hr_in_fh.variables['observed'][:x])
        obs_num_3hr_parts.append(flask_3hr_in_fh.variables['obs_num'][:x])

# Joining onto an empty float64 array keeps the results float-typed (including
# obs_num_3hr), exactly as the former np.append([], ...) accumulation did, and
# also covers the case where no file matched.
float_seed = np.array([])
fesm_3hr_obs = np.concatenate([float_seed] + fesm_3hr_parts)
fin_3hr_obs = np.concatenate([float_seed] + fin_3hr_parts)
obs_num_3hr = np.concatenate([float_seed] + obs_num_3hr_parts)

# Both columns are scaled by 1e6 (presumably mole fraction -> ppm; confirm units).
# NOTE(review): the recorded output shows -1e40 in I_fesm_3hr_obs, i.e. -1e34
# before scaling. This looks like a missing-value marker in the model output;
# confirm and mask it before using this column.
df_3hr = pd.DataFrame({'H_fin_3hr_obs' : fin_3hr_obs*1000000,
                       'I_fesm_3hr_obs': fesm_3hr_obs*1000000},index=obs_num_3hr)
print(np.shape(obs_num_3hr))
print(obs_num_3hr[:100])
df_3hr

('sample_coordinates_2007110300_2007110400.nc', '2007110300_2007110400.nc')
('sample_coordinates_2007110400_2007110500.nc', '2007110400_2007110500.nc')
('sample_coordinates_2007110500_2007110600.nc', '2007110500_2007110600.nc')
('sample_coordinates_2007110600_2007110700.nc', '2007110600_2007110700.nc')
('sample_coordinates_2007110700_2007110800.nc', '2007110700_2007110800.nc')
('sample_coordinates_2007110800_2007110900.nc', '2007110800_2007110900.nc')
('sample_coordinates_2007110900_2007111000.nc', '2007110900_2007111000.nc')
(25632,)
[  2.41859400e+06   2.41859500e+06   1.43969000e+06   1.43969100e+06
   1.43969200e+06   1.43969300e+06   1.43969400e+06   1.43969500e+06
   1.43969600e+06   1.43969700e+06   1.43969800e+06   1.43969900e+06
   1.43970000e+06   1.43970100e+06   1.43970200e+06   1.43970300e+06
   1.43970400e+06   5.37700000e+03   1.50310200e+06   1.50310300e+06
   1.50310400e+06   1.50310500e+06   1.50310600e+06   1.50310700e+06
   1.50310800e+06   1.50310900e+06   1.503110

Unnamed: 0,H_fin_3hr_obs,I_fesm_3hr_obs
2418594.0,382.750004,-1.000000e+40
2418595.0,382.450002,-1.000000e+40
1439690.0,384.329993,-1.000000e+40
1439691.0,384.660001,-1.000000e+40
1439692.0,385.230000,-1.000000e+40
1439693.0,385.499996,-1.000000e+40
1439694.0,386.559987,-1.000000e+40
1439695.0,386.980013,-1.000000e+40
1439696.0,388.310000,-1.000000e+40
1439697.0,388.699991,-1.000000e+40
