# Analyze all the observed data for B2 site
 ---
**Description:** This notebook analyzes all the observed data (meteorological forcing plus other measured parameters) for the B2 meteorological station located in Val Mazia (Italy).

**Author:** Elisa Bortoli (elisa.bortoli@eurac.edu)

**Credits:** Giacomo Bertoldi (giacomo.bertoldi@eurac.edu)

**Date:** 2019-07-25

**Version:** 1.0

**Usage:** run all the cells in order, from top to bottom

**Python version:** 3.6.8

**Requirements:** Python modules: os, glob, sys, datetime, pandas, numpy, matplotlib

 **Notes:** 
- The test setup can be found at:
https://github.com/geotopmodel/geotop/tree/v3.0/tests/1D/Matsch_B2_Ref_007


- The given input meteo can be found at: 
https://raw.githubusercontent.com/geotopmodel/geotop/master/tests/1D/Matsch_B2_Ref_007/meteo0001.txt


- The other measured parameters can be found at:
https://github.com/EURAC-Ecohydro/MonaLisa/tree/master/geotop/1D/Matsch_B2_Optim_001/obs


## Import the necessary modules

In [1]:
import os 
import glob 
import sys
import pandas as pd
import numpy as np
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
import matplotlib.pyplot as plt
import datetime as datetime

In [2]:
%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;
// Raise the auto-scroll threshold so that long cell outputs are displayed
// in full instead of being wrapped into scrollable frames

<IPython.core.display.Javascript object>

## Set working paths of the simulation

In [3]:
# Root folder of the Matsch B2 test case
B2_path = r"/home/elisa/paper_GEOtop/tests/Matsch_B2_Ref_007"

# Simulations: parent folder and the run sub-folder
sim_data = f"{B2_path}/data_simulated"
sim_path = f"{sim_data}/sim_2"

# Observations: parent folder and the observation-set sub-folder
obs_data = f"{B2_path}/data_observed"
obs_path = f"{obs_data}/obs-bottazzi"

## Meteo data


### Given input (the same one used by Bottazzi)

In [4]:
# Folder holding the meteo input files of the selected simulation
meteo_path = sim_path + r"/meteo/"

# Select only meteo*.txt files.
# NOTE: os.chdir changes the working directory of the whole session; it is
# kept so that anything relying on the current directory behaves as before.
os.chdir(meteo_path)
unsorted_meteo_files = glob.glob("meteo*.txt")

# Fail early with a clear message: without any matching file the loop below
# would not run and `traces_series` would be undefined (or stale on a re-run).
if not unsorted_meteo_files:
    raise FileNotFoundError("No meteo*.txt files found in " + meteo_path)

# Sort the files in alphabetical order, ignoring case
meteo_files = sorted(unsorted_meteo_files, key=str.lower)

# Import every file into a pandas dataframe indexed by its 'Date' column
meteo_data = []
for trace in meteo_files:
    filepath = os.path.join(meteo_path, trace)
    traces_series = pd.read_csv(filepath,
                                parse_dates = ['Date'], # Parse the 'Date' column as datetimes
                                index_col = ['Date'], # Use the dates as index
                                dayfirst = True, # Interpret dates as day-first
                                na_values=['-9999']) # Read -9999 as NaN
    meteo_data.append(traces_series)

# Show the column headers of the last file read
list(traces_series)

['JDfrom0',
 'Iprec',
 'WindSp',
 'WindDir',
 'RelHum',
 'AirT',
 'SWglobal',
 'CloudTrans']

In [5]:
# Compare the period actually covered by the data ("Practice") with the
# nominal hourly period between the expected start and end ("Theory")
separator = " ------------------------------------------- "
data_index = traces_series.index

# Effective start, end and number of records
print("Practice")
print("start = ", data_index.min())
print("end   = ", data_index.max())
print("len(period_data) = ", len(data_index))
print(separator)

# Expected period: one timestamp per hour, both ends included
start = datetime.datetime(year=2009, month=10, day=2, hour=0, minute=0)
end = datetime.datetime(year=2015, month=12, day=31, hour=23, minute=0)
period_theory = pd.date_range(start=start, end=end, freq='H')

print("Theory")
print("start = ", start)
print("end   = ", end)
print("len(period_data) = ", len(period_theory))
print(separator)

# Difference between the expected and the actual number of records
n_missing = len(period_theory) - len(data_index)
print("Missing data = ", n_missing)

Practice
start =  2009-10-02 00:00:00
end   =  2015-12-31 23:00:00
len(period_data) =  54768
 ------------------------------------------- 
Theory
start =  2009-10-02 00:00:00
end   =  2015-12-31 23:00:00
len(period_data) =  54768
 ------------------------------------------- 
Missing data =  0


## Output data
### Obs


In [6]:
# Folder holding the observation files
obs_path_B = obs_data + r"/obs-bottazzi-tagliato/"

# Select only obs*.txt files.
# NOTE: os.chdir changes the working directory of the whole session; it is
# kept so that anything relying on the current directory behaves as before.
os.chdir(obs_path_B)
unsorted_obs_files = glob.glob("obs*.txt")

# Fail early with a clear message: without any matching file the loop below
# would not run and `traces_series` would silently keep whatever dataframe a
# previous cell left in it.
if not unsorted_obs_files:
    raise FileNotFoundError("No obs*.txt files found in " + obs_path_B)

# Sort the files in alphabetical order, ignoring case
obs_files = sorted(unsorted_obs_files, key=str.lower)

# Import every file into a pandas dataframe indexed by its date column
observed_data = []
for trace in obs_files:
    filepath = os.path.join(obs_path_B, trace)
    traces_series = pd.read_csv(filepath,
                                parse_dates = ['Date12.DDMMYYYYhhmm.'], # Parse the date column as datetimes
                                index_col = ['Date12.DDMMYYYYhhmm.'], # Use the dates as index
                                dayfirst = True, # Interpret dates as day-first
                                na_values=['-9999']) # Read -9999 as NaN
    observed_data.append(traces_series)

In [7]:
# Display the column headers of the dataframe currently held in traces_series
traces_series.columns.tolist()

['rainfall_amount',
 'wind_speed',
 'wind_from_direction',
 'relative_humidity',
 'air_temperature',
 'surface_downwelling_shortwave_flux',
 'soil_moisture_content_50',
 'soil_moisture_content_200',
 'latent_heat_flux_in_air',
 'sensible_heat_flux_in_air']

In [8]:
# Compare the period actually covered by the data ("Practice") with the
# nominal hourly period between the expected start and end ("Theory"),
# then list the expected timestamps that have no record
separator = " ------------------------------------------- "
data_index = traces_series.index

# Effective start, end and number of records
print("Practice")
print("start = ", data_index.min())
print("end   = ", data_index.max())
print("len(period_data) = ", len(data_index))
print(separator)

# Expected period: one timestamp per hour, both ends included
start = datetime.datetime(year=2009, month=10, day=2, hour=1, minute=0)
end = datetime.datetime(year=2015, month=12, day=31, hour=23, minute=0)
period_theory = pd.date_range(start=start, end=end, freq='H')

print("Theory")
print("start = ", start)
print("end   = ", end)
print("len(period_data) = ", len(period_theory))
print(separator)

# Difference between the expected and the actual number of records
n_missing = len(period_theory) - len(data_index)
print("Missing data = ", n_missing)

# Expected timestamps that do not appear in the data index.
# NOTE(review): in the recorded output these all fall at 02:00 on a late-March
# day, presumably the daylight-saving time change - to be confirmed.
is_recorded = period_theory.isin(data_index)
gaps = period_theory[~is_recorded]
print(gaps)

Practice
start =  2009-10-02 01:00:00
end   =  2015-12-31 23:00:00
len(period_data) =  54761
 ------------------------------------------- 
Theory
start =  2009-10-02 01:00:00
end   =  2015-12-31 23:00:00
len(period_data) =  54767
 ------------------------------------------- 
Missing data =  6
DatetimeIndex(['2010-03-28 02:00:00', '2011-03-27 02:00:00',
               '2012-03-25 02:00:00', '2013-03-31 02:00:00',
               '2014-03-30 02:00:00', '2015-03-29 02:00:00'],
              dtype='datetime64[ns]', freq=None)
