# CZOData Example 1: Read and plot CZO_DisplayFile_v1 with Pandas.
Written by Anthony Aufdenkampe, Friday Dec. 13, 2013. Modified in Nov. 2017.
This example demonstrates the following:
* Read from a "CZO Display File v1", convert to Pandas DataFrame and plot time series.
* Export DataFrame to csv file in a format that can be uploaded to a HydroShare Time Series Resource, which auto-converts the data into an ODM2 database in SQLite format. 


In [1]:
# Import all required Python libraries and modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Build the list of CZO Display Files (v1) to read from a shared folder plus the individual file names.
#   Examples here available at http://czo.stroudcenter.org/data/ or http://criticalzone.org/christina/data/

example_data_dir = '/Users/aaufdenkampe/Documents/Python/EnviroDataScripts/CZODisplayParsePlot/ExampleData/'
example_file_names = ('CRB_WCC_STAGEFLOW_2011.csv', 'CRB_WCC_STAGEFLOW_2012.csv')

# Plain string concatenation keeps each resulting path identical to the fully written-out form
file_paths = [example_data_dir + file_name for file_name in example_file_names]

In [3]:
# Parse each file in "file_paths" with the Pandas "read_csv" function and
#   collect the resulting DataFrame objects, in order, in the list "data_frames".

read_csv_options = dict(
    header=0,                # column names come from the first line of the file
    skipinitialspace=True,   # ignore spaces that follow a delimiter
    skiprows=[1],            # skip the second line of the file (presumably a units/description row -- confirm against the data)
    index_col=0,             # use the first column as the index
    na_values=[-9999],       # read -9999 as a missing value (NaN)
    parse_dates=True,        # try to parse the index as dates
)

data_frames = []
for file_path in file_paths:
    df = pd.read_csv(file_path, **read_csv_options)
    data_frames.append(df)

In [4]:
# Stack the per-file DataFrames held in "data_frames" row-wise into one combined DataFrame
df = pd.concat(objs=data_frames, axis=0)

In [5]:
# Display the concatenated DataFrame to check that the files were read as expected
df

Unnamed: 0_level_0,Gage Height (ft) from Continuous record,Gage Height (ft),Discharge (cfs) from Continuous record,Discharge (cfs)
Date Time (EST),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2011-01-01 00:00:00,3.056,,2.9,
2011-01-01 00:15:00,3.056,,2.9,
2011-01-01 00:30:00,3.055,,2.89,
2011-01-01 00:45:00,3.055,,2.88,
2011-01-01 01:00:00,3.057,,2.91,
2011-01-01 01:15:00,3.057,,2.91,
2011-01-01 01:30:00,3.056,,2.91,
2011-01-01 01:45:00,3.055,,2.89,
2011-01-01 02:00:00,3.054,,2.88,
2011-01-01 02:15:00,3.054,,2.87,


In [None]:
# Inspect the index of the combined DataFrame
df.index

In [None]:
# Attach the EST time zone to the index timestamps.
#   tz_localize raises a TypeError on an index that is already time-zone aware,
#   so only localize when no time zone is set; this keeps the cell safe to re-run.
if df.index.tz is None:
    df.index = df.index.tz_localize('EST')

In [None]:
# Inspect the index again to see the effect of the time-zone localization
df.index

In [None]:
# Check the data type of each column
df.dtypes

In [None]:
# Convert both continuous-record columns to numeric dtype;
#   errors='coerce' turns any value that cannot be parsed as a number into NaN.
continuous_record_columns = [
    'Gage Height (ft) from Continuous record',
    'Discharge (cfs) from Continuous record',
]
for column_name in continuous_record_columns:
    df[column_name] = pd.to_numeric(df[column_name], errors='coerce')

In [None]:
# Re-check the column data types after the numeric conversion
df.dtypes

In [None]:
# Preview the first 5 rows
df.head(n=5)

In [None]:
# List the column names
df.columns

In [None]:
%matplotlib inline

In [None]:
# Quick-look plot of the DataFrame's columns against its index
df.plot()

In [None]:
# Plot discharge (log scale, left axis) and gage height (secondary right axis) from "df".
#   The title lists every source file name, because "df" combines all files in "file_paths";
#   a loop variable left over from reading would name only the last file.
plot_title = ', '.join(path.split('/')[-1] for path in file_paths)

ax = df['Discharge (cfs) from Continuous record'].plot(title=plot_title, style='b', logy=True, ylim=(1,1000), legend=True)
ax.set_ylabel(u'Discharge (cfs) from Continuous record', color='b')

ax2 = df['Gage Height (ft) from Continuous record'].plot(secondary_y=True, style='g', legend=True)
# The trailing semicolon suppresses the text repr of the returned label object
ax2.set_ylabel(u'Gage Height (ft) from Continuous record', color='g');

### Export to HydroShare Time Series compatible CSV
The data frame needs to be converted to a csv format that can be read into HydroShare.
See https://help.hydroshare.org/hydroshare-resource-types/time-series/understanding-what-file-types-can-be-uploaded-into-a-time-series-resource/
* CSV files uploaded to HydroShare must have a single date/time column called “ValueDateTime” and in the format “yyyy-mm-dd hh:mm:ss”

In [None]:
# Start the export DataFrame from only the two continuous sensor-record columns of "df"
export_source_columns = [
    'Gage Height (ft) from Continuous record',
    'Discharge (cfs) from Continuous record',
]
df_export = df[export_source_columns]

In [None]:
# Rename the column headings and the date/time index for HydroShare.
#   rename_axis returns a new object instead of editing the index in place, so the
#   source DataFrame "df" (whose index object may be shared with this selection)
#   keeps its original index name.
hydroshare_column_names = {
    "Discharge (cfs) from Continuous record": "discharge_cfs",
    "Gage Height (ft) from Continuous record": "stage_ft",
}
df_export = df_export.rename(columns=hydroshare_column_names).rename_axis('ValueDateTime')

# Write the timestamps as "yyyy-mm-dd hh:mm:ss" text, the format HydroShare requires.
#   Calling str() on a time-zone-aware timestamp appends the UTC offset (e.g. "-05:00"),
#   so the format is spelled out explicitly; values stay in the index's local clock time.
#   The isinstance guard keeps this cell re-runnable once the index already holds text.
if isinstance(df_export.index, pd.DatetimeIndex):
    df_export.index = df_export.index.strftime('%Y-%m-%d %H:%M:%S')
    df_export.index.name = 'ValueDateTime'

# View result
df_export.head(n=5)

In [None]:
# Convert empty strings to null values (NaN), then drop every row that contains a null value
df_export = df_export.replace('', np.nan).dropna()

In [None]:
# Save the export DataFrame as a new csv file for import into a HydroShare Time Series resource
export_csv_path = '/Users/aaufdenkampe/Documents/Python/EnviroDataScripts/CZODisplayParsePlot/ExampleData/CRB_WCC_STAGEFLOW_from_df.csv'
df_export.to_csv(export_csv_path)