In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import glob
import fastparquet

In [2]:
def get_location(data, sort, create=False):
    """Build a table with one row per station and its coordinates.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``STATION``, ``LATITUDE`` and ``LONGITUDE``.
    sort : str
        Label embedded in the output file name (``locations_{sort}.csv``).
    create : bool, default False
        When true, the table is also written as CSV into the module-level
        ``data_folder``.

    Returns
    -------
    pandas.DataFrame
        Columns ``'station ID'``, ``'lat'`` and ``'lon'``. Stations appear in
        order of first occurrence in ``data``; the coordinates are those of
        the first row of each station.
    """
    # drop_duplicates keeps the first row of every station *by position*.
    # Looking the first value up with ``series[0]`` is a label lookup instead:
    # it raises KeyError on an integer index whenever a station's rows do not
    # include label 0, and it is not a valid label on a DatetimeIndex.
    locations = (
        data[['STATION', 'LATITUDE', 'LONGITUDE']]
        .drop_duplicates(subset='STATION')
        .rename(columns={'STATION': 'station ID', 'LATITUDE': 'lat', 'LONGITUDE': 'lon'})
        .reset_index(drop=True)
    )

    if create:
        locations.to_csv(f'{data_folder}\\locations_{sort}.csv')
    return locations

In [3]:
def split_df_station(data, locations, sort, create=False):
    """Split ``data`` into one DataFrame per station ID.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``STATION`` column.
    locations : array-like
        Station IDs to extract; converted with ``np.array`` before use.
    sort : str
        Label embedded in the per-station CSV file names.
    create : bool, default False
        When true, every subset is also written to
        ``data per station\\data_{station}_{sort}.csv`` inside the
        module-level ``data_folder``.

    Returns
    -------
    dict
        Maps each station ID to the rows of ``data`` whose ``STATION`` equals
        that ID.
    """
    # One key per distinct station ID, in the order given.
    frames_by_station = dict.fromkeys(np.array(locations))

    for station_id in frames_by_station:
        station_rows = data[data.STATION == station_id]
        frames_by_station[station_id] = station_rows
        if create == True:
            station_rows.to_csv(f'{data_folder}\\data per station\\data_{station_id}_{sort}.csv')

    return frames_by_station
    

In [4]:
def list_dataframe(dict_data, sort):
    """Load the per-station CSV files for every key of ``dict_data``.

    Only the keys of ``dict_data`` are used; the frames are read back from
    ``data per station\\data_{key}_{sort}.csv`` inside the module-level
    ``data_folder``, with ``DATE`` as a parsed index.

    Returns
    -------
    list of pandas.DataFrame
        One frame per key, in key order.
    """
    return [
        pd.read_csv(
            f'{data_folder}\\data per station\\data_{station}_{sort}.csv',
            index_col='DATE',
            parse_dates=True,
        )
        for station in dict_data.keys()
    ]

### Data

All data is downloaded from https://www.ncei.noaa.gov/cdo-web/ and is prepared in this notebook.

In [16]:
# The notebook is expected to run from a sub-folder of the project, so the
# data directory is resolved relative to the parent of the working directory.
path = os.getcwd()
home_path = os.path.dirname(path)
data_folder = home_path + '\\Data'

In [17]:
# List every CSV directly inside the data folder and display the result.
files = glob.glob(data_folder + "\\*.csv")
files

['C:\\Users\\anne-\\OneDrive - Delft University of Technology\\Documenten\\Environmental Engineering MSc\\ENVM1502-Catchment-model\\Data\\3285895.csv',
 'C:\\Users\\anne-\\OneDrive - Delft University of Technology\\Documenten\\Environmental Engineering MSc\\ENVM1502-Catchment-model\\Data\\3286005.csv',
 'C:\\Users\\anne-\\OneDrive - Delft University of Technology\\Documenten\\Environmental Engineering MSc\\ENVM1502-Catchment-model\\Data\\precipitation_troylockdam.csv',
 'C:\\Users\\anne-\\OneDrive - Delft University of Technology\\Documenten\\Environmental Engineering MSc\\ENVM1502-Catchment-model\\Data\\Q_ids_with_begin-end-date.csv']

In [18]:
# Read the precipitation download by name instead of by position in `files`:
# glob.glob() does not guarantee an order and `files` is reassigned by other
# cells, so files[1] can silently resolve to a different CSV.
# '3286005.csv' is the entry files[1] pointed to in the stored file listing.
data_P = pd.read_csv(
    f"{data_folder}\\3286005.csv",
    index_col='DATE',
    usecols=['DATE', 'PRCP', 'STATION', 'LATITUDE', 'LONGITUDE'],
    parse_dates=True,
)


In [8]:
# List the CSV files in the precipitation ("P") sub-folder and display them.
files = glob.glob(data_folder + "\\P\\*.csv")
files

['C:\\Users\\anne-\\OneDrive - Delft University of Technology\\Documenten\\Environmental Engineering MSc\\ENVM1502-Catchment-model\\Data\\P\\average_P.csv',
 'C:\\Users\\anne-\\OneDrive - Delft University of Technology\\Documenten\\Environmental Engineering MSc\\ENVM1502-Catchment-model\\Data\\P\\locations_evap.csv',
 'C:\\Users\\anne-\\OneDrive - Delft University of Technology\\Documenten\\Environmental Engineering MSc\\ENVM1502-Catchment-model\\Data\\P\\locations_P.csv',
 'C:\\Users\\anne-\\OneDrive - Delft University of Technology\\Documenten\\Environmental Engineering MSc\\ENVM1502-Catchment-model\\Data\\P\\locations_prcp.csv',
 'C:\\Users\\anne-\\OneDrive - Delft University of Technology\\Documenten\\Environmental Engineering MSc\\ENVM1502-Catchment-model\\Data\\P\\locations_P_in_basin.csv',
 'C:\\Users\\anne-\\OneDrive - Delft University of Technology\\Documenten\\Environmental Engineering MSc\\ENVM1502-Catchment-model\\Data\\P\\location_northern_basin.csv']

In [None]:
# Station IDs to keep, read straight from the 'station ID' column as a 1-D array.
# The file is addressed by name rather than as files[-1]: glob.glob() gives no
# ordering guarantee and `files` is reassigned elsewhere in the notebook.
# NOTE(review): 'location_northern_basin.csv' is the last entry of the stored
# listing of the P folder - confirm this is the intended station table.
locations_basins = pd.read_csv(
    f"{data_folder}\\P\\location_northern_basin.csv",
    usecols=['station ID'],
)['station ID'].to_numpy()

# One DataFrame per listed station. Pass create=True to also write each subset
# to the 'data per station' folder as CSV.
dict_stations_P = split_df_station(data_P, locations_basins, 'prcp')

In [None]:
# list_dataframe re-reads the per-station CSV files from disk, so they must
# already exist (they are written by split_df_station(..., create=True)).
df_list_prcp = list_dataframe(dict_data=dict_stations_P, sort='prcp')

In [None]:
def average_data(df_lst, column):
    """Average one column across several DataFrames, row label by row label.

    Parameters
    ----------
    df_lst : iterable of pandas.DataFrame
        Frames that each contain ``column``.
    column : str
        Name of the column to average.

    Returns
    -------
    pandas.DataFrame
        A single column holding the row-wise mean over all frames. Rows are
        aligned on the index; missing values are skipped by ``mean``.

    Raises
    ------
    ValueError
        Raised by ``pandas.concat`` when ``df_lst`` is empty.
    """
    # Series can be concatenated side by side directly; no to_frame()
    # conversion is needed (its result was previously discarded anyway).
    selected = [df[column] for df in df_lst]

    combined = pd.concat(selected, axis=1)
    return combined.mean(axis=1).to_frame()


    
    

In [None]:
# Mean of the PRCP column over all station frames, per date.
average_prcp = average_data(df_list_prcp, column='PRCP')
# Give the single result column an explicit label, then show it.
average_prcp.columns = pd.Index(['P'])
average_prcp.columns

In [None]:
# Persist the averaged series next to the other precipitation files.
average_prcp.to_parquet(data_folder + "\\P\\average_P.parquet")


In [None]:
# Plot the averaged precipitation from 1900 onwards on a wide figure.
fig, ax = plt.subplots(figsize=(15, 6))
average_prcp.loc['1900':].plot(ax=ax)
ax.set(
    ylabel='Precipitation [mm]',
    title='Average Precipitation Stations in Basins',
);

In [None]:
# Small list-concatenation experiment: c ends up as a followed by b.
a = [1, 2, 3, 4]
b = [2, 3, 4, 5]
c = []
c.extend(a)
c.extend(b)

In [None]:
# Display the total of the combined list `c` built in the preceding cell.
# NOTE(review): looks like leftover scratch code - consider removing.
sum(c)

In [None]:
# Print the integers 1 through 9, one per line.
# NOTE(review): looks like leftover scratch code - consider removing.
for i in range(1, 10):
    print(i)

In [5]:
# Temperature section: from here on `data_folder` points at the "T" sub-folder
# (the precipitation cells expect it to be the top-level Data folder), and
# `files` is replaced by the CSV listing of that sub-folder.
path = os.getcwd()
home_path = os.path.dirname(path)
data_folder = home_path + '\\Data\\T'
files = glob.glob(data_folder + "\\*.csv")
files

['C:\\Users\\anne-\\OneDrive - Delft University of Technology\\Documenten\\Environmental Engineering MSc\\ENVM1502-Catchment-model\\Data\\T\\3292614.csv',
 'C:\\Users\\anne-\\OneDrive - Delft University of Technology\\Documenten\\Environmental Engineering MSc\\ENVM1502-Catchment-model\\Data\\T\\locations_temp.csv',
 'C:\\Users\\anne-\\OneDrive - Delft University of Technology\\Documenten\\Environmental Engineering MSc\\ENVM1502-Catchment-model\\Data\\T\\locations_temperature_in_nothern_basin.csv']

In [23]:
# Read the temperature download by name instead of as files[0]: glob.glob()
# does not guarantee an order and `files` is reassigned by other cells.
# '3292614.csv' is the entry files[0] pointed to in the stored listing of the
# T folder.
data_T = pd.read_csv(
    f"{data_folder}\\3292614.csv",
    index_col=['DATE'],
    parse_dates=True,
)


In [11]:
# Every station present in the temperature data gets its own DataFrame.
# Pass create=True to also write each subset as CSV.
locations = data_T['STATION'].unique()
dict_stations_T = split_df_station(data=data_T, locations=locations, sort='temp')


In [14]:
# list_dataframe re-reads the per-station CSV files from disk, so they must
# already exist (they are written by split_df_station(..., create=True)).
df_list_temp = list_dataframe(dict_data=dict_stations_T, sort='temp')

In [16]:
# Write each station frame to parquet. Files are named by list position,
# not by station ID.
for i, station_frame in enumerate(df_list_temp):
    station_frame.to_parquet(f'{data_folder}\\data\\data_{i}.parquet')

In [20]:
# Coordinates of every temperature station. Pass create=True to also save the
# table as locations_temp.csv.
location_temp = get_location(data=data_T, sort='temp')

In [10]:
# Station IDs to keep, read straight from the 'station ID' column as a 1-D array.
# The file is addressed by name rather than as files[2]: glob.glob() gives no
# ordering guarantee and `files` is reassigned elsewhere in the notebook.
# The name is the third entry of the stored listing of the T folder.
locations_temp_north = pd.read_csv(
    f"{data_folder}\\locations_temperature_in_nothern_basin.csv",
    usecols=['station ID'],
)['station ID'].to_numpy()

90

In [15]:
# Convert the per-station CSV of every listed station to parquet.
for name in locations_temp_north:
    file = glob.glob(f"{data_folder}\\data per station\\*{name}*.csv")
    if not file:
        # Fail with the station name instead of a bare IndexError on file[0].
        raise FileNotFoundError(
            f"No per-station CSV matching '*{name}*.csv' in {data_folder}\\data per station"
        )
    # The CSV is read without index_col, so DATE stays an ordinary column.
    df = pd.read_csv(file[0])
    df.to_parquet(f'{data_folder}\\data parquet basin\\temp_{name}.parquet')

['C:\\Users\\anne-\\OneDrive - Delft University of Technology\\Documenten\\Environmental Engineering MSc\\ENVM1502-Catchment-model\\Data\\T\\data per station\\data_USC00305769_temp.csv']