In [1]:
import pandas as pd
from pandas_ods_reader import read_ods
import seaborn as sn
import matplotlib.pyplot as plt
import os

In [2]:
# Simulation window: first year to load and the period value handed to load_files.
start_year = 1998
growth_period = 6

# Directory that holds the input .ods files; passed to load_files as `path`.
path = 'datasets/cool_temp_maize_soybean/'

# Base names of the available output files (one entry is picked by index below).
file_names = [
    'flux_soc_out',
    'atmosphere_out',
    'n_out',
    'n2o_nh3_out',
    'p_out',
    'soil_c_out',
    'soil_water_out',
    'surf_water_out',
    'temp_out',
]

In [3]:
def load_files(start_year: int, period: int, file_name: str, path: str, external_save: bool,
               missing_files: tuple = ((2001, 'atmosphere_out'),)) -> pd.DataFrame:
    """Read one ``.ods`` file per year and stack them into a single DataFrame.

    For every year from ``start_year`` to ``start_year + period`` (inclusive) the
    file ``<path>/<year><file_name>.ods`` is read with ``read_ods``. Files that
    contain an ``HOUR`` column are reduced to daily means. The ``DOY`` column is
    dropped from every file before the yearly frames are concatenated.

    Parameters
    ----------
    start_year : int
        First year to load.
    period : int
        Number of additional years after ``start_year``; ``period + 1`` years
        are visited in total.
    file_name : str
        Base name of the files, without the year prefix and ``.ods`` suffix.
    path : str
        Directory containing the input files. Also the parent of the
        ``daily_data_merged`` output directory.
    external_save : bool
        When true, the merged frame is also written to
        ``<path>/daily_data_merged/<file_name>.csv``.
    missing_files : tuple of (year, file_name) pairs, optional
        Year/file combinations to skip instead of reading. The default skips
        the 2001 ``atmosphere_out`` file.

    Returns
    -------
    pd.DataFrame
        All loaded years stacked row-wise with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If every requested year was skipped, so there is nothing to merge.
    KeyError
        If a file lacks the ``DOY`` column (or ``DATE`` in an hourly file).
    """
    frames = []
    # Iterate over the years directly instead of mutating ``start_year``.
    for year in range(start_year, start_year + period + 1):
        if (year, file_name) in missing_files:
            continue

        df = read_ods(os.path.join(path, f'{year}{file_name}.ods'))

        # Some files have hourly data, so turn them into daily data.
        if 'HOUR' in df.columns:
            # The DATE/HOUR columns stored in the file are discarded and the
            # timestamps regenerated as a gap-free hourly range.
            # NOTE(review): assumes the file starts at 00:00 on 1 January and
            # has no missing rows -- confirm against the data.
            # A Timedelta frequency avoids the deprecated 'H' alias.
            hourly_index = pd.date_range(
                start=pd.Timestamp(year=year, month=1, day=1),
                periods=len(df),
                freq=pd.Timedelta(hours=1),
                name='DATE',
            )
            df = (
                df.drop(columns=['DATE', 'HOUR'])
                .set_index(hourly_index)
                .resample('D')
                .mean()
                .reset_index()
            )

        frames.append(df.drop(columns='DOY'))

    if not frames:
        raise ValueError(
            f'No files loaded for {file_name!r}: every year in '
            f'{start_year}..{start_year + period} was skipped'
        )

    merged_data = pd.concat(frames, ignore_index=True)

    if external_save:
        out_dir = os.path.join(path, 'daily_data_merged')
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(out_dir, exist_ok=True)
        # The row index is written too; reading the CSV back without
        # index_col=0 therefore yields an 'Unnamed: 0' column.
        merged_data.to_csv(os.path.join(out_dir, file_name + '.csv'))
    return merged_data

In [5]:
# Merge the yearly 'temp_out' files (file_names[8]) and also write the merged CSV to disk.
df = load_files(
    start_year=start_year,
    period=growth_period,
    file_name=file_names[8],
    path=path,
    external_save=True,
)

In [5]:
# Show the merged frame returned by load_files as this cell's output.
df

Unnamed: 0,DATE,RESIDUE_C,HUMUS_C,AMENDED_C,LITTER_C,CO2_FLUX,O2_FLUX,AUTO_RESP,MICRO_C,SURF_RES,...,SOC_1,SOC_2,SOC_3,SOC_4,SOC_5,SOC_6,SOC_7,SOC_8,SOC_9,SOC_10
0,1998-01-01 00:00:00,520.6404,5829.505,0.0,0.0,-0.367903,7.701474,0.0,173.56390,299.262200,...,208.3618,321.3799,978.3816,978.8501,959.8790,960.0927,347.6805,464.7213,394.9324,329.9658
1,1998-01-01 01:00:00,520.2319,5827.954,0.0,0.0,-1.328162,17.841200,0.0,174.64740,298.954500,...,208.3715,321.2600,977.9357,978.4709,959.5856,959.8572,347.6099,464.6540,394.8999,329.9523
2,1998-01-01 02:00:00,519.4646,5826.513,0.0,0.0,-3.758204,29.284860,0.0,175.87530,298.293300,...,208.4164,321.0845,977.5222,978.1268,959.3218,959.6431,347.5475,464.5858,394.8688,329.9361
3,1998-01-01 03:00:00,518.7480,5825.145,0.0,0.0,-6.237416,40.257220,0.0,176.96220,297.681700,...,208.4474,320.9328,977.1290,977.7904,959.0721,959.4471,347.4888,464.5228,394.8343,329.9193
4,1998-01-01 04:00:00,518.1078,5823.846,0.0,0.0,-8.273404,48.820270,0.0,177.89820,297.145300,...,208.4754,320.7908,976.7527,977.4705,958.8353,959.2623,347.4342,464.4627,394.7981,329.9024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2552,2004-01-16 01:00:00,102.9864,5383.575,0.0,0.0,-349.421500,1047.704000,0.0,51.80003,6.953911,...,224.7586,277.8950,904.1473,842.8201,817.0486,837.3776,319.5120,436.7137,382.7864,330.5983
2553,2004-01-16 02:00:00,102.9408,5383.554,0.0,0.0,-349.432600,1048.570000,0.0,51.74522,6.946403,...,224.7601,277.8897,904.1288,842.8070,817.0433,837.3719,319.5095,436.7107,382.7841,330.5938
2554,2004-01-16 03:00:00,102.8962,5383.534,0.0,0.0,-349.443500,1049.527000,0.0,51.69143,6.939627,...,224.7617,277.8842,904.1104,842.7940,817.0381,837.3661,319.5070,436.7078,382.7820,330.5895
2555,2004-01-16 04:00:00,102.8534,5383.513,0.0,0.0,-349.454400,1050.430000,0.0,51.63892,6.933514,...,224.7643,277.8791,904.0920,842.7811,817.0328,837.3604,319.5045,436.7048,382.7798,330.5852
