In [7]:
import pandas as pd
from pandas_ods_reader import read_ods
import seaborn as sn
import matplotlib.pyplot as plt
import os
import tqdm

In [8]:
# Directory holding the yearly .ods exports. The trailing slash matters:
# load_files concatenates this string directly with the year and file name.
path = 'datasets/warm_temp_maize_soybean_irrigated/'

# Base names of the output tables; each yearly file on disk is named
# <year><base name>.ods.
file_names = [
    'flux_soc_out',
    'atmosphere_out',
    'n_flux_out',
    'n2o_nh3_out',
    'p_flux_out',
    'soil_c_out',
    'soil_water_out',
    'surf_water_out',
    'temp_out',
]

# First year to load, and how many further years follow it
# (load_files reads period + 1 yearly files in total).
start_year = 2001
growth_period = 6

In [9]:
def _hourly_to_daily(df: pd.DataFrame, year: int) -> pd.DataFrame:
    """Collapse an hourly table into one row of column means per day.

    The file's own 'DATE' and 'HOUR' columns are discarded and replaced by a
    synthetic hourly timestamp index starting on 1 January of ``year`` with
    one entry per row. Rows are therefore assumed to be consecutive hours
    without gaps -- NOTE(review): confirm against the simulator output.

    Returns a new frame whose first column is the daily 'DATE' timestamp.
    """
    hourly = df.drop(columns=['DATE', 'HOUR'])
    # A Timedelta step is used instead of the 'H' alias, which is deprecated
    # in recent pandas releases; a Timedelta works on old and new versions.
    hourly.index = pd.date_range(
        start=pd.Timestamp(year=year, month=1, day=1),
        periods=len(hourly),
        freq=pd.Timedelta(hours=1),
        name='DATE',
    )
    return hourly.resample('D').mean().reset_index()


def load_files(start_year: int, period: int, file_name: str, path: str, external_save: bool) -> pd.DataFrame:
    """Load consecutive yearly .ods files and stack them into one daily table.

    One file is read per year from ``start_year`` through
    ``start_year + period`` inclusive (``period + 1`` files in total).

    Parameters
    ----------
    start_year : first year to load.
    period : number of additional years after ``start_year``; must be >= 0.
    file_name : base name shared by the yearly files.
    path : directory prefix. It is concatenated as-is with the year and
        ``file_name`` to form ``<path><year><file_name>.ods``, so it must end
        with a path separator.
    external_save : when true, the merged table is also written to
        ``<path>/daily_data_merged/<file_name>.csv``.

    Returns
    -------
    The row-wise concatenation of all yearly tables, without the 'DOY'
    column and with a fresh 0..n-1 index.

    Raises
    ------
    ValueError : if ``period`` is negative (there would be nothing to load).
    """
    if period < 0:
        raise ValueError('period must be >= 0, got ' + str(period))

    yearly_frames = []
    for year in tqdm.tqdm(range(start_year, start_year + period + 1)):
        df = read_ods(f'{path}{year}{file_name}.ods')
        # Some files hold hourly records; reduce those to daily means so every
        # yearly table has one row per day.
        if 'HOUR' in df.columns:
            df = _hourly_to_daily(df, year)
        yearly_frames.append(df.drop(columns='DOY'))

    # ignore_index renumbers the rows 0..n-1 across the yearly tables.
    merged_data = pd.concat(yearly_frames, ignore_index=True)

    if external_save:
        out_dir = os.path.join(path, 'daily_data_merged/')
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(out_dir, exist_ok=True)
        merged_data.to_csv(os.path.join(out_dir, file_name + '.csv'))
    return merged_data

In [10]:
# file_names[8] is 'temp_out'; external_save=True also writes the merged CSV
# into the 'daily_data_merged/' folder under path.
df = load_files(
    start_year=start_year,
    period=growth_period,
    file_name=file_names[8],
    path=path,
    external_save=True,
)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:01<00:00,  5.90it/s]


In [11]:
# Display the merged frame returned by load_files (last expression of the cell).
df

Unnamed: 0,DATE,RADN,TMAX_AIR,TMIN_AIR,HMAX_AIR,HMIN_AIR,WIND,PRECN,TMAX_SOIL_1,TMIN_SOIL_1,...,ECND_5,ECND_6,ECND_7,ECND_8,ECND_9,ECND_10,ECND_11,ECND_12,TTL_SALT_DISCHG,unnamed.1
0,1012001.0,7.099775,-10.349340,-19.563370,0.248914,0.118633,304.5902,0.0,12.280550,-6.317841,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1,2012001.0,8.122130,-4.238232,-19.545430,0.395680,0.118810,304.5902,0.0,4.048279,-7.002594,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2,3012001.0,8.737134,2.983990,-16.492550,0.666424,0.152769,304.5902,0.0,6.269165,-6.062439,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
3,4012001.0,8.232980,8.539546,-7.811173,0.837969,0.302538,304.5902,0.0,9.558594,-3.242218,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
4,5012001.0,8.136024,8.539546,-4.524794,0.842864,0.387355,304.5902,0.0,12.270170,-0.841858,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2551,27122007.0,2.951662,0.750000,-9.540000,0.568845,0.264999,296.3880,0.0,0.039520,-3.444183,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,010102007temp_ou
2552,28122007.0,3.439205,-3.210000,-11.750000,0.426891,0.223144,267.0480,0.0,-0.688629,-5.245087,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,010102007temp_ou
2553,29122007.0,8.381751,-7.350000,-13.260000,0.313329,0.198082,307.1160,0.0,0.227539,-5.977509,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,010102007temp_ou
2554,30122007.0,4.583860,-7.050000,-13.180000,0.320535,0.199344,273.7440,0.0,-1.562378,-5.902954,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,010102007temp_ou
