In [None]:
import pandas as pd
import os
from glob import glob
from statistics import mean

# Import libraries
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [None]:
def mk_dir_fn(path_):
    """Create the directory ``path_`` if it does not already exist.

    Missing parent directories are created as well, and an existing
    directory is left untouched.

    Parameters
    ----------
    path_ : str
        Path of the directory to create.

    Raises
    ------
    FileExistsError
        If ``path_`` exists but is not a directory.
    """
    # makedirs (unlike mkdir) also creates missing parents, and exist_ok
    # removes the check-then-create race of isdir() followed by mkdir().
    os.makedirs(path_, exist_ok=True)

In [None]:
# type of data
d_type = "et_morton_actual"

# Human-readable label of every supported met variable; used in plot titles
# and axis labels.
met_labels = {
    "daily_rain": "precipitation",
    "et_morton_actual": "Morton's evapotranspiration",
    "max_temp": "maximum temperature",
    "min_temp": "minimum temperature",
    "rh_tmax": "maximum relative humidity",
    "rh_tmin": "minimum relative humidity",
    "vp": "vapour pressure",
}

# Fail immediately on an unsupported type instead of leaving str_type
# undefined and hitting a NameError much later while plotting.
if d_type not in met_labels:
    raise ValueError(
        "Unsupported d_type {0!r}; expected one of: {1}".format(
            d_type, ", ".join(sorted(met_labels))
        )
    )
str_type = met_labels[d_type]

# met data
dir_ = r"X:\PGB\RSU\biomass\raw_zonal_stats\met\collation\{0}".format(d_type)

# plot directories
export_dir = r"X:\PGB\RSU\biomass\plots\{0}".format(d_type)
mk_dir_fn(export_dir)

export_box = os.path.join(export_dir, "box_plots")
mk_dir_fn(export_box)
export_line = os.path.join(export_dir, "line_plots")
mk_dir_fn(export_line)


csv_export = r"X:\PGB\RSU\biomass\met_zonal_stats_per_site\{0}".format(d_type)
mk_dir_fn(csv_export)

# monthly_dir receives the per-(year, month) CSVs and mean_monthly_dir the
# per-calendar-month CSVs, so each folder name now matches its contents.
# NOTE(review): the two folder names used to be swapped; outputs written by
# earlier runs are still in the old locations.
monthly_dir = os.path.join(csv_export, "monthly_{0}".format(d_type))
mk_dir_fn(monthly_dir)
mean_monthly_dir = os.path.join(csv_export, "mean_monthly_{0}".format(d_type))
mk_dir_fn(mean_monthly_dir)

In [None]:
# For every site CSV in dir_: compute the mean met value per (year, month) and
# per calendar month, export both tables as CSV, and save a box plot and a
# time-trace plot.

# Calendar-ordered tick labels for the box-plot x axis.
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'June', 'July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

for csv_file in glob(os.path.join(dir_, "*.csv")):
    print("-"*100)
    print("working for: ", csv_file)

    # Rows containing any missing value are discarded before aggregating.
    df = pd.read_csv(csv_file).dropna()
    if df.empty:
        print("no complete rows, skipping: ", csv_file)
        continue

    # extract name and site year from a "<site>.<year>_..." file name
    file_name = os.path.basename(csv_file)
    site_name, field_year = file_name.split("_")[0].split(".")
    print(site_name, field_year)

    # Split the YYYYMMDD "im_date" values into text year / month / day.
    # Done column-wise: DataFrame.iterrows() up-casts every row to one dtype,
    # which turns an integer date into e.g. "19880101.0" and corrupts "day".
    date_text = df["im_date"].astype("int64").astype(str)
    df["year"] = date_text.str[:4]
    df["month"] = date_text.str[4:6]
    df["day"] = date_text.str[6:]

    print(list(df.columns))

    # The statistic that is aggregated is the first column whose name
    # contains "mean".
    mean_cols = [col for col in df.columns if "mean" in col]
    if not mean_cols:
        raise ValueError("no column containing 'mean' in: {0}".format(csv_file))
    mean_col = mean_cols[0]

    # ---------------- calculate monthly mean met value across all data ------------------------

    # One row per (year, month), in chronological order.
    monthly_df = (
        df.groupby(["year", "month"], sort=True)[mean_col]
        .mean()
        .rename("mean")
        .reset_index()
    )

    monthly_df.insert(0, "site", site_name)
    monthly_df.insert(1, "site_year", field_year)
    monthly_df.insert(2, "met", d_type)

    monthly_out = os.path.join(monthly_dir, f"{site_name}_monthly_{d_type}.csv")
    monthly_df.to_csv(monthly_out, index = False)

    # ---------------- mean of the monthly means per calendar month ----------------------------

    # Groups are sorted by month ("01" .. "12") so that tables and boxes follow
    # the calendar even when the record does not start in January.
    by_month = monthly_df.groupby("month", sort=True)["mean"]
    mean_monthly_df = by_month.mean().rename("var").reset_index()

    print("month_list2: ", mean_monthly_df["month"].tolist())
    print("mean_month_list: ", mean_monthly_df["var"].tolist())
    print("="*50)

    mean_monthly_df.insert(0, "site", site_name)
    mean_monthly_df.insert(1, "site_year", field_year)
    mean_monthly_df.insert(2, "met", d_type)

    # The file name carries the met type (it used to say "rain" for every type).
    mean_monthly_out = os.path.join(mean_monthly_dir, f"{site_name}_mean_monthly_{d_type}.csv")
    mean_monthly_df.to_csv(mean_monthly_out, index = False)

    print("export: ", mean_monthly_out)

    # ------------------------------------------ Box Plot -----------------------------------------------

    # One list of monthly means per calendar month, plus the month number that
    # places each box under its own tick (months without data leave a gap).
    box_data = [values.tolist() for _, values in by_month]
    box_positions = [int(month) for month, _ in by_month]

    fig, ax = plt.subplots(figsize=(10, 7))
    ax.boxplot(box_data, positions=box_positions, showfliers = False)

    # Ticks and labels are applied after boxplot(), which otherwise replaces
    # them with its own numeric labels.
    ax.set_xlim(0.5, 12.5)
    ax.set_xticks(list(range(1, 13)))
    ax.set_xticklabels(month_labels)
    # NOTE(review): the fixed y range and the "(mm)" unit below are applied to
    # every met variable - confirm they suit temperature / humidity / vp data.
    ax.set_ylim(-5, 25)

    ax.set_title(f'Mean monthly {str_type} variation between 1988 and 2023 at site: {site_name}.',  fontsize='12', horizontalalignment='center')
    ax.set_xlabel('Months',  fontsize='12', horizontalalignment='center')
    ax.set_ylabel('Monthly mean {0} (mm)'.format(str_type),  fontsize='12', horizontalalignment='center')

    # Save before showing: with the inline backend plt.show() closes the
    # figure, so a later savefig() would write an empty image.
    box_out = os.path.join(export_box, f"{site_name}_{d_type}_box.jpg")
    fig.savefig(box_out)
    plt.show()
    plt.close(fig)

    # ---------------------- Time Trace --------------------------

    # Each monthly mean is placed on the 15th of its month.
    trace_dates = pd.to_datetime(
        monthly_df["year"] + monthly_df["month"] + "15", format='%Y%m%d'
    )
    year__ = monthly_df["year"].unique().tolist()

    fig, axs = plt.subplots(1,1, figsize=(18,5))
    axs.plot(trace_dates.to_numpy(), monthly_df["mean"].to_numpy())

    axs.set_title('Time trace mean {0} between 1988 and 2023, site: {1} .'.format(str_type, site_name),  fontsize='12', horizontalalignment='center')
    axs.set_xlabel('Years',  fontsize='12', horizontalalignment='center')
    axs.set_ylabel('Mean monthly {0} (mm)'.format(str_type),  fontsize='12', horizontalalignment='center')

    # One tick on 1 January of every year in the data; positions are set
    # first and the labels afterwards so that both always line up.
    axs.set_xticks(pd.to_datetime(year__, format='%Y').to_numpy())
    axs.set_xticklabels(year__, rotation=90, ha='right')
    # limit data time
    axs.set_xlim(pd.Timestamp('1993-01-01'), pd.Timestamp('2023-01-01'))

    line_out = os.path.join(export_line, f"{site_name}_{d_type}_line.jpg")
    fig.savefig(line_out)
    plt.show()
    # Close the figure so that open figures do not pile up across sites.
    plt.close(fig)

In [None]:
csv_file = r"X:\PGB\RSU\biomass\raw_zonal_stats\met\collation\daily_rain\adelaideriver.2008_1ha_daily_rain_zonal_stats.csv"

In [None]:
df = pd.read_csv(csv_file)
df1 = pd.read_csv(csv_file)

In [None]:
_, f = os.path.split(csv_file)
flist = f.split("_")
site_name, field_year = flist[0].split(".")
print(site_name, field_year)

In [None]:
df

In [None]:
year = []
month = []
day = []

for index, row in df.iterrows():
    #print(row["im_date"])
    date = str(row["im_date"])
    year.append(date[:4])
    month.append(date[4:6])
    day.append(date[6:])
    

df["year"] = year
df["month"] = month
df["day"] = day

In [None]:
print(list(df))

In [None]:


# Mean daily rainfall for every (year, month) pair, collected as three
# parallel lists: year, month and the mean value.
year_list, month_list, mean_list = [], [], []

for yr in df["year"].unique():
    rows_in_year = df.loc[df["year"] == yr]

    for mo in rows_in_year["month"].unique():
        rain_values = rows_in_year.loc[rows_in_year["month"] == mo, "rain_d_mean"].tolist()
        year_list.append(yr)
        month_list.append(mo)
        mean_list.append(mean(rain_values))


    

In [None]:
# Inspect the per-(year, month) mean values.
mean_list

In [None]:
# The three result lists are filled together, one entry per (year, month),
# so their lengths should be equal.
len(year_list)

In [None]:
len(month_list)

In [None]:
len(mean_list)

In [None]:
# NOTE(review): repeats the len(month_list) check made two cells earlier.
len(month_list)

In [None]:
monthly_rain = pd.DataFrame({"year": year_list,
                             "month": month_list,
                            "mean": mean_list})



In [None]:
monthly_rain

In [None]:
# m_min_list = []
# m_max_list = []
mean_list = []
month_list = []

month_list = []
mean_month_rain_list = []
    
for m in monthly_rain["month"].unique():
    print("month: ", m)
    month_list.append(m)
    my_df = monthly_rain[monthly_rain["month"]== m]
    month_rain = my_df["mean"].tolist()
    print(mean(month_rain))
    mean_month_rain_list.append(mean(month_rain))


In [None]:
len(month_list)

In [None]:
mean_monthly_rain = pd.DataFrame({
                             "month": month_list,
                            "rain": mean_month_rain_list})

In [None]:
mean_monthly_rain.insert(0, site_name, "site")
mean_monthly_rain.insert(1, field_year, "site_year")

In [None]:
mean_monthly_rain

In [None]:

# Box plot of the monthly mean rainfall, one box per calendar month.
# (matplotlib, numpy and the inline backend are already set up in the first cell.)

# One list of monthly means per calendar month, sorted "01" .. "12", plus the
# month number that places each box under its own tick.
month_groups = monthly_rain.groupby("month", sort=True)["mean"]
data = [values.tolist() for _, values in month_groups]
positions = [int(month) for month, _ in month_groups]

fig, ax = plt.subplots(figsize=(10, 7))
ax.boxplot(data, positions=positions, showfliers = False)

# Ticks and labels are applied after boxplot(), which otherwise replaces them
# with its own numeric labels.
ax.set_xlim(0.5, 12.5)
ax.set_ylim(-5, 25)
ax.set_xticks(list(range(1, 13)))
ax.set_xticklabels(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'June', 'July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])

ax.set_title(f'Mean Minimum Daily Rainfall at {site_name} from 1988 until 2023.',  fontsize='12', horizontalalignment='center')
ax.set_xlabel('Months',  fontsize='12', horizontalalignment='center')
ax.set_ylabel('Mean Minimum Rainfall (mm)',  fontsize='12', horizontalalignment='center')

# Save before showing: with the inline backend plt.show() closes the figure,
# so a later savefig() would write an empty image.
box_out = os.path.join(export_box, f"{site_name}_rainfall_box.jpg")
fig.savefig(box_out)
plt.show()

In [None]:
monthly_rain

y_list = monthly_rain["year"].tolist()
m_list = monthly_rain["month"].tolist()
val_list = monthly_rain["mean"].tolist()
dt_list2 = []
year__ = monthly_rain["year"].unique().tolist()
for y, m, v  in zip(y_list, m_list, val_list):
    i = f"{str(y)}{str(m)}15"
    
    dt_list2.append(pd.to_datetime(i, format='%Y%m%d'))
    #print(pd.to_datetime(i, format='%Y%m%d')
    #dt = pd.to_datetime(i)
    #print(dt)
    
    

In [None]:
year__ 

In [None]:
# Daily series: raw date values, rainfall values and the parsed timestamps.
dt_list = df1.im_date.tolist()
val_list = df1.rain_d_mean.tolist()

# Parse the whole YYYYMMDD column in one call; nothing is printed per row,
# which would flood the cell output.
dt_list2 = pd.to_datetime(df1.im_date.astype(str), format='%Y%m%d').tolist()
    
    


In [None]:
df3 = pd.DataFrame(val_list, index=dt_list2, columns=["rainfall"])

In [None]:
df3

In [None]:
year__

In [None]:
# Time trace of the rainfall series held in df3.
fig, axs = plt.subplots(1,1, figsize=(18,5))
axs.plot(df3.index, df3.rainfall)

# One tick on 1 January of every year in year__; tick positions are set first
# and the labels afterwards so that both always line up.
axs.set_xticks(pd.to_datetime(year__, format='%Y').to_numpy())
axs.set_xticklabels(year__, rotation=90, ha='right')

# limit the displayed period
axs.set_xlim(pd.Timestamp('1993-01-01'), pd.Timestamp('2023-01-01'))

# plt.show() rather than fig.show(): the latter only warns on the inline backend.
plt.show()

In [None]:
df1["t_stamp"] =dt_list2

In [None]:
df1

In [None]:
df1.set_index(dt_list2, inplace=True)

In [None]:
df1

In [None]:
df