# Seasonal meteorological analysis

In [None]:
import pandas as pd
import os
from glob import glob
from statistics import mean

# Import libraries
import matplotlib.pyplot as plt
import numpy as np

import matplotlib as mpl
mpl.rcParams['axes.titlesize'] = 14
mpl.rcParams['axes.labelsize'] = 13
mpl.rcParams['xtick.labelsize'] = 12
mpl.rcParams['ytick.labelsize'] = 12
mpl.rcParams['legend.fontsize'] = 12

%matplotlib inline

In [None]:
def mk_dir_fn(path_):
    """Create the directory *path_* if it does not already exist.

    Missing parent directories are created as well, and an existing
    directory is left untouched.

    Args:
        path_: Path of the directory to create.

    Raises:
        FileExistsError: If *path_* exists but is not a directory.
    """
    # makedirs(exist_ok=True) avoids the check-then-create race of
    # isdir() + mkdir() and does not fail when a parent folder is missing.
    os.makedirs(path_, exist_ok=True)
        
def si_fn(R, mean_month_rainfall):
    """Return the seasonality index for one set of monthly values.

    The index is ``(1 / R) * sum(|Xj - R / 12|)`` taken over every value
    ``Xj`` in *mean_month_rainfall*.

    Args:
        R: Annual reference value; used as the divisor and, divided by 12,
            as the per-month baseline each ``Xj`` is compared against.
        mean_month_rainfall: Iterable of monthly values ``Xj``.

    Returns:
        The index as a number. NaN is returned when there are no monthly
        values or when *R* is zero, because the index is undefined in both
        cases (previously these raised UnboundLocalError and
        ZeroDivisionError respectively).
    """
    monthly_baseline = R / 12
    deviations = [abs(Xj - monthly_baseline) for Xj in mean_month_rainfall]

    if not deviations or R == 0:
        return float("nan")

    si = (1 / R) * sum(deviations)
    return si

In [None]:
# Configuration for the per-site analysis: pick the meteorological variable,
# derive its plot label / box-plot limits, and create the output folders.

# type of data
d_type = "daily_rain"
#d_type = "et_morton_actual"
#d_type = "max_temp"
#d_type = "min_temp"
#d_type = "rh_tmax"
#d_type = "rh_tmin"
# d_type = "vp"

# Plot label (str_type) and box-plot y-axis limits (y_lim) per variable.
# NOTE(review): the relative-humidity limits (3200-3400) look unusual for a
# percentage - confirm against the data.
# NOTE(review): "vp" had no y_lim in the original, so the box plot failed for
# it; 0-40 assumes vapour pressure in hPa - confirm.
met_settings = {
    "daily_rain": ("precipitation", [-5, 30]),
    "et_morton_actual": ("Morton's evapotranspiration", [0, 10]),
    "max_temp": ("maximum temperature", [20, 45]),
    "min_temp": ("minimum temperature", [0, 40]),
    "rh_tmax": ("maximum relative humidity", [3200, 3400]),
    "rh_tmin": ("minimum relative humidity", [3200, 3400]),
    "vp": ("vapour pressure", [0, 40]),
}

# Fail here rather than part-way through the processing loop, where a missing
# str_type / y_lim would only surface after some outputs were already written.
if d_type not in met_settings:
    raise ValueError(
        "unsupported d_type {0!r}; expected one of {1}".format(d_type, sorted(met_settings))
    )

str_type, y_lim = met_settings[d_type]

# met data
#dir_ = r"X:\PGB\RSU\biomass\raw_zonal_stats\met\collation\slats_tern\{0}".format(d_type)
dir_ = r"X:\PGB\RSU\biomass\zonal_stats_raw\met_clean\{0}".format(d_type)

# plot directories
export_dir = r"X:\PGB\RSU\biomass\plots\slats_tern\{0}".format(d_type)
export_box = os.path.join(export_dir, "box_plots")
export_line = os.path.join(export_dir, "line_plots")

# csv directories
csv_export = r"X:\PGB\RSU\biomass\met_zonal_stats_per_site\slats_tern\{0}".format(d_type)

# NOTE(review): the folder names of the next two look swapped (monthly_dir
# points at "mean_monthly_*" and mean_monthly_dir at "monthly_*"). Left as-is
# so existing outputs stay where downstream steps expect them - confirm.
monthly_dir = os.path.join(csv_export, "mean_monthly_{0}".format(d_type))
mean_monthly_dir = os.path.join(csv_export, "monthly_{0}".format(d_type))
seasonal_dir = os.path.join(csv_export, "seasonal_{0}".format(d_type))
si_dir = os.path.join(csv_export, "si_{0}".format(d_type))

# Parents are listed before their children so each folder can be created in turn.
for out_dir in (
    export_dir,
    export_box,
    export_line,
    csv_export,
    monthly_dir,
    mean_monthly_dir,
    seasonal_dir,
    si_dir,
):
    mk_dir_fn(out_dir)

In [None]:
# Per-site workflow for the variable chosen in the configuration cell
# (dir_, d_type, str_type, y_lim and the output folders all come from there).
# For every CSV in dir_ this writes: a monthly-mean CSV, a mean-monthly CSV,
# a box plot, a time-trace plot, a seasonal-totals CSV and a
# seasonality-index CSV.
for csv_file in glob(os.path.join(dir_, "*.csv")):
    print("-"*100)
    print("working for: ", csv_file)

    df = pd.read_csv(csv_file)
    df.dropna(inplace=True)
    # The original cell read the file a second time into df1 and never used
    # it; a copy keeps the name defined without the extra disk read.
    df1 = df.copy()

    # extract name and site year: the text before the first "_" in the file
    # name must have the form "<site_name>.<field_year>"
    file_name = os.path.basename(csv_file)
    site_name, field_year = file_name.split("_")[0].split(".")
    print(site_name, field_year)

    # Nothing can be summarised once every row has been dropped (the seasonal
    # step would fail in pd.concat), so move on to the next file.
    if df.empty:
        print("no complete rows, skipping: ", csv_file)
        continue

    # The analysed values come from the first column whose name contains "mean".
    mean_columns = [col for col in df.columns if "mean" in col]
    if not mean_columns:
        raise ValueError(f"no column containing 'mean' in: {csv_file}")
    value_col = mean_columns[0]
    print(value_col)

    # splitting date time to year month and day (im_date is read as YYYYMMDD).
    # Done on the whole column: iterrows() does not preserve dtypes, so an
    # integer date could come back as a float and leak ".0" into the day.
    date_text = df["im_date"].astype(str)
    df["year"] = date_text.str[:4]
    df["month"] = date_text.str[4:6]
    df["day"] = date_text.str[6:8]

    # ---------------- per-year / per-month statistics (single pass) ----------------
    # sort=False keeps years and months in order of first appearance, which
    # fixes the row order of every CSV written below.
    yearly_stats = []
    for y, y_df in df.groupby("year", sort=False):
        year_values = y_df[value_col].tolist()
        month_stats = []
        for m, my_df in y_df.groupby("month", sort=False):
            month_values = my_df[value_col].tolist()
            month_stats.append({"month": m,
                                "total": sum(month_values),
                                "mean": mean(month_values)})
        yearly_stats.append({"year": y,
                             "annual_total": sum(year_values),
                             "annual_mean": mean(year_values),
                             "months": month_stats})

    # ---------------- monthly mean met value per year ----------------
    year_list = []
    month_list = []
    mean_list = []
    for year_stats in yearly_stats:
        for month_stats in year_stats["months"]:
            year_list.append(year_stats["year"])
            month_list.append(month_stats["month"])
            mean_list.append(month_stats["mean"])

    monthly_df = pd.DataFrame({"year": year_list,
                               "month": month_list,
                               "mean": mean_list})

    monthly_df.insert(0, "site", site_name)
    monthly_df.insert(1, "site_year", field_year)
    monthly_df.insert(2, "met", d_type)

    monthly_out = os.path.join(monthly_dir, f"{site_name}_monthly_{d_type}.csv")
    monthly_df.to_csv(monthly_out, index = False)

    # ---------------- mean of the monthly means per calendar month ----------------
    month_list2 = []       # calendar month labels ("01".."12")
    mean_month_list = []   # mean of the monthly means for that month
    month_list3 = []       # every monthly mean for that month (box-plot data)

    for m, m_df in monthly_df.groupby("month", sort=False):
        month_var = m_df["mean"].tolist()
        mean_month_list.append(mean(month_var))
        month_list3.append(month_var)
        month_list2.append(m)

    mean_monthly_df = pd.DataFrame({"month": month_list2,
                                    "var": mean_month_list})

    mean_monthly_df.insert(0, "site", site_name)
    mean_monthly_df.insert(1, "site_year", field_year)
    mean_monthly_df.insert(2, "met", d_type)

    mean_monthly_out = os.path.join(mean_monthly_dir, f"{site_name}_mean_monthly_{d_type}.csv")
    mean_monthly_df.to_csv(mean_monthly_out, index = False)

    print("export: ", mean_monthly_out)

    # ------------------------------------------ Box Plot -----------------------------------------------
    # NOTE(review): the "(mm)" unit and the 1988-2023 period in the labels are
    # hard-coded for every variable - confirm they suit non-rainfall data.
    fig = plt.figure(figsize =(10, 7))

    # Draw each box at its calendar-month position so the Jan..Dec labels stay
    # correct even when the record starts mid-year or a month is missing.
    box_positions = [int(m) for m in month_list2]
    plt.boxplot(month_list3, positions=box_positions, showfliers = False)

    # Tick labels are applied after boxplot(), which manages the x ticks itself.
    plt.xticks([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
               ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'June', 'July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
    plt.xlim(0.5, 12.5)
    plt.ylim(y_lim[0], y_lim[1])

    plt.title(f'Average daily {str_type} variation per month between 1988 and 2023 at site: {site_name}.', horizontalalignment='center')
    plt.xlabel('Months',  horizontalalignment='center')
    plt.ylabel('Average daily {0} (mm)'.format(str_type),   horizontalalignment='center')

    box_out = os.path.join(export_box, f"{site_name}_{field_year}_{d_type}_box.jpg")
    plt.savefig(box_out)

    #show plot, then release it so figures do not pile up across sites
    plt.show()
    plt.close(fig)

    print("box plot output: ", box_out)

    # ---------------------- Time Trace --------------------------
    # Each monthly mean is placed on the 15th of its month.
    val_list = monthly_df["mean"].tolist()
    stamp_text = (monthly_df["year"] + monthly_df["month"] + "15").tolist()
    dt_list2 = pd.to_datetime(stamp_text, format='%Y%m%d')
    year__ = monthly_df["year"].unique().tolist()

    df3 = pd.DataFrame(val_list, index=dt_list2, columns=["met"])

    fig, axs = plt.subplots(1,1, figsize=(18,5))
    trace_values = df3.met
    # NOTE(review): the legend label is 'Rainfall' for every variable - confirm.
    axs.plot(df3.index, trace_values, label='Rainfall')

    plt.title('Time trace mean {0} between 1988 and 2023, site: {1} .'.format(str_type, site_name), horizontalalignment='center')
    plt.xlabel('Years',  horizontalalignment='center')
    plt.ylabel('Mean monthly {0} (mm)'.format(str_type),   horizontalalignment='center')

    # One tick per year (1 January); locations are set before their labels.
    axs.set_xticks(pd.to_datetime(year__, format='%Y'))
    axs.set_xticklabels(year__)
    plt.xticks(rotation=90, ha='right')
    # limit data time
    plt.xlim(pd.Timestamp('1993'), pd.Timestamp('2023'))

    median_value = np.median(trace_values)
    axs.axhline(median_value, color='grey', linestyle='--', label=f'Median: {median_value:.2f}')

    mean_value = np.mean(trace_values)
    axs.axhline(mean_value, color='black', linestyle='--', label=f'Mean: {mean_value:.2f}')
    axs.legend()

    # Adjust layout to make room for the labels
    plt.tight_layout()

    line_out = os.path.join(export_line, f"{site_name}_{field_year}_{d_type}_line.jpg")
    plt.savefig(line_out, bbox_inches='tight')
    # plt.show() works on the inline backend (fig.show() only warns there)
    plt.show()
    plt.close(fig)
    print("Timetrace plot output: ", line_out)

    # ---------------- monthly and annual totals ---------------------------
    totals = {"year": [],
              "month": [],
              "ym_total": [],
              "annual_total": [],
              }
    for year_stats in yearly_stats:
        for month_stats in year_stats["months"]:
            totals["year"].append(year_stats["year"])
            totals["month"].append(month_stats["month"])
            totals["ym_total"].append(month_stats["total"])
            totals["annual_total"].append(year_stats["annual_total"])

    new_df = pd.DataFrame(totals)

    # Regroup the rows by calendar month. "month_avg" keeps its original name
    # but holds the SUM of that month's totals across all years.
    month_frames = []
    for m in new_df["month"].unique():
        # .copy() so the new column is not written onto a slice of new_df
        m_df = new_df[new_df["month"]== m].copy()
        m_df["month_avg"] = sum(m_df["ym_total"].tolist())
        month_frames.append(m_df)

    out_df = pd.concat(month_frames)

    # ---------------- seasonal totals per year ---------------------------
    # DJF is handled separately because its December comes from the previous year.
    season_months = {
        "mam": ["03", "04", "05"],
        "jja": ["06", "07", "08"],
        "son": ["09", "10", "11"],
        "dry": ["05", "06", "07", "08", "09"],
        "wet": ["01", "02", "03", "04", "10", "11", "12"],
    }

    seasonal_records = {"site": [],
                        "year": [],
                        "djf": [],
                        "mam": [],
                        "jja": [],
                        "son": [],
                        "dry": [],
                        "wet": [],
                        "annual" : [],
                        }

    for y in out_df["year"].unique():
        y_df = out_df[out_df["year"]== y]
        prev_year = str(int(y) - 1)
        prev_dec_df = out_df[(out_df["year"]== prev_year) & (out_df["month"]== "12")]

        seasonal_records["site"].append(site_name)
        seasonal_records["year"].append(y)
        seasonal_records["annual"].append(y_df["annual_total"].iloc[0])

        # January + February of this year, then December of the previous year
        # (absent for the first year on record, which leaves DJF partial).
        djf = (y_df.loc[y_df["month"].isin(["01", "02"]), "ym_total"].tolist()
               + prev_dec_df["ym_total"].tolist())
        seasonal_records["djf"].append(sum(djf))

        for season, months_in_season in season_months.items():
            season_totals = y_df.loc[y_df["month"].isin(months_in_season), "ym_total"].tolist()
            seasonal_records[season].append(sum(season_totals))

    seasonal_df = pd.DataFrame(seasonal_records)
    csv_out = os.path.join(seasonal_dir, f"seasonal_{site_name}_{field_year}_{d_type}.csv")
    print("export: ", csv_out)
    seasonal_df.to_csv(csv_out, index=False)

    # --------------------------------------------------- seasonal index -------------------------------------
    print("="*50)
    dict2 = {"site": [],
             "year": [],
             "month": [],
             "month_total": [],
             "month_average": [],
             "annual_total": [],
             "annual_average_R": [],
             "annual_si": []
             }

    for year_stats in yearly_stats:
        R = year_stats["annual_mean"]
        month_avg = [month_stats["mean"] for month_stats in year_stats["months"]]

        # The index is one value per year, so compute it once and repeat it
        # on each of that year's monthly rows.
        # NOTE(review): si_fn is given the annual MEAN and monthly MEANS while
        # it compares each month against R/12 - confirm totals were not intended.
        si = si_fn(R, month_avg)

        for month_stats in year_stats["months"]:
            dict2["site"].append(site_name)
            dict2["year"].append(year_stats["year"])
            dict2["month"].append(month_stats["month"])
            dict2["month_total"].append(month_stats["total"])
            dict2["month_average"].append(month_stats["mean"])
            dict2["annual_total"].append(year_stats["annual_total"])
            dict2["annual_average_R"].append(R)
            dict2["annual_si"].append(si)

    si_df = pd.DataFrame(dict2)
    si_out = os.path.join(si_dir, f"seasonal_{site_name}_{field_year}_{d_type}.csv")
    print("export: ", si_out)
    si_df.to_csv(si_out, index=False)

# END

annual total = sum of the mean rainfall across all months in a year

month_avg = the sum of the mean monthly rainfall for each month across all years

# Plot all met variables

In [None]:
# Configuration for the second processing cell: pick the meteorological
# variable, derive its plot label / box-plot limits, and create the output
# folders.

# type of data
d_type = "daily_rain"
#d_type = "et_morton_actual"
#d_type = "max_temp"
#d_type = "min_temp"
#d_type = "rh_tmax"
#d_type = "rh_tmin"
# d_type = "vp"

# Plot label (str_type) and box-plot y-axis limits (y_lim) per variable.
# NOTE(review): the relative-humidity limits (3200-3400) look unusual for a
# percentage - confirm against the data.
# NOTE(review): "vp" had no y_lim in the original, so the box plot failed for
# it; 0-40 assumes vapour pressure in hPa - confirm.
# NOTE(review): "all" has a label only. No single y-range was defined for it,
# and the processing loop (which indexes y_lim) does not appear to support it yet.
met_settings = {
    "daily_rain": ("precipitation", [-5, 25]),
    "et_morton_actual": ("Morton's evapotranspiration", [0, 10]),
    "max_temp": ("maximum temperature", [20, 45]),
    "min_temp": ("minimum temperature", [0, 40]),
    "rh_tmax": ("maximum relative humidity", [3200, 3400]),
    "rh_tmin": ("minimum relative humidity", [3200, 3400]),
    "vp": ("vapour pressure", [0, 40]),
    "all": ("meterological variables", None),
}

# Fail here rather than part-way through the processing loop, where a missing
# str_type / y_lim would only surface after some outputs were already written.
if d_type not in met_settings:
    raise ValueError(
        "unsupported d_type {0!r}; expected one of {1}".format(d_type, sorted(met_settings))
    )

str_type, y_lim = met_settings[d_type]

# met data
#dir_ = r"X:\PGB\RSU\biomass\raw_zonal_stats\met\collation\slats_tern\{0}".format(d_type)
# The processing loop reads from dir_; define it here so it always matches
# this cell's d_type instead of whatever an earlier cell last assigned.
dir_ = r"X:\PGB\RSU\biomass\zonal_stats_raw\met_clean\{0}".format(d_type)

# Per-variable input folders.
# NOTE(review): the original assigned rain_dir_ three more times after
# max_tem_dir_; presumably those lines were meant to be the min_temp, rh_tmax,
# rh_tmin / vp folders - TODO confirm and add them.
rain_dir_ = r"X:\PGB\RSU\biomass\zonal_stats_raw\met_clean\daily_rain"
et_dir_ = r"X:\PGB\RSU\biomass\zonal_stats_raw\met_clean\et_morton_actual"
max_tem_dir_ = r"X:\PGB\RSU\biomass\zonal_stats_raw\met_clean\max_temp"

# plot directories
export_dir = r"X:\PGB\RSU\biomass\plots\slats_tern\{0}".format(d_type)
export_box = os.path.join(export_dir, "box_plots")
export_line = os.path.join(export_dir, "line_plots")

# csv directories
csv_export = r"X:\PGB\RSU\biomass\met_zonal_stats_per_site\slats_tern\{0}".format(d_type)

# NOTE(review): the folder names of the next two look swapped (monthly_dir
# points at "mean_monthly_*" and mean_monthly_dir at "monthly_*"). Left as-is
# so existing outputs stay where downstream steps expect them - confirm.
monthly_dir = os.path.join(csv_export, "mean_monthly_{0}".format(d_type))
mean_monthly_dir = os.path.join(csv_export, "monthly_{0}".format(d_type))
seasonal_dir = os.path.join(csv_export, "seasonal_{0}".format(d_type))
si_dir = os.path.join(csv_export, "si_{0}".format(d_type))

# Parents are listed before their children so each folder can be created in turn.
for out_dir in (
    export_dir,
    export_box,
    export_line,
    csv_export,
    monthly_dir,
    mean_monthly_dir,
    seasonal_dir,
    si_dir,
):
    mk_dir_fn(out_dir)

In [None]:
# Per-site workflow for the variable chosen in the configuration cell
# (dir_, d_type, str_type, y_lim and the output folders all come from there).
# For every CSV in dir_ this writes: a monthly-mean CSV, a mean-monthly CSV,
# a box plot, a time-trace plot, a seasonal-totals CSV and a
# seasonality-index CSV.
for csv_file in glob(os.path.join(dir_, "*.csv")):
    print("-"*100)
    print("working for: ", csv_file)

    df = pd.read_csv(csv_file)
    df.dropna(inplace=True)
    # The original cell read the file a second time into df1 and never used
    # it; a copy keeps the name defined without the extra disk read.
    df1 = df.copy()

    # extract name and site year: the text before the first "_" in the file
    # name must have the form "<site_name>.<field_year>"
    file_name = os.path.basename(csv_file)
    site_name, field_year = file_name.split("_")[0].split(".")
    print(site_name, field_year)

    # Nothing can be summarised once every row has been dropped (the seasonal
    # step would fail in pd.concat), so move on to the next file.
    if df.empty:
        print("no complete rows, skipping: ", csv_file)
        continue

    # The analysed values come from the first column whose name contains "mean".
    mean_columns = [col for col in df.columns if "mean" in col]
    if not mean_columns:
        raise ValueError(f"no column containing 'mean' in: {csv_file}")
    value_col = mean_columns[0]
    print(value_col)

    # splitting date time to year month and day (im_date is read as YYYYMMDD).
    # Done on the whole column: iterrows() does not preserve dtypes, so an
    # integer date could come back as a float and leak ".0" into the day.
    date_text = df["im_date"].astype(str)
    df["year"] = date_text.str[:4]
    df["month"] = date_text.str[4:6]
    df["day"] = date_text.str[6:8]

    # ---------------- per-year / per-month statistics (single pass) ----------------
    # sort=False keeps years and months in order of first appearance, which
    # fixes the row order of every CSV written below.
    yearly_stats = []
    for y, y_df in df.groupby("year", sort=False):
        year_values = y_df[value_col].tolist()
        month_stats = []
        for m, my_df in y_df.groupby("month", sort=False):
            month_values = my_df[value_col].tolist()
            month_stats.append({"month": m,
                                "total": sum(month_values),
                                "mean": mean(month_values)})
        yearly_stats.append({"year": y,
                             "annual_total": sum(year_values),
                             "annual_mean": mean(year_values),
                             "months": month_stats})

    # ---------------- monthly mean met value per year ----------------
    year_list = []
    month_list = []
    mean_list = []
    for year_stats in yearly_stats:
        for month_stats in year_stats["months"]:
            year_list.append(year_stats["year"])
            month_list.append(month_stats["month"])
            mean_list.append(month_stats["mean"])

    monthly_df = pd.DataFrame({"year": year_list,
                               "month": month_list,
                               "mean": mean_list})

    monthly_df.insert(0, "site", site_name)
    monthly_df.insert(1, "site_year", field_year)
    monthly_df.insert(2, "met", d_type)

    monthly_out = os.path.join(monthly_dir, f"{site_name}_monthly_{d_type}.csv")
    monthly_df.to_csv(monthly_out, index = False)

    # ---------------- mean of the monthly means per calendar month ----------------
    month_list2 = []       # calendar month labels ("01".."12")
    mean_month_list = []   # mean of the monthly means for that month
    month_list3 = []       # every monthly mean for that month (box-plot data)

    for m, m_df in monthly_df.groupby("month", sort=False):
        month_var = m_df["mean"].tolist()
        mean_month_list.append(mean(month_var))
        month_list3.append(month_var)
        month_list2.append(m)

    mean_monthly_df = pd.DataFrame({"month": month_list2,
                                    "var": mean_month_list})

    mean_monthly_df.insert(0, "site", site_name)
    mean_monthly_df.insert(1, "site_year", field_year)
    mean_monthly_df.insert(2, "met", d_type)

    mean_monthly_out = os.path.join(mean_monthly_dir, f"{site_name}_mean_monthly_{d_type}.csv")
    mean_monthly_df.to_csv(mean_monthly_out, index = False)

    print("export: ", mean_monthly_out)

    # ------------------------------------------ Box Plot -----------------------------------------------
    # NOTE(review): the "(mm)" unit and the 1988-2023 period in the labels are
    # hard-coded for every variable - confirm they suit non-rainfall data.
    fig = plt.figure(figsize =(10, 7))

    # Draw each box at its calendar-month position so the Jan..Dec labels stay
    # correct even when the record starts mid-year or a month is missing.
    box_positions = [int(m) for m in month_list2]
    plt.boxplot(month_list3, positions=box_positions, showfliers = False)

    # Tick labels are applied after boxplot(), which manages the x ticks itself.
    plt.xticks([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
               ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'June', 'July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
    plt.xlim(0.5, 12.5)
    plt.ylim(y_lim[0], y_lim[1])

    plt.title(f'Mean monthly {str_type} variation between 1988 and 2023 at site: {site_name}.',  fontsize='12', horizontalalignment='center')
    plt.xlabel('Months',  fontsize='12', horizontalalignment='center')
    plt.ylabel('Monthly mean {0} (mm)'.format(str_type),  fontsize='12', horizontalalignment='center')

    box_out = os.path.join(export_box, f"{site_name}_{field_year}_{d_type}_box.jpg")
    plt.savefig(box_out)

    #show plot, then release it so figures do not pile up across sites
    plt.show()
    plt.close(fig)

    print("box plot output: ", box_out)

    # ---------------------- Time Trace --------------------------
    # Each monthly mean is placed on the 15th of its month.
    val_list = monthly_df["mean"].tolist()
    stamp_text = (monthly_df["year"] + monthly_df["month"] + "15").tolist()
    dt_list2 = pd.to_datetime(stamp_text, format='%Y%m%d')
    year__ = monthly_df["year"].unique().tolist()

    df3 = pd.DataFrame(val_list, index=dt_list2, columns=["met"])

    fig, axs = plt.subplots(1,1, figsize=(18,5))
    trace_values = df3.met
    # NOTE(review): the legend label is 'Rainfall' for every variable - confirm.
    axs.plot(df3.index, trace_values, label='Rainfall')

    plt.title('Time trace mean {0} between 1988 and 2023, site: {1} .'.format(str_type, site_name),  fontsize='14', horizontalalignment='center')
    plt.xlabel('Years',  fontsize='14', horizontalalignment='center')
    plt.ylabel('Mean monthly {0} (mm)'.format(str_type),  fontsize='14', horizontalalignment='center')

    # One tick per year (1 January); locations are set before their labels.
    axs.set_xticks(pd.to_datetime(year__, format='%Y'))
    axs.set_xticklabels(year__)
    plt.xticks(rotation=90, ha='right', fontsize='12')
    # limit data time
    plt.xlim(pd.Timestamp('1993'), pd.Timestamp('2023'))

    median_value = np.median(trace_values)
    axs.axhline(median_value, color='grey', linestyle='--', label=f'Median: {median_value:.2f}')

    mean_value = np.mean(trace_values)
    axs.axhline(mean_value, color='black', linestyle='--', label=f'Mean: {mean_value:.2f}')
    axs.legend()

    line_out = os.path.join(export_line, f"{site_name}_{field_year}_{d_type}_line.jpg")
    plt.savefig(line_out)
    # Shown once, after the figure is complete; plt.show() works on the
    # inline backend (fig.show() only warns there).
    plt.show()
    plt.close(fig)
    print("Timetrace plot output: ", line_out)

    # ---------------- monthly and annual totals ---------------------------
    totals = {"year": [],
              "month": [],
              "ym_total": [],
              "annual_total": [],
              }
    for year_stats in yearly_stats:
        for month_stats in year_stats["months"]:
            totals["year"].append(year_stats["year"])
            totals["month"].append(month_stats["month"])
            totals["ym_total"].append(month_stats["total"])
            totals["annual_total"].append(year_stats["annual_total"])

    new_df = pd.DataFrame(totals)

    # Regroup the rows by calendar month. "month_avg" keeps its original name
    # but holds the SUM of that month's totals across all years.
    month_frames = []
    for m in new_df["month"].unique():
        # .copy() so the new column is not written onto a slice of new_df
        m_df = new_df[new_df["month"]== m].copy()
        m_df["month_avg"] = sum(m_df["ym_total"].tolist())
        month_frames.append(m_df)

    out_df = pd.concat(month_frames)

    # ---------------- seasonal totals per year ---------------------------
    # DJF is handled separately because its December comes from the previous year.
    season_months = {
        "mam": ["03", "04", "05"],
        "jja": ["06", "07", "08"],
        "son": ["09", "10", "11"],
        "dry": ["05", "06", "07", "08", "09"],
        "wet": ["01", "02", "03", "04", "10", "11", "12"],
    }

    seasonal_records = {"site": [],
                        "year": [],
                        "djf": [],
                        "mam": [],
                        "jja": [],
                        "son": [],
                        "dry": [],
                        "wet": [],
                        "annual" : [],
                        }

    for y in out_df["year"].unique():
        y_df = out_df[out_df["year"]== y]
        prev_year = str(int(y) - 1)
        prev_dec_df = out_df[(out_df["year"]== prev_year) & (out_df["month"]== "12")]

        seasonal_records["site"].append(site_name)
        seasonal_records["year"].append(y)
        seasonal_records["annual"].append(y_df["annual_total"].iloc[0])

        # January + February of this year, then December of the previous year
        # (absent for the first year on record, which leaves DJF partial).
        djf = (y_df.loc[y_df["month"].isin(["01", "02"]), "ym_total"].tolist()
               + prev_dec_df["ym_total"].tolist())
        seasonal_records["djf"].append(sum(djf))

        for season, months_in_season in season_months.items():
            season_totals = y_df.loc[y_df["month"].isin(months_in_season), "ym_total"].tolist()
            seasonal_records[season].append(sum(season_totals))

    seasonal_df = pd.DataFrame(seasonal_records)
    csv_out = os.path.join(seasonal_dir, f"seasonal_{site_name}_{field_year}_{d_type}.csv")
    print("export: ", csv_out)
    seasonal_df.to_csv(csv_out, index=False)

    # --------------------------------------------------- seasonal index -------------------------------------
    print("="*50)
    dict2 = {"site": [],
             "year": [],
             "month": [],
             "month_total": [],
             "month_average": [],
             "annual_total": [],
             "annual_average_R": [],
             "annual_si": []
             }

    for year_stats in yearly_stats:
        R = year_stats["annual_mean"]
        month_avg = [month_stats["mean"] for month_stats in year_stats["months"]]

        # The index is one value per year, so compute it once and repeat it
        # on each of that year's monthly rows.
        # NOTE(review): si_fn is given the annual MEAN and monthly MEANS while
        # it compares each month against R/12 - confirm totals were not intended.
        si = si_fn(R, month_avg)

        for month_stats in year_stats["months"]:
            dict2["site"].append(site_name)
            dict2["year"].append(year_stats["year"])
            dict2["month"].append(month_stats["month"])
            dict2["month_total"].append(month_stats["total"])
            dict2["month_average"].append(month_stats["mean"])
            dict2["annual_total"].append(year_stats["annual_total"])
            dict2["annual_average_R"].append(R)
            dict2["annual_si"].append(si)

    si_df = pd.DataFrame(dict2)
    si_out = os.path.join(si_dir, f"seasonal_{site_name}_{field_year}_{d_type}.csv")
    print("export: ", si_out)
    si_df.to_csv(si_out, index=False)