In [None]:
import pandas as pd

def read_csv_to_dataframe(file_path):
    """
    Reads a CSV file into a pandas DataFrame.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file, passed unchanged to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame or None
        The parsed table, or ``None`` when reading fails. Failures are
        reported on stdout instead of being raised, so callers must check
        for ``None`` before using the result.
    """
    try:
        return pd.read_csv(file_path)
    except Exception as exc:
        # The f-prefix is required so the actual exception text is shown
        # rather than the literal placeholder.
        print(f"Error reading the CSV file: {exc}")
        return None

In [None]:
# Load the raw CSV into a DataFrame. The path is relative, so it is resolved
# against the kernel's working directory. read_csv_to_dataframe returns None
# (after printing the error) when the file cannot be read.
terminos_df = read_csv_to_dataframe("Terminos_lagoon_TA_DIC_2023_RawData.csv")

In [29]:
# Plain-text preview of the first rows of the loaded table.
# Raises AttributeError if the load step returned None.
print(terminos_df.head())

   sample      date     estuary   area station layer_depth season  \
0  CDL01S  5/3/2020  Candelaria  River   CDL01     Surface    Dry   
1  CDL01F  5/3/2020  Candelaria  River   CDL01      Bottom    Dry   
2  CDL02S  5/3/2020  Candelaria  River   CDL02     Surface    Dry   
3  CDL02F  5/3/2020  Candelaria  River   CDL02      Bottom    Dry   
4  CDL03S  5/3/2020  Candelaria  River   CDL03     Surface    Dry   

   chlorophy_microg_l  cond_microsiemens_cm  depth_m  ...  do_mg_l  sal_psu  \
0                0.36                7015.4    0.464  ...     7.12     3.56   
1                4.19               29886.1    7.792  ...     4.90    16.97   
2                0.92               16691.1    0.453  ...     6.99     8.94   
3                2.23               24847.4    1.261  ...     6.52    13.87   
4                0.58               46341.6    0.465  ...     6.24    28.06   

   sp_cond_microsiemens_cm  turbidity_fnu  temp_c  latitude  longitude  \
0                   6547.7          

In [None]:
from scipy.stats import kruskal

def kruskal_wallis_test(*groups, alpha=0.05):
    """
    Runs the Kruskal-Wallis H-test on the given samples and prints the outcome.

    Parameters
    ----------
    *groups : array-like
        One sample per group, forwarded unchanged to ``scipy.stats.kruskal``.
    alpha : float, default 0.05
        Significance level the p-value is compared against.

    Returns
    -------
    The result object produced by ``scipy.stats.kruskal`` (exposes
    ``statistic`` and ``pvalue``), so callers can reuse the numbers instead
    of parsing the printed text.
    """
    import math

    result = kruskal(*groups)

    print(f"H statistic = {result.statistic}")
    print(f"p-value = {result.pvalue}")

    if math.isnan(result.pvalue):
        # NaN compares False against any alpha, which would otherwise be
        # reported as "no significant difference" although no valid test
        # was computed.
        print("The test is inconclusive: the p-value is NaN (e.g. missing values in the groups).")
    elif result.pvalue < alpha:
        print("The difference between groups is statistically significant.")
    else:
        print("No statistically significant difference between groups.")

    return result

In [None]:
import scikit_posthocs as sp
# You need to install the scikit-posthocs package: conda install -c conda-forge scikit-posthocs

def dunn_posthoc(data, group_col, value_col, p_adjust='bonferroni'):
    """
    Dunn's post-hoc comparison, intended as a follow-up to Kruskal-Wallis.

    Parameters
    ----------
    data : object accepted by ``sp.posthoc_dunn`` as its first argument
        The table holding the observations.
    group_col : str
        Name of the column identifying each observation's group.
    value_col : str
        Name of the column holding the measured values.
    p_adjust : str, default 'bonferroni'
        Multiple-comparison correction, forwarded to ``sp.posthoc_dunn``.

    Returns
    -------
    Whatever ``sp.posthoc_dunn`` returns; it is also printed before being
    handed back.
    """
    pairwise = sp.posthoc_dunn(
        data,
        val_col=value_col,
        group_col=group_col,
        p_adjust=p_adjust,
    )
    print(pairwise)
    return pairwise

In [None]:
# Plot ta values for each area of the lagoon using seaborn
import seaborn as sns
import matplotlib.pyplot as plt

# Colour the boxes per area with the same hue/palette settings as the DIC and
# temperature box plots so the three figures are visually consistent.
# legend=False suppresses the legend, which would only repeat the x-axis labels.
sns.boxplot(x='area', y='ta_micromol_kg', hue='area', data=terminos_df, palette="Set3", legend=False)
plt.xlabel('Area of the lagoon')
plt.ylabel('Total Alkalinity (micromol/kg)')
plt.title('Total Alkalinity by Area of the Lagoon')
plt.show()

In [None]:
# Pull the total-alkalinity column for each lagoon area (one Series per area),
# filtering rows and picking the column in a single .loc lookup
ta_river = terminos_df.loc[terminos_df['area'] == 'River', 'ta_micromol_kg']
ta_plume = terminos_df.loc[terminos_df['area'] == 'Plume', 'ta_micromol_kg']
ta_coast = terminos_df.loc[terminos_df['area'] == 'Coast', 'ta_micromol_kg']

In [None]:
# Perform the Kruskal-Wallis test on total alkalinity across the River, Plume
# and Coast samples (uses the default alpha of 0.05)
kruskal_wallis_test(ta_river, ta_plume, ta_coast)

In [None]:
# Dunn's post-hoc comparison of ta_micromol_kg between areas
# (p_adjust is left at its default, 'bonferroni')
dunn_posthoc(terminos_df, 'area', 'ta_micromol_kg')

# Perform Kruskal-Wallis test and plot a box plot for DIC

In [None]:
# Plot DIC values for each area of the lagoon using seaborn
import seaborn as sns
import matplotlib.pyplot as plt

# Box plot of DIC per area; labels and title are set directly on the Axes
# that seaborn draws on.
sns.boxplot(
    data=terminos_df,
    x='area',
    y='dic_micromol_kg',
    hue='area',
    palette="Set3",
    legend=False,
).set(
    xlabel='Area of the lagoon',
    ylabel='DIC (micromol/kg)',
    title='DIC by Area of the Lagoon',
)
plt.show()

In [None]:
# Pull the DIC column for each lagoon area (one Series per area),
# filtering rows and picking the column in a single .loc lookup
dic_river = terminos_df.loc[terminos_df['area'] == 'River', 'dic_micromol_kg']
dic_plume = terminos_df.loc[terminos_df['area'] == 'Plume', 'dic_micromol_kg']
dic_coast = terminos_df.loc[terminos_df['area'] == 'Coast', 'dic_micromol_kg']

In [None]:
# Perform the Kruskal-Wallis test on DIC across the River, Plume and Coast
# samples (uses the default alpha of 0.05)
kruskal_wallis_test(dic_river, dic_plume, dic_coast)

In [None]:
# Dunn's post-hoc comparison of dic_micromol_kg between areas
# (p_adjust is left at its default, 'bonferroni')
dunn_posthoc(terminos_df, 'area', 'dic_micromol_kg')

# Perform Kruskal-Wallis test and plot a box plot for temperature

In [None]:
# Plot temperature values for each area of the lagoon using seaborn
import seaborn as sns
import matplotlib.pyplot as plt

# Box plot of temperature per area; labels and title are set directly on the
# Axes that seaborn draws on.
sns.boxplot(
    data=terminos_df,
    x='area',
    y='temp_c',
    hue='area',
    palette="Set3",
    legend=False,
).set(
    xlabel='Area of the lagoon',
    ylabel='Temperature (C)',
    title='Temperature by Area of the Lagoon',
)
plt.show()

In [None]:
# Pull the temperature column for each lagoon area (one Series per area),
# filtering rows and picking the column in a single .loc lookup
temp_river = terminos_df.loc[terminos_df['area'] == 'River', 'temp_c']
temp_plume = terminos_df.loc[terminos_df['area'] == 'Plume', 'temp_c']
temp_coast = terminos_df.loc[terminos_df['area'] == 'Coast', 'temp_c']

In [None]:
# Perform the Kruskal-Wallis test on temperature across the River, Plume and
# Coast samples (uses the default alpha of 0.05)
kruskal_wallis_test(temp_river, temp_plume, temp_coast)

In [None]:
# Dunn's post-hoc comparison of temp_c between areas
# (p_adjust is left at its default, 'bonferroni')
dunn_posthoc(terminos_df, 'area', 'temp_c')