In [1]:
import pandas as pd

def read_csv_to_dataframe(file_path):
    """Read a CSV file into a pandas DataFrame.

    Parameters
    ----------
    file_path : str, path-like or file-like
        Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame or None
        The parsed table, or ``None`` when reading fails for any reason
        (the error is printed instead of being raised).
    """
    try:
        return pd.read_csv(file_path)
    except Exception as exc:
        # The f-prefix is required so the actual exception text is shown
        # rather than the literal placeholder.
        print(f"Error reading the CSV file: {exc}")
        return None

In [2]:
# Load the raw-data CSV into a DataFrame. The helper returns None (after
# printing the error) if the file cannot be read, so later cells will fail
# on a None value in that case.
terminos_df = read_csv_to_dataframe("Terminos_lagoon_TA_DIC_2023_RawData.csv")

In [3]:
# Preview the first rows as plain text. This raises AttributeError if the
# load step returned None instead of a DataFrame.
print(terminos_df.head())

   sample      date     estuary   area station layer_depth season  \
0  CDL01S  5/3/2020  Candelaria  River   CDL01     Surface    Dry   
1  CDL01F  5/3/2020  Candelaria  River   CDL01      Bottom    Dry   
2  CDL02S  5/3/2020  Candelaria  River   CDL02     Surface    Dry   
3  CDL02F  5/3/2020  Candelaria  River   CDL02      Bottom    Dry   
4  CDL03S  5/3/2020  Candelaria  River   CDL03     Surface    Dry   

   chlorophy_microg_l  cond_microsiemens_cm  depth_m  ...  do_mg_l  sal_psu  \
0                0.36                7015.4    0.464  ...     7.12     3.56   
1                4.19               29886.1    7.792  ...     4.90    16.97   
2                0.92               16691.1    0.453  ...     6.99     8.94   
3                2.23               24847.4    1.261  ...     6.52    13.87   
4                0.58               46341.6    0.465  ...     6.24    28.06   

   sp_cond_microsiemens_cm  turbidity_fnu  temp_c  latitude  longitude  \
0                   6547.7          

In [4]:
from scipy import stats

def test_spearman(x, y, alpha=0.05):
    if len(x) != len(y):
        raise ValueError("Las variables deben tener la misma longitud.")

    rho, pval = stats.spearmanr(x, y)

    print(f"Spearman's correlation coefficient (rho): {rho:.3f}")
    print(f"p-value: {pval:.4f}")

    if pval < alpha:
        print(f"✔️ Significant relationship (p < {alpha:.3f})")
    else:
        print(f"⚠️ No significant relationship (p ≥ {alpha:.3f})")

    if rho > 0:
        print("📈 Positive correlation")
    else:
        print("📉 Negative correlation")

In [5]:
# Spearman test between the "ta_micromol_kg" and "dic_micromol_kg" columns
# (presumably total alkalinity and dissolved inorganic carbon; confirm
# against the dataset description). Uses the default alpha of 0.05.
test_spearman(terminos_df["ta_micromol_kg"], terminos_df["dic_micromol_kg"])

Spearman's correlation coefficient (rho): 0.838
p-value: 0.0000
✔️ Significant relationship (p < 0.050)
📈 Positive correlation


# Exercise: Create a function to compute the Pearson correlation.


In [6]:
from scipy import stats

def test_pearson(x, y, alpha=0.05):
    if len(x) != len(y):
        raise ValueError("Las variables deben tener la misma longitud.")

    r, pval = stats.pearsonr(x, y)

    print(f"Pearson's correlation coefficient (r): {r:.3f}")
    print(f"p-value: {pval:.4f}")

    if pval < alpha:
        print(f"✔️ Significant relationship (p < {alpha:.3f})")
    else:
        print(f"⚠️ No significant relationship (p ≥ {alpha:.3f})")

    if r > 0:
        print("📈 Positive correlation")
    else:
        print("📉 Negative correlation")




In [7]:
# Pearson test on the same two columns used for the Spearman test, so the
# two coefficients can be compared directly. Uses the default alpha of 0.05.
test_pearson(terminos_df["ta_micromol_kg"], terminos_df["dic_micromol_kg"])

Pearson's correlation coefficient (r): 0.882
p-value: 0.0000
✔️ Significant relationship (p < 0.050)
📈 Positive correlation
