## Importing and exploring environmental data

In [1]:
import os
def import_csv(file):
    """Load a CSV file into a pandas DataFrame.

    Parameters
    ----------
    file
        Passed unchanged to ``pandas.read_csv`` (for example a file path
        or an open text buffer).

    Returns
    -------
    pandas.DataFrame
        The table parsed from ``file`` with ``read_csv``'s default options.
    """
    # pandas is imported inside the function, so the helper does not rely
    # on a module-level ``pd`` name being defined.
    from pandas import read_csv

    frame = read_csv(file)
    return frame

In [19]:
# Directory that contains the raw CSV: the notebook's current working directory
path = os.getcwd()

# Load the raw data file into the CO2Data DataFrame
CO2Data = import_csv(
    file=os.path.join(path, 'Terminos_lagoon_TA_DIC_2023_RawData.csv')
)

## Exploring DataFrame

In [20]:
# Show the frame's dimensions as a (rows, columns) tuple
print((len(CO2Data.index), len(CO2Data.columns)))

(106, 21)


In [21]:
# Preview the first five rows of the DataFrame
print(CO2Data.head(n=5))

   sample      date     estuary   area station layer_depth season  \
0  CDL01S  5/3/2020  Candelaria  River   CDL01     Surface    Dry   
1  CDL01F  5/3/2020  Candelaria  River   CDL01      Bottom    Dry   
2  CDL02S  5/3/2020  Candelaria  River   CDL02     Surface    Dry   
3  CDL02F  5/3/2020  Candelaria  River   CDL02      Bottom    Dry   
4  CDL03S  5/3/2020  Candelaria  River   CDL03     Surface    Dry   

   chlorophy_microg_l  cond_microsiemens_cm  depth_m  ...  do_mg_l  sal_psu  \
0                0.36                7015.4    0.464  ...     7.12     3.56   
1                4.19               29886.1    7.792  ...     4.90    16.97   
2                0.92               16691.1    0.453  ...     6.99     8.94   
3                2.23               24847.4    1.261  ...     6.52    13.87   
4                0.58               46341.6    0.465  ...     6.24    28.06   

   sp_cond_microsiemens_cm  turbidity_fnu  temp_c  latitude  longitude  \
0                   6547.7          

In [22]:
# Summarise the DataFrame: column names, non-null counts and dtypes.
# DataFrame.info() writes its report to stdout itself and returns None,
# so it is called directly; wrapping it in print() would append a stray
# "None" line after the report.
CO2Data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 106 entries, 0 to 105
Data columns (total 21 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   sample                   106 non-null    object 
 1   date                     106 non-null    object 
 2   estuary                  106 non-null    object 
 3   area                     106 non-null    object 
 4   station                  106 non-null    object 
 5   layer_depth              106 non-null    object 
 6   season                   106 non-null    object 
 7   chlorophy_microg_l       106 non-null    float64
 8   cond_microsiemens_cm     106 non-null    float64
 9   depth_m                  106 non-null    float64
 10  do_percent_sat           106 non-null    float64
 11  do_mg_l                  106 non-null    float64
 12  sal_psu                  106 non-null    float64
 13  sp_cond_microsiemens_cm  106 non-null    float64
 14  turbidity_fnu            1

In [6]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the
# numeric columns; the percentiles listed are describe()'s defaults.
print(CO2Data.describe(percentiles=[0.25, 0.5, 0.75]))

       chlorophy_microg_l  cond_microsiemens_cm     depth_m  do_percent_sat  \
count          106.000000            106.000000  106.000000      106.000000   
mean             6.545472          27895.183962    1.830160       89.515094   
std             14.941262          20931.232513    2.038739       29.772291   
min              0.360000             13.800000    0.105000        1.700000   
25%              2.555000           1778.025000    0.428750       84.575000   
50%              3.705000          33202.600000    0.638500       97.100000   
75%              5.925000          47046.650000    2.883250      105.300000   
max            150.900000          59988.600000    8.558000      174.100000   

          do_mg_l     sal_psu  sp_cond_microsiemens_cm  turbidity_fnu  \
count  106.000000  106.000000               106.000000     106.000000   
mean     6.474340   17.331981             27126.980189     100.429623   
std      2.104254   13.578980             20527.530804     290.290113

In [7]:
# Count the missing values in each column: isna() flags every missing cell
# and sum() adds the flags up column by column.
print(CO2Data.isna().sum())

sample                     0
date                       0
estuary                    0
area                       0
station                    0
layer_depth                0
season                     0
chlorophy_microg_l         0
cond_microsiemens_cm       0
depth_m                    0
do_percent_sat             0
do_mg_l                    0
sal_psu                    0
sp_cond_microsiemens_cm    0
turbidity_fnu              0
temp_c                     0
latitude                   0
longitude                  0
dic_micromol_kg            0
ta_micromol_kg             0
dummy_data                 7
dtype: int64


In [8]:
# Build a forward-filled version of the data: each missing value takes the
# last valid value above it in the same column.
# DataFrame.ffill() returns a new frame, so CO2Data itself is not modified
# and no explicit copy is needed. It replaces fillna(method="ffill"), which
# is deprecated since pandas 2.1.
CO2Data_fill = CO2Data.ffill()

In [9]:
# Check that the forward fill left no missing values in any column
print(CO2Data_fill.isna().sum())

sample                     0
date                       0
estuary                    0
area                       0
station                    0
layer_depth                0
season                     0
chlorophy_microg_l         0
cond_microsiemens_cm       0
depth_m                    0
do_percent_sat             0
do_mg_l                    0
sal_psu                    0
sp_cond_microsiemens_cm    0
turbidity_fnu              0
temp_c                     0
latitude                   0
longitude                  0
dic_micromol_kg            0
ta_micromol_kg             0
dummy_data                 0
dtype: int64


In [10]:
# Build a linearly interpolated version of the data, working on a copy so
# that CO2Data itself is not modified.
CO2Data_fill_linear = CO2Data.copy()

# Interpolate the numeric columns only: the frame also contains text
# columns (sample, date, estuary, ...), and calling interpolate() on
# object-dtype columns is deprecated since pandas 2.1.
numeric_columns = CO2Data_fill_linear.select_dtypes(include='number').columns
CO2Data_fill_linear[numeric_columns] = (
    CO2Data_fill_linear[numeric_columns].interpolate(method='linear')
)

# Check that no missing values remain
print(CO2Data_fill_linear.isnull().sum())

sample                     0
date                       0
estuary                    0
area                       0
station                    0
layer_depth                0
season                     0
chlorophy_microg_l         0
cond_microsiemens_cm       0
depth_m                    0
do_percent_sat             0
do_mg_l                    0
sal_psu                    0
sp_cond_microsiemens_cm    0
turbidity_fnu              0
temp_c                     0
latitude                   0
longitude                  0
dic_micromol_kg            0
ta_micromol_kg             0
dummy_data                 0
dtype: int64


In [11]:
# Create a new TA/DIC column: ta_micromol_kg divided element-wise by
# dic_micromol_kg. The column is added to CO2Data itself because the
# grouping cells below read it from there.
CO2Data['TA_DIC_ratio'] = CO2Data['ta_micromol_kg'].div(CO2Data['dic_micromol_kg'])
print(CO2Data['TA_DIC_ratio'])

0      0.986718
1      0.996485
2      0.995704
3      1.088628
4      1.032445
         ...   
101    1.050092
102    1.075436
103    1.095475
104    1.076392
105    1.093627
Name: TA_DIC_ratio, Length: 106, dtype: float64


In [17]:
# Compute the mean and standard deviation of the TA/DIC ratio per season
seasonal_stats = (
    CO2Data['TA_DIC_ratio']
    .groupby(CO2Data['season'])
    .agg(['mean', 'std'])
)
print(seasonal_stats)

            mean       std
season                    
Dry     1.058558  0.086111
Rainy   1.022350  0.100924


In [13]:
# Compute the mean and standard deviation of the TA/DIC ratio for every
# combination of season and area
season_area_stats = (
    CO2Data['TA_DIC_ratio']
    .groupby([CO2Data['season'], CO2Data['area']])
    .agg(['mean', 'std'])
)
print(season_area_stats)


                  mean       std
season area                     
Dry    Coast  1.115766  0.115033
       Plume  1.059687  0.035574
       River  1.000221  0.037747
Rainy  Coast  1.090127  0.129658
       Plume  1.025544  0.049222
       River  0.945217  0.025084


In [24]:
# Save both summary tables to one Excel workbook, one sheet per table
import pandas as pd

writer = pd.ExcelWriter('TA_DIC_Season_Areas.xlsx')
# Using the writer as a context manager saves and closes the workbook
# when the block exits.
with writer:
    seasonal_stats.to_excel(writer, sheet_name='Seasonal_Stats')
    season_area_stats.to_excel(writer, sheet_name='Season_Area_Stats')

print('Archivo guardado exitosamente.')

Archivo guardado exitosamente.
