# First glance at Onetahi

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Meteo

In [None]:
# Read the first station file; its fields are delimited by ';'.
meteo_path = '/home/leo/Documents/These/Données/Meteo/Q_987_previous-1950-2023_RR-T-Vent.csv'
meteo_df = pd.read_csv(meteo_path, delimiter=';')

# List the distinct station names ('NOM_USUEL') present in the file.
noms = pd.unique(meteo_df['NOM_USUEL'])
print(noms)


In [None]:
# Restrict the table to the rows of the station named 'TETIAROA 1'.
meteo_df = meteo_df.loc[meteo_df['NOM_USUEL'].eq('TETIAROA 1')]


In [None]:
# The UM variable (humidity, per the original note) lives in the companion
# 'autres-parametres' file. Read it, keep station 'TETIAROA 1' only, and retain
# the date key together with the UX, UM and DHUMI80 columns.
meteo_path2 = '/home/leo/Documents/These/Données/Meteo/Q_987_previous-1950-2023_autres-parametres.csv'
meteo_df2 = pd.read_csv(meteo_path2, sep=';').loc[
    lambda frame: frame['NOM_USUEL'] == 'TETIAROA 1',
    ['AAAAMMJJ', 'UX', 'UM', 'DHUMI80'],
]

In [None]:
# Same treatment for the Q_987_latest-2024-2025_*.csv files: keep station
# 'TETIAROA 1', select the needed columns and parse 'AAAAMMJJ' (YYYYMMDD) into
# datetimes. No date-based filtering happens in this cell.
meteo_path3 = '/home/leo/Documents/These/Données/Meteo/Q_987_latest-2024-2025_RR-T-Vent.csv'
meteo_df3 = (
    pd.read_csv(meteo_path3, sep=';')
    .loc[
        lambda frame: frame['NOM_USUEL'] == 'TETIAROA 1',
        ['AAAAMMJJ', 'RR', 'TX', 'TM', 'FFM', 'TN'],
    ]
    .assign(AAAAMMJJ=lambda frame: pd.to_datetime(frame['AAAAMMJJ'], format='%Y%m%d'))
)

# NOTE(review): meteo_path3 is reassigned here, so from this point on it refers
# to the 'autres-parametres' file rather than the one meteo_df3 was read from.
meteo_path3 = '/home/leo/Documents/These/Données/Meteo/Q_987_latest-2024-2025_autres-parametres.csv'
meteo_df4 = (
    pd.read_csv(meteo_path3, sep=';')
    .loc[
        lambda frame: frame['NOM_USUEL'] == 'TETIAROA 1',
        ['AAAAMMJJ', 'UX', 'UM', 'DHUMI80'],
    ]
    .assign(AAAAMMJJ=lambda frame: pd.to_datetime(frame['AAAAMMJJ'], format='%Y%m%d'))
)

# Left-join the columns of meteo_df4 onto meteo_df3 using the date as key.
meteo_df3 = meteo_df3.merge(meteo_df4, on='AAAAMMJJ', how='left')


In [None]:
# Keep the date key plus the RR, TX, TM, FFM and TN columns, then left-join the
# columns retained in meteo_df2 (UX, UM, DHUMI80) on the date key.
meteo_df = meteo_df[['AAAAMMJJ', 'RR', 'TX', 'TM', 'FFM', 'TN']].merge(
    meteo_df2, on='AAAAMMJJ', how='left'
)


In [None]:

# Parse 'AAAAMMJJ' (YYYYMMDD) into datetimes so the column has the same type as
# in meteo_df3, whose dates were parsed when it was loaded.
meteo_df['AAAAMMJJ'] = pd.to_datetime(meteo_df['AAAAMMJJ'], format='%Y%m%d')

# Stack both periods, keep the first row encountered for each date (rows of the
# first table win over meteo_df3 on overlapping dates) and order by date.
# The index is rebuilt at the end: concat keeps the row labels of both inputs,
# which would otherwise leave duplicate labels in the combined table.
meteo_df = (
    pd.concat([meteo_df, meteo_df3], axis=0)
    .drop_duplicates(subset=['AAAAMMJJ'])
    .sort_values(by='AAAAMMJJ')
    .reset_index(drop=True)
)

In [None]:
# Keep the rows dated strictly before 2024-11-20 and strictly after 2019-07-01.
meteo_df = meteo_df[
    (meteo_df['AAAAMMJJ'] < pd.Timestamp('2024-11-20 00:00:00'))
    & (meteo_df['AAAAMMJJ'] > pd.Timestamp('2019-07-01 00:00:00'))
]

In [None]:
# Preview the first five rows of the filtered table.
meteo_df.head(5)

In [None]:
# Plot humidity (UM) over time.
# The series drawn is the 'UM' column, matching the legend, axis label and
# title of this figure (the 'TM' column is plotted separately further down).
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(meteo_df['AAAAMMJJ'], meteo_df['UM'], label='Humidity (UM)', color='blue')
ax.set_xlabel('Date')
ax.set_ylabel('Humidity (%)')
ax.set_title('Humidity over Time at Tetiaroa 1')
ax.legend()
ax.grid()
plt.show()

In [None]:
# Per-date minimum and maximum of every column.
meteo_df_min = meteo_df.groupby('AAAAMMJJ').min().reset_index()
meteo_df_max = meteo_df.groupby('AAAAMMJJ').max().reset_index()

# Per-date total of RR (only that column), plus trailing cumulative sums over
# the last 7, 14 and 28 rows. min_periods=1 lets the first windows use whatever
# rows are available instead of producing NaN.
meteo_df_sum = meteo_df.groupby('AAAAMMJJ')['RR'].sum().reset_index()
meteo_df_sum['precip_1week'] = meteo_df_sum['RR'].rolling(window=7, min_periods=1).sum()
meteo_df_sum['precip_2weeks'] = meteo_df_sum['RR'].rolling(window=14, min_periods=1).sum()
meteo_df_sum['precip_1month'] = meteo_df_sum['RR'].rolling(window=28, min_periods=1).sum()

# 5-point sliding mean of TM that covers the two previous rows, the current row
# and the two next rows. center=True is what makes the window symmetric; the
# default window is trailing (current row and the four before it).
meteo_df_min['TM_sliding_mean'] = meteo_df_min['TM'].rolling(window=5, center=True, min_periods=1).mean()
meteo_df['TM_sliding_mean'] = meteo_df['TM'].rolling(window=5, center=True, min_periods=1).mean()


In [None]:
# Overview figure: a 2 x 5 grid of scatter plots, one per series, all sharing
# the date range of meteo_df on the x axis.
fig, axs = plt.subplots(2, 5, figsize=(20, 10))
date_min = meteo_df['AAAAMMJJ'].min()
date_max = meteo_df['AAAAMMJJ'].max()

# One entry per panel: (grid position, source table, column, title, y label).
# NOTE(review): the "minimale" panels plot the per-date minimum of TM, not the
# TN column — presumably TN is the daily minimum temperature; confirm which
# one is intended.
overview_panels = [
    ((0, 0), meteo_df, 'UM', 'Mean humidity par jour', 'Humidity (%)'),
    ((0, 1), meteo_df_min, 'TM', 'Temperature minimale par jour', 'Temperature'),
    ((0, 2), meteo_df, 'TM', 'Température Moyenne par jour', 'Temperature moyenne'),
    ((0, 3), meteo_df_sum, 'RR', 'Précipitation par jour', 'Précipitation'),
    ((0, 4), meteo_df, 'TM_sliding_mean',
     'Temperature moyenne avec moyenne glissante par jour', 'Temperature moyenne'),
    ((1, 0), meteo_df_min, 'TM_sliding_mean',
     'Temperature minimale avec moyenne glissante par jour', 'Temperature'),
    ((1, 2), meteo_df_sum, 'precip_1month', 'Cumul precip 1 mois', 'Précipitation (mm)'),
    ((1, 3), meteo_df_sum, 'precip_1week',
     'Cumul de précipitations sur 1 semaine', 'Précipitation (mm)'),
    ((1, 4), meteo_df_sum, 'precip_2weeks',
     'Cumul de précipitations sur 2 semaines', 'Précipitation (mm)'),
]

for position, table, column, title, ylabel in overview_panels:
    panel_ax = axs[position]
    sns.scatterplot(data=table, x='AAAAMMJJ', y=column, s=20, ax=panel_ax)
    panel_ax.set_title(title)
    panel_ax.set_xlabel('Date')
    panel_ax.set_xlim(date_min, date_max)
    panel_ax.set_ylabel(ylabel)
    panel_ax.grid(True)
    panel_ax.tick_params(axis='x', rotation=45)

# Grid cell (1, 1) has no series assigned; hide its empty axes.
axs[1, 1].set_axis_off()

# Apply the layout BEFORE saving so the file on disk matches what is displayed.
fig.tight_layout()
fig.savefig('../img/meteo_plot.png')
plt.show()


# Creation of the meteo dataframe for simulations

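We want to keep the mean temperature (TM) and the 2-week cumulative precipitation (precip_2weeks), linearly interpolated onto a finer time grid. Note: the code below also interpolates RR, the 1-week and 1-month precipitation sums and UM, and it builds the grid with a fixed step `dt_new = 0.01` day (`np.arange`) rather than with `np.linspace(0, 1552, 100001)`.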

In [None]:
dt_new = 0.01  # step of the interpolated time grid, in days

# Elapsed days since the first record, computed from the dates themselves.
# Using the row position instead would silently assume one row per calendar
# day: any missing day in the station record would shift every later sample.
# Relies on meteo_df being sorted by date with one row per date, so that it is
# row-aligned with meteo_df_sum (which groupby orders by date).
time_original = (meteo_df['AAAAMMJJ'] - meteo_df['AAAAMMJJ'].min()).dt.days.to_numpy()
time_new = np.arange(time_original.min(), time_original.max(), dt_new)

# Linear interpolation of each daily series onto the fine grid.
# np.interp raises ValueError if a series does not have one value per entry of
# time_original.
temp_new = np.interp(time_new, time_original, meteo_df['TM'])
precip2_new = np.interp(time_new, time_original, meteo_df_sum['precip_2weeks'])
precip1_new = np.interp(time_new, time_original, meteo_df_sum['precip_1week'])
precip4_new = np.interp(time_new, time_original, meteo_df_sum['precip_1month'])
precip_new = np.interp(time_new, time_original, meteo_df['RR'])
hum_new = np.interp(time_new, time_original, meteo_df['UM'])

In [None]:
# Timestamp of every point of the interpolated grid.
date = meteo_df['AAAAMMJJ'].reset_index(drop=True)

# time_new is expressed in days, so each timestamp is simply the first date
# plus that many days. A fixed-frequency pd.date_range is avoided on purpose:
# its step would have to be truncated to a whole number of minutes
# (0.01 day = 14.4 min -> 14 min), which loses 40 minutes per simulated day and
# makes the dates drift further and further from the data they label.
# Rounding to the millisecond removes floating-point noise from the offsets.
elapsed = pd.to_timedelta(time_new, unit='D').round('ms')
date_new = pd.DatetimeIndex(date.min() + elapsed)
date_new

In [None]:
# Gather the interpolated series into a single table, one row per grid point.
df_resampled = pd.DataFrame(
    dict(
        date=date_new,
        time=time_new,
        TM=temp_new,
        precip=precip_new,
        precip1=precip1_new,
        precip2=precip2_new,
        precip4=precip4_new,
        UM=hum_new,
    )
)

In [None]:
# Preview the first five rows of the resampled table.
df_resampled.head(5)

In [None]:
# Make sure the output directory exists (no error if it is already there).
# `os` is imported with the other modules in the first cell of the notebook.
os.makedirs('../Data', exist_ok=True)

# Export to csv (currently disabled)
#df_resampled.to_csv('../Data/meteo_tetiaroa_resampled.csv', index=False)

# Plot some meteo variables with the capture data

In [None]:
# Load the capture records and parse their 'Date' column into datetimes.
capture_df = pd.read_csv('../Data/combined_poly_capture.csv').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)

In [None]:
# Captures on the Oroatera motu (bars, right-hand axis) overlaid on the
# interpolated temperature (top panel) and on the 28-row cumulative
# precipitation (bottom panel).
motu_data = capture_df[capture_df['Motu'] == 'Oroatera']
capture_max = motu_data['Nb_ind'].max()

# One entry per panel:
# (series, legend label, y label, colour, title, legend position).
capture_panels = [
    (temp_new, 'Mean daily temperature', 'Temperature (°C)', 'red',
     'Temperature and Capture Females on Oroatera Motu', 'upper left'),
    (precip4_new, 'Cumulated Precipitation', 'Precipitation (mm)', 'green',
     'Precipitation and Capture Females on Oroatera Motu', 'upper center'),
]

fig, axs = plt.subplots(2, 1, figsize=(15, 9))
for meteo_ax, (series, label, ylabel, color, title, legend_loc) in zip(axs, capture_panels):
    # Left axis: meteorological series.
    meteo_ax.step(date_new, series, where="post", label=label, color=color)
    meteo_ax.set_xlabel('Date')  # the x values are timestamps, not day counts
    meteo_ax.set_ylabel(ylabel, color=color)
    meteo_ax.tick_params(axis='y', labelcolor=color)
    meteo_ax.set_title(title)
    # nanmax ignores missing values, which would otherwise make the axis limit
    # depend on where the NaNs sit in the array.
    meteo_ax.set_ylim(0, np.nanmax(series) * 1.1)
    meteo_ax.legend(loc=legend_loc)

    # Right axis: number of captured individuals, drawn as 5-day-wide bars.
    ax2 = meteo_ax.twinx()
    ax2.bar(motu_data['Date'], motu_data['Nb_ind'], width=5, alpha=0.6, label='Captured', color='orange')
    ax2.set_ylabel('Number of Captured Individuals', color='orange')
    ax2.tick_params(axis='y', labelcolor='orange')
    ax2.set_ylim(0, capture_max * 1.1)
    ax2.legend(loc='upper right')

plt.tight_layout()
plt.show()