In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import matplotlib.pyplot as plt
from math import sqrt

In [None]:
# NOTE(review): absolute, machine-specific path. The relative 'DATA.csv' read in the
# loading cell depends on this working directory, so it must be adapted per machine.
%cd "C:/Users/Fousseini KOUYATE/Desktop/Paper_material/Data/"

In [None]:
# Load the semicolon-separated dataset and parse the timestamp column 'T'.
data = pd.read_csv('DATA.csv', sep=";")
data['T'] = pd.to_datetime(data['T'])
Tim = data['T']  # kept under its own name; a later cell indexes it to get the test-set dates

# Rescale the five water-level columns by 0.01 and 'P' by 0.001.
# NOTE(review): presumably unit conversions (e.g. cm -> m, mm -> m) - confirm against the data source.
for level_column in ('WL_Pank', 'WL_Sof', 'WL_Bou', 'WL_Dou', 'WL_Mop'):
    data[level_column] = data[level_column] * 0.01

data['P'] = data['P'] * 0.001
data

In [None]:
# Independent (input) and dependent (target) variables
feature_columns = ['Tot_evap', 'WL_Bou', 'WL_Sof', 'WL_Pank', 'P', 'WL_Dou', 'WTD']
X = data[feature_columns]  # Inputs
y = data['WL_Mop']  # Target

# Ordered 80/20 split: shuffle=False keeps the rows in file order, so the last 20% form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Standardise the features; the scaler is fitted on the training rows only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# RBF support-vector regressor fitted on the scaled training data.
svm_model = SVR(kernel='rbf', C=1.0, epsilon=0.1)
svm_model.fit(X_train_scaled, y_train)
y_pred = svm_model.predict(X_test_scaled)

# First and last timestamp of each split, looked up through the original row index.
train_dates = data.loc[X_train.index, 'T']
test_dates = data.loc[X_test.index, 'T']
starting_date_X_train, ending_date_X_train = train_dates.min(), train_dates.max()
starting_date_X_test, ending_date_X_test = test_dates.min(), test_dates.max()


In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
import numpy as np

# Fixed seed for the stochastic estimators so that the reported scores are
# reproducible after a kernel restart (SVR takes no seed here).
RANDOM_STATE = 42

models = {
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=RANDOM_STATE),
    'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=RANDOM_STATE),
    'SVM': SVR(kernel='rbf', C=1.0, epsilon=0.1),
    'XGBoosting': XGBRegressor(n_estimators=100, random_state=RANDOM_STATE)
}

# For every model: fit on the scaled training set, compute the RMSE on the test
# set, and compute the mean RMSE over a 4-fold cross-validation of the training set.
results = {}
for name, model in models.items():
    # Training
    model.fit(X_train_scaled, y_train)
    # Prediction on the held-out rows
    y_pred = model.predict(X_test_scaled)
    # Test-set RMSE
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    # Cross-validation; scores are negated MSE, hence the sign flip before the square root.
    # NOTE(review): an integer cv gives unshuffled K-fold splits; for time-ordered data
    # a TimeSeriesSplit may be more appropriate - confirm the intended protocol.
    scores = cross_val_score(model, X_train_scaled, y_train, cv=4, scoring='neg_mean_squared_error')
    mean_score = np.mean(np.sqrt(-scores))

    results[name] = {'RMSE': rmse, 'Cross Validation Score': mean_score}

In [None]:
# Select the entry of `results` with the smallest cross-validation score
# (the first one wins on ties) together with its metrics dict.
best_model_name, best_model_performance = min(
    results.items(),
    key=lambda entry: entry[1]['Cross Validation Score'],
)

In [None]:
import matplotlib.pyplot as plt
# Grouped bar chart: test-set RMSE next to cross-validated RMSE for every model.
model_names = list(results.keys())
rmse_scores = [results[name]['RMSE'] for name in model_names]
cv_scores = [results[name]['Cross Validation Score'] for name in model_names]
width = 0.3
x = np.arange(len(model_names))
plt.rcParams.update({'font.size': 14})

# Create the figure once, with the intended size. A separate plt.figure() call
# would only leave an extra empty figure behind and not size this one.
fig, ax = plt.subplots(figsize=(10, 6))

# Centre each pair on its tick: offsets of -width/2 and +width/2 put the two
# bars side by side instead of drawing one on top of the other.
rects1 = ax.bar(x - width/2, rmse_scores, width, label='RMSE')
rects2 = ax.bar(x + width/2, cv_scores, width, label='CV Score')

ax.set_ylabel('RMSE (m)')
ax.set_title('(d)', loc='left')
ax.set_xticks(x)
ax.set_xticklabels(model_names, rotation=20)
ax.legend()  # the bars carry labels, so show which series is which

plt.show()

In [None]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Name the model explicitly. The bare variable `model` only exists because it leaks
# out of the training loop, where it ends up bound to the last entry of `models`
# ('XGBoosting'); relying on that makes the cell depend on hidden execution state.
y_pred = models['XGBoosting'].predict(X_test_scaled)

def calculate_nse(y_true, y_pred):
    """Nash-Sutcliffe efficiency of `y_pred` against `y_true`.

    Returns 1 minus the ratio of the summed squared errors to the summed
    squared deviations of `y_true` from its own mean.
    """
    squared_error = np.sum((y_true - y_pred) ** 2)
    spread_around_mean = np.sum((y_true - np.mean(y_true)) ** 2)
    return 1 - (squared_error / spread_around_mean)

def calculate_pbias(y_true, y_pred):
    """Percent bias: the summed (true - predicted) differences as a percentage of the summed true values."""
    total_difference = np.sum(y_true - y_pred)
    return (total_difference / np.sum(y_true)) * 100

def calculate_pabe(y_true, y_pred):
    """Summed absolute errors expressed as a percentage of the summed true values."""
    absolute_errors = np.abs(y_true - y_pred)
    return (np.sum(absolute_errors) / np.sum(y_true)) * 100

# Metrics for the single prediction vector `y_pred`.
nse = calculate_nse(y_test, y_pred)
pbias = calculate_pbias(y_test, y_pred)
pabe = calculate_pabe(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

# Test-set predictions of every fitted model, keyed by model name.
predictions = {}
for model_name, model in models.items():
    predictions[model_name] = model.predict(X_test_scaled)

# Per-model metrics, each computed from that model's own predictions and kept in
# a dict instead of being overwritten on every iteration. PBIAS in particular
# must use `model_predictions`, not the unrelated `y_pred` vector.
prediction_metrics = {}
for model_name, model_predictions in predictions.items():
    prediction_metrics[model_name] = {
        'RMSE': np.sqrt(mean_squared_error(y_test, model_predictions)),
        'NSE': calculate_nse(y_test, model_predictions),
        'PABE': calculate_pabe(y_test, model_predictions),
        'PBIAS': calculate_pbias(y_test, model_predictions),
        'R^2': r2_score(y_test, model_predictions),
    }

prediction_metrics


In [None]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
# Defining the functions as provided
def calculate_nse(y_true, y_pred):
    """Return the Nash-Sutcliffe efficiency: 1 - sum((true - pred)^2) / sum((true - mean(true))^2)."""
    numerator = np.sum((y_true - y_pred) ** 2)
    denominator = np.sum((y_true - np.mean(y_true)) ** 2)
    return 1 - (numerator / denominator)

def calculate_pbias(y_true, y_pred):
    """Return 100 * sum(true - pred) / sum(true), i.e. the bias as a percentage of the true total."""
    signed_error_total = np.sum(y_true - y_pred)
    true_total = np.sum(y_true)
    return (signed_error_total / true_total) * 100

def calculate_pabe(y_true, y_pred):
    """Return 100 * sum(|true - pred|) / sum(true), i.e. the absolute error as a percentage of the true total."""
    absolute_error_total = np.sum(np.abs(y_true - y_pred))
    true_total = np.sum(y_true)
    return (absolute_error_total / true_total) * 100

def _score_predictions(observed, predicted):
    """Collect the six evaluation metrics for one prediction vector in a dict."""
    return {
        'RMSE': np.sqrt(mean_squared_error(observed, predicted)),
        'R^2': r2_score(observed, predicted),
        'NSE': calculate_nse(observed, predicted),
        'PABE': calculate_pabe(observed, predicted),
        'PBIAS': calculate_pbias(observed, predicted),
        'MAE': mean_absolute_error(observed, predicted)
    }


# Score the test-set predictions of every model in `models`.
evaluation_results = {}
for model_name, model in models.items():
    evaluation_results[model_name] = _score_predictions(y_test, model.predict(X_test_scaled))

evaluation_results


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score

plt.rcParams.update({'font.size': 14})
# 2x2 grid with a shared y-axis, flattened so the panels can be indexed 0..3.
fig, axs = plt.subplots(2, 2, figsize=(10, 6), sharey=True)
axs = axs.flatten()

# Panel labels, assigned in the iteration order of `models`
new_titles = ['(a)', '(b)', '(c)', '(d)']

# End points of the dashed 1:1 reference line.
observed_range = [y_test.min(), y_test.max()]

for i, (model_name, model) in enumerate(models.items()):
    panel = axs[i]
    model_predictions = model.predict(X_test_scaled)
    r2 = r2_score(y_test, model_predictions)

    panel.scatter(y_test, model_predictions, alpha=0.3, color='black')
    panel.plot(observed_range, observed_range, 'r--', lw=2)
    # Panel label in the lower-right corner, R² in the upper-left corner (axes coordinates)
    panel.text(0.95, 0.05, new_titles[i], horizontalalignment='right', verticalalignment='bottom', transform=panel.transAxes, fontsize=14, color='black')
    panel.text(0.05, 0.95, f'R²={r2:.3f}', transform=panel.transAxes, verticalalignment='top', color='black', fontsize=12)

# y-label on the left column, panels (a) and (c)
for left_panel in (axs[0], axs[2]):
    left_panel.set_ylabel('Predicted water level (m)', fontsize=14)

# x-label on the bottom row, panels (c) and (d)
for bottom_panel in (axs[2], axs[3]):
    bottom_panel.set_xlabel('Observed water level (m)', fontsize=14)

plt.show()


In [None]:
# Fit a standalone XGBoost regressor and print its RMSE on the test set.
XGBRegressor_model = XGBRegressor(n_estimators=100)
XGBRegressor_model.fit(X_train_scaled, y_train)
y_pred = XGBRegressor_model.predict(X_test_scaled)
rmse = sqrt(mean_squared_error(y_test, y_pred))
print(f'RMSE: {rmse}')

# Horizontal bar chart of the fitted model's importances, one bar per column of X.
# `feature_importances` is reused by the later figure cells.
feature_importances = XGBRegressor_model.feature_importances_
n_features = X.shape[1]
plt.figure(figsize=(17, 6))
plt.barh(range(n_features), feature_importances, align='center')
plt.yticks(np.arange(n_features), X.columns, fontsize=16)
plt.xlabel('Feature Importance', fontsize=16)
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# Assumes X_train_scaled, y_train, X, results and feature_importances (XGBoost) are already defined.
# The tree ensembles are seeded so the plotted importances are reproducible across runs.
RandomForest_model = RandomForestRegressor(n_estimators=100, random_state=42)
GradientBoosting_model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)

# Fit the models on the training data
RandomForest_model.fit(X_train_scaled, y_train)
GradientBoosting_model.fit(X_train_scaled, y_train)

# Feature importances of the fitted models
RandomForest_feature_importances = RandomForest_model.feature_importances_
GradientBoosting_feature_importances = GradientBoosting_model.feature_importances_

plt.rcParams.update({'font.size': 14})
fig, axs = plt.subplots(2, 2, figsize=(10, 6))

# Panels (e)-(g): one horizontal importance bar chart per model, drawn by a
# single loop instead of three copy-pasted blocks.
feature_positions = np.arange(X.shape[1])
importance_panels = [
    (axs[0, 0], RandomForest_feature_importances, '(e)'),
    (axs[0, 1], GradientBoosting_feature_importances, '(f)'),
    (axs[1, 0], feature_importances, '(g)'),  # XGBoost importances from the earlier cell
]
for panel, importances, panel_label in importance_panels:
    panel.barh(feature_positions, importances, align='center')
    panel.set_yticks(feature_positions)
    panel.set_yticklabels(X.columns, fontsize=14)
    panel.set_title(panel_label, loc='left')
axs[1, 0].set_xlabel('Feature Importance', fontsize=14)

# Panel (h): test-set RMSE next to cross-validated RMSE for each model
model_names = list(results.keys())
rmse_scores = [results[name]['RMSE'] for name in model_names]
cv_scores = [results[name]['Cross Validation Score'] for name in model_names]
width = 0.3
x = np.arange(len(model_names))
rects1 = axs[1, 1].bar(x - width/2, rmse_scores, width, label='RMSE')
rects2 = axs[1, 1].bar(x + width/2, cv_scores, width, label='CV Score')
axs[1, 1].set_ylabel('RMSE (m)')
axs[1, 1].set_title('(h)', loc='left')
axs[1, 1].set_xticks(x)
axs[1, 1].set_xticklabels(model_names, rotation=20, ha='right')

# Adjust the subplot spacing, as the other figure cells of this notebook do.
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# Assumes X_train_scaled, y_train, X, results and feature_importances (XGBoost) are already defined.
# The tree ensembles are seeded so the saved figures are reproducible across runs.
RandomForest_model = RandomForestRegressor(n_estimators=100, random_state=42)
GradientBoosting_model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)

# Fit the models on the training data
RandomForest_model.fit(X_train_scaled, y_train)
GradientBoosting_model.fit(X_train_scaled, y_train)

# Feature importances of the fitted models
RandomForest_feature_importances = RandomForest_model.feature_importances_
GradientBoosting_feature_importances = GradientBoosting_model.feature_importances_

# Adjust font size for all plots
plt.rcParams.update({'font.size': 14})


def _save_importance_figure(importances, panel_label, filename, xlabel=''):
    """Draw one horizontal feature-importance bar chart and save it as a 300-dpi PNG.

    importances: one value per column of the global X.
    panel_label: text placed in the top-right corner of the axes.
    filename:    output path passed to plt.savefig.
    xlabel:      x-axis label (empty by default).
    """
    plt.figure(figsize=(5, 3))
    feature_positions = np.arange(X.shape[1])
    plt.barh(feature_positions, importances, align='center')
    plt.yticks(feature_positions, X.columns, fontsize=14)
    plt.xlabel(xlabel, fontsize=14)
    plt.text(0.95, 0.95, panel_label, horizontalalignment='right', verticalalignment='top', transform=plt.gca().transAxes, fontsize=14)
    plt.tight_layout()
    plt.savefig(filename, format='png', dpi=300)


# One figure per model, replacing three copy-pasted plotting blocks.
_save_importance_figure(RandomForest_feature_importances, '(a)', "RandomForest_Feature_Importance.png")
_save_importance_figure(GradientBoosting_feature_importances, '(b)', "GradientBoosting_Feature_Importance.png")
_save_importance_figure(feature_importances, '(c)', "XGBoost_Feature_Importance.png", xlabel='Feature Importance')

# Test-set RMSE next to cross-validated RMSE for each model
plt.figure(figsize=(5, 3))
model_names = list(results.keys())
rmse_scores = [results[name]['RMSE'] for name in model_names]
cv_scores = [results[name]['Cross Validation Score'] for name in model_names]
width = 0.3
x = np.arange(len(model_names))
plt.bar(x - width/2, rmse_scores, width, label='RMSE')
plt.bar(x + width/2, cv_scores, width, label='CV Score')
plt.ylabel('RMSE (m)', fontsize=12)
plt.text(0.05, 0.95, '(f)', horizontalalignment='left', verticalalignment='top', transform=plt.gca().transAxes, fontsize=14)
plt.xticks(x, model_names, rotation=10, ha='center', fontsize=12)
plt.tight_layout()
plt.savefig("RMSE_CV_Scores.png", format='png', dpi=300)

# Display the figures
plt.show()


In [None]:
# Timestamps of the test rows, re-indexed from 0 so they line up positionally
# with the prediction arrays below.
test_time = Tim.loc[X_test.index].reset_index(drop=True)

# Test-set predictions of each model as NumPy arrays.
XGBRegressor_predictions = np.array(predictions['XGBoosting'])
SVR_predictions = np.array(predictions['SVM'])
GB_predictions = np.array(predictions['Gradient Boosting'])
RF_predictions = np.array(predictions['Random Forest'])

# Give y_test the same 0-based index as test_time.
y_test = pd.Series(y_test).reset_index(drop=True)

# Observed values next to the SVM and Random Forest predictions, with their dates.
comparison_columns = {
    'Date': test_time,
    'Observed': y_test,
    'Predicted SVM': SVR_predictions,
    'Predicted Random Forest': RF_predictions
}
df_comparison = pd.DataFrame(comparison_columns)

# Display the DataFrame
df_comparison

In [None]:
# Comparison table with the observed values and the predictions of all four models.
all_model_columns = {
    'Date': test_time,
    'Observed': y_test,
    'Predicted SVM': SVR_predictions,
    'Predicted Random Forest': RF_predictions,
    'Predicted XGBoost': XGBRegressor_predictions,
    'Predicted Gradient Boosting': GB_predictions
}
df1_comparison = pd.DataFrame(all_model_columns)
df1_comparison

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Rebuild the comparison table on a daily date axis.
# NOTE(review): the date range is hard-coded and must contain exactly as many days as
# there are test rows, otherwise the DataFrame construction fails. `test_time` holds
# the dates taken from the data itself - confirm which of the two is intended.
date_range = pd.date_range(start='2013-05-27', end='2020-12-31', freq='D')
df1_comparison = pd.DataFrame({
    'Date': date_range,
    'Observed': y_test,
    'Predicted SVM': SVR_predictions,
    'Predicted Random Forest': RF_predictions,
    'Predicted XGBoost': XGBRegressor_predictions,
    'Predicted Gradient Boosting': GB_predictions
})

# Suffixes of the 'Predicted ...' columns to plot. They are stored under their own
# name so that the `models` dict of fitted estimators (and the `model` variable)
# used by earlier cells is not overwritten with strings.
model_labels = ['SVM', 'Random Forest', 'XGBoost', 'Gradient Boosting']

# One line colour per model
colors = ['blue', 'green', 'red', 'purple']

fig, axes = plt.subplots(nrows=len(model_labels), ncols=1, figsize=(10, 14), sharex=True)

for idx, (model_label, line_color) in enumerate(zip(model_labels, colors)):
    panel = axes[idx]
    panel.plot(df1_comparison['Date'], df1_comparison['Observed'], label='Observed', color='black', linestyle='-', linewidth=1.5)
    panel.plot(df1_comparison['Date'], df1_comparison[f'Predicted {model_label}'], label=f'{model_label}', color=line_color, linestyle='-', linewidth=1.5)
    panel.set_ylabel('Water Level (m)')
    panel.set_ylim(0, 7.5)
    panel.legend(loc='upper left', fontsize=14, framealpha=0, ncol=2)
    # Panel label (a, b, c, d) in the bottom-right corner
    panel.annotate(f'({chr(97 + idx)})', xy=(0.95, 0.05), xycoords='axes fraction', fontsize=18, ha='right', va='bottom')

plt.tight_layout()
# Save before showing, like the other figure cells, then release the figure.
fig.savefig("All_Models_Comparison.png", format='png', dpi=300)
plt.show()
plt.close(fig)


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Per-year maxima: tag every row of df1_comparison with the calendar year of its
# 'Date', then take the column-wise maximum within each year.
df1_comparison['Year'] = df1_comparison['Date'].dt.year
annual_peaks = df1_comparison.groupby('Year').max()

# Numeric columns only, which leaves out the datetime 'Date' column.
annual_peaks_numeric = annual_peaks.select_dtypes(include=[np.number])

plt.figure(figsize=(10, 6))

width = 0.12  # width of a single bar
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']  # one colour per plotted column
positions = np.arange(len(annual_peaks_numeric))  # one bar group per year

# Grouped bars: the i-th column is shifted to the right by i bar widths.
n_series = len(annual_peaks_numeric.columns)
for i, (column, color) in enumerate(zip(annual_peaks_numeric.columns, colors)):
    bar_positions = positions + i * width
    plt.bar(bar_positions, annual_peaks_numeric[column], color=color, width=width, edgecolor='grey', label=column)

plt.ylabel('Peak Water Level (m)', fontsize=14)

# Panel label in the top-right corner of the axes
plt.text(0.99, 0.99, '(a)', transform=plt.gca().transAxes, fontsize=18, ha='right', verticalalignment='top')

# Year ticks centred under each group of bars
group_centres = positions + width * (n_series - 1) / 2
plt.xticks(group_centres, annual_peaks_numeric.index, fontsize=16)
plt.yticks(fontsize=16)

# y-axis from 5 up to 10% above the largest value in the table
plt.ylim(5, annual_peaks_numeric.max().max() * 1.1)

# Legend inside the axes, top left, without a frame background
plt.legend(loc='upper left', fontsize=14,  framealpha=0)

plt.tight_layout()
plt.savefig("Peak_Water_Levels_by_Model.png", format='png', dpi=300)
plt.show()


In [None]:
# Display the per-year maxima table (`annual_peaks`).
annual_peaks

In [None]:
# Per-year minima: tag every row of df1_comparison with the calendar year of its
# 'Date', then take the column-wise minimum within each year.
df1_comparison['Year'] = df1_comparison['Date'].dt.year
annual_low = df1_comparison.groupby('Year').min()

plt.figure(figsize=(7, 5))

# Width of a single bar
width = 0.15

# Bar colours, cycled if there are more series than colours
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']

# One bar group per year
positions = np.arange(len(annual_low))

# Select the series to plot BEFORE pairing them with colours. Pairing every column
# (including the non-plotted 'Date') with the five colours truncated the list, which
# silently dropped the last prediction column and shifted all bars by one slot.
plot_columns = [column for column in annual_low.columns if 'Predicted' in column or 'Observed' in column]
for i, column in enumerate(plot_columns):
    plt.bar(positions + i * width, annual_low[column], color=colors[i % len(colors)], width=width, edgecolor='grey', label=column)

plt.ylabel('Low Water Level (m)', fontsize=14)

# Panel label in the top-left corner of the axes
plt.text(0.01, 0.99, '(b)', transform=plt.gca().transAxes, fontsize=14, verticalalignment='top')

# Year ticks centred under each group of plotted bars
plt.xticks(positions + width * (len(plot_columns) - 1) / 2, annual_low.index, fontsize=12)
plt.yticks(fontsize=12)

# Fixed y-axis window
plt.ylim(0.4, 1.2)
plt.legend(loc='upper center', fontsize=8, framealpha=0)

plt.tight_layout()
plt.savefig("Annual_Low_Water_Levels.png", format='png', dpi=300)
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Per-year minima: tag every row of df1_comparison with the calendar year of its
# 'Date', then take the column-wise minimum within each year.
df1_comparison['Year'] = df1_comparison['Date'].dt.year
annual_lows = df1_comparison.groupby('Year').min()

# Numeric columns only, which leaves out the datetime 'Date' column.
annual_lows_numeric = annual_lows.select_dtypes(include=[np.number])

plt.figure(figsize=(10, 6))

width = 0.12  # width of a single bar
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']  # one colour per column
positions = np.arange(len(annual_lows_numeric))  # one bar group per year

# Grouped bars: the i-th column is shifted right by i bar widths; columns that are
# neither observed nor predicted series are skipped but still occupy their slot.
for i, (column, color) in enumerate(zip(annual_lows_numeric.columns, colors)):
    if not ('Predicted' in column or 'Observed' in column):
        continue
    plt.bar(positions + i * width, annual_lows_numeric[column], color=color, width=width, edgecolor='grey', label=column)

plt.ylabel('Low Water Level (m)', fontsize=16)

# Panel label in the top-right corner of the axes
plt.text(0.99, 0.99, '(b)', transform=plt.gca().transAxes, fontsize=18, ha='right', verticalalignment='top')

# Year ticks centred under each group of bars
group_centres = positions + width * (len(annual_lows_numeric.columns) - 1) / 2
plt.xticks(group_centres, annual_lows_numeric.index, fontsize=16)
plt.yticks(fontsize=16)

# Fixed y-axis window from 0 to 1.7
plt.ylim(0, 1.7)

# Legend inside the axes, top left, without a frame background
plt.legend(loc='upper left', fontsize=14, framealpha=0)

plt.tight_layout()
plt.savefig("Annual_Low_Water_Levels_by_Model.png", format='png', dpi=300)
plt.show()


In [None]:
# Zoom on a fixed window of rows of the four-model comparison table.
# The window is taken from df1_comparison: df_comparison only holds the SVM and
# Random Forest predictions, so looking up the XGBoost and Gradient Boosting
# columns there raised a KeyError.
years = range(2014, 2021)  # Example range of years (not used below)

# Row window of the slice.
# NOTE(review): end_idx evaluates to 1370 (1030 + 340); confirm 1130 + 340 was not intended.
start_idx = 1130
end_idx = 1030 + 340

data_filtered = df1_comparison.iloc[start_idx:end_idx]

# Date labels for the x-axis, formatted as YYYY-MM-DD
date_labels = data_filtered['Date'].dt.strftime('%Y-%m-%d').values

# Show one tick label every 70 rows
display_frequency = 70
filtered_date_labels = date_labels[::display_frequency]
display_indices = range(0, len(date_labels), display_frequency)

plt.rcParams.update({'font.size': 16})
fig, ax = plt.subplots(figsize=(10, 4))

# Observed series in black, then one line per model (default colour cycle),
# all plotted against the row position inside the window.
row_positions = range(len(date_labels))
ax.plot(row_positions, data_filtered['Observed'], color='k', linestyle='-', linewidth=1)
for prediction_column in ('Predicted XGBoost', 'Predicted SVM', 'Predicted Gradient Boosting', 'Predicted Random Forest'):
    ax.plot(row_positions, data_filtered[prediction_column], linestyle='-', linewidth=1)

# Date labels at the selected tick positions
plt.xticks(ticks=display_indices, labels=filtered_date_labels, rotation=0, ha="center", fontsize=16)

ax.set_ylabel('Water Level (m)', fontsize=16)

plt.tight_layout()
plt.savefig("Fig_zoom.png", format='png', dpi=300)
plt.show()

In [None]:
# Per-year maxima of every column of df_comparison.
df_comparison['Year'] = df_comparison['Date'].dt.year
annual_peaks = df_comparison.groupby('Year').max()

# One line per observed / predicted series.
plt.figure(figsize=(10, 6))

for column in annual_peaks.columns:
    if 'Predicted' in column or 'Observed' in column:
        plt.plot(annual_peaks.index, annual_peaks[column], marker='o', label=column)

plt.xlabel('Year', fontsize=18)
# The plotted values are yearly maxima, so the axis shows peak (not low) water levels.
plt.ylabel('Peak Water Level (m)', fontsize=17)
plt.title('')
plt.xticks(annual_peaks.index, rotation=0, fontsize=17)  # one tick per year
plt.yticks(fontsize=17)
plt.legend(fontsize=17)
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Per-year maxima of every column of df_comparison.
df_comparison['Year'] = df_comparison['Date'].dt.year
annual_peaks = df_comparison.groupby('Year').max()

plt.figure(figsize=(10, 6))

# One line with round markers per observed / predicted series.
plotted_columns = [column for column in annual_peaks.columns if 'Predicted' in column or 'Observed' in column]
for column in plotted_columns:
    plt.plot(annual_peaks.index, annual_peaks[column], marker='o', label=column)

# Axis labels and title
plt.xlabel('Year', fontsize=18)
plt.ylabel('Peak Water Level (m)', fontsize=17)
plt.title('Annual Peak Water Levels by Model', fontsize=18)

# One x tick per year; y-axis fixed to the window 5 to 7
plt.xticks(annual_peaks.index, rotation=0, fontsize=17)
plt.yticks(fontsize=17)
plt.ylim(5, 7)

plt.legend(fontsize=12)
plt.tight_layout()
plt.show()


In [None]:
# Per-year minima of every column of df_comparison.
df_comparison['Year'] = df_comparison['Date'].dt.year
annual_low = df_comparison.groupby('Year').min()

# One line per observed / predicted series. Columns, x values and ticks all come
# from `annual_low` itself rather than from the separate `annual_peaks` table.
plt.figure(figsize=(10, 6))

for column in annual_low.columns:
    if 'Predicted' in column or 'Observed' in column:
        plt.plot(annual_low.index, annual_low[column], marker='o', label=column)

plt.xlabel('Year', fontsize=18)
# The plotted values are yearly minima, so label and title refer to low water levels.
plt.ylabel('Low Water Level (m)', fontsize=18)
plt.title('Annual Low Water Levels by Model')
plt.xticks(annual_low.index, rotation=0, fontsize=18)  # one tick per year
plt.legend(fontsize=18)
plt.tight_layout()
plt.show()
