# 6Aug_ANN_code_model performace.**ipynb** **bold text**

In [None]:
from google.colab import files
uploaded = files.upload()


Saving Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0.xlsx to Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0.xlsx


In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
import joblib
from google.colab import files

# Load historical data
historical_df = pd.read_excel('Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0.xlsx')

# Convert month names to numerical values
month_to_num = {'JAN': 1, 'FEB': 2, 'MAR': 3, 'APR': 4, 'MAY': 5, 'JUN': 6,
                'JUL': 7, 'AUG': 8, 'SEP': 9, 'OCT': 10, 'NOV': 11, 'DEC': 12}
historical_df['Month'] = historical_df['Month'].map(month_to_num)

# Handle missing values in 'Month' column
historical_df.dropna(subset=['Month'], inplace=True)

# Check for any missing values
print("Missing values:", historical_df.isna().sum())

# Ensure data types are correct
print(historical_df.dtypes)

# Check for infinite values in numeric columns
numeric_cols = historical_df.select_dtypes(include=[np.number]).columns
print("Infinite values in numeric columns:", historical_df[numeric_cols].apply(np.isinf).sum())

# Extract predictors and predictands from historical data
X = historical_df[['Latitude', 'Longitude', 'Year', 'Month']].values
y = historical_df[['Tmax', 'Tmin', 'Pr']].values

# Normalize the data
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

# Check for any anomalies in the data after scaling
print("X_scaled anomalies:", np.isnan(X_scaled).sum(), np.isinf(X_scaled).sum())
print("y_scaled anomalies:", np.isnan(y_scaled).sum(), np.isinf(y_scaled).sum())

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y_scaled, test_size=0.3, random_state=42)

# Define the ANN model
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(32, activation='relu'))
model.add(Dense(3, activation='linear'))  # Output layer for Tmax, Tmin, and Pr

model.compile(optimizer='adam', loss='mse')

# Early stopping to avoid overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model
history = model.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val), callbacks=[early_stopping], batch_size=32)

# Save the model in the native Keras format
model.save('ANN_model_Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0.keras')

# Save scalers
joblib.dump(scaler_X, 'Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0_scaler_X.pkl')
joblib.dump(scaler_y, 'Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0_scaler_y.pkl')

print("ANN model training complete and saved to 'ann_model.keras'.")

# Download the model and scaler files
files.download("ANN_model_Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0.keras")
files.download("Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0_scaler_X.pkl")
files.download("Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0_scaler_y.pkl")



Missing values: Year            0
Month           0
Station ID      0
Station Name    0
Latitude        0
Longitude       0
Tmax            0
Tmin            0
Pr              0
dtype: int64
Year              int64
Month             int64
Station ID       object
Station Name     object
Latitude        float64
Longitude       float64
Tmax            float64
Tmin            float64
Pr              float64
dtype: object
Infinite values in numeric columns: Year         0
Month        0
Latitude     0
Longitude    0
Tmax         0
Tmin         0
Pr           0
dtype: int64
X_scaled anomalies: 0 0
y_scaled anomalies: 0 0
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.7757 - val_loss: 0.3664
Epoch 2/100
[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3315 - val_loss: 0.3141
Epoch 3/100
[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3125 - val_loss: 0.2977
Epoch 4/100
[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.2898 - val_loss: 0.2838
Epoch 5/100
[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.2765 - val_loss: 0.2741
Epoch 6/100
[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.2718 - val_loss: 0.2699
Epoch 7/100
[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.2542 - val_loss: 0.2673
Epoch 8/100
[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.2727 - val_loss: 0.2654
Epoch 9/100
[1m271/271[0m [32m━━━━━━━━━━━

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from google.colab import files
uploaded = files.upload()

Saving ANN_model_Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0.keras to ANN_model_Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0 (1).keras
Saving Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0_scaler_X.pkl to Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0_scaler_X (1).pkl
Saving Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0_scaler_y.pkl to Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0_scaler_y (1).pkl


In [None]:
from keras.models import load_model
import joblib

# Load the trained model and scalers
model = load_model('ANN_model_Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0 (1).keras')
scaler_X = joblib.load('Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0_scaler_X (1).pkl')
scaler_y = joblib.load('Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0_scaler_y (1).pkl')

# Load CMIP6 data for predictions
cmip6_df = pd.read_excel('Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0.xlsx')

# Convert month names to numerical values if needed
cmip6_df['Month'] = cmip6_df['Month'].map(month_to_num)

# Prepare CMIP6 data for predictions
X_cmip6 = cmip6_df[['Latitude', 'Longitude', 'Year', 'Month']].values
X_cmip6_scaled = scaler_X.transform(X_cmip6)

# Make predictions
y_cmip6_scaled = model.predict(X_cmip6_scaled)

# Inverse transform predictions to get actual values
y_cmip6 = scaler_y.inverse_transform(y_cmip6_scaled)

# Add predictions to the DataFrame
cmip6_df['Predicted_Tmax'] = y_cmip6[:, 0]
cmip6_df['Predicted_Tmin'] = y_cmip6[:, 1]
cmip6_df['Predicted_Pr'] = y_cmip6[:, 2]

# Ensure non-negative precipitation values
cmip6_df['Predicted_Pr'] = np.maximum(cmip6_df['Predicted_Pr'], 0)

# Save the DataFrame with predictions to a CSV file
predictions_file = 'Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0.csv'
cmip6_df.to_csv(predictions_file, index=False)

# Download the file (if using Google Colab)
files.download(predictions_file)


[1m387/387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from google.colab import files
uploaded = files.upload()

Saving Pr_Tmax_Tmin_SSP_1_2.6_GFDL-ESM4.csv to Pr_Tmax_Tmin_SSP_1_2.6_GFDL-ESM4 (1).csv


In [None]:
from google.colab import files
uploaded = files.upload()

Saving sorted_Obserevd_data_based_on_requriment.xls to sorted_Obserevd_data_based_on_requriment.xls


In [None]:
import pandas as pd

# Load observed data
observed_df = pd.read_excel('sorted_Obserevd_data_based_on_requriment.xls')

# Convert month names to numerical values
month_to_num = {'JAN': 1, 'FEB': 2, 'MAR': 3, 'APR': 4, 'MAY': 5, 'JUN': 6,
                'JUL': 7, 'AUG': 8, 'SEP': 9, 'OCT': 10, 'NOV': 11, 'DEC': 12}

observed_df['Month'] = observed_df['Month'].map(month_to_num)
observed_df = observed_df.dropna(subset=['Month'])

# Load predicted data
predicted_df = pd.read_csv('Pr_Tmax_Tmin_SSP_1_2.6_GFDL-ESM4 (1).csv')

# Clean Station Name columns
predicted_df['Station Name'] = predicted_df['Station Name'].str.strip()
observed_df['Station Name'] = observed_df['Station Name'].str.strip()

# Print unique values to check
print("Cleaned Station Name unique values in observed data:", observed_df['Station Name'].unique())
print("Cleaned Station Name unique values in predicted data:", predicted_df['Station Name'].unique())

# Print data types
print("Observed data types:\n", observed_df.dtypes)
print("Predicted data types:\n", predicted_df.dtypes)

# Print unique values
print("Unique values in observed data:")
print("Station Name:", observed_df['Station Name'].unique())
print("Latitude:", observed_df['Latitude'].unique())
print("Longitude:", observed_df['Longitude'].unique())
print("Year:", observed_df['Year'].unique())
print("Month:", observed_df['Month'].unique())

print("Unique values in predicted data:")
print("Station Name:", predicted_df['Station Name'].unique())
print("Latitude:", predicted_df['Latitude'].unique())
print("Longitude:", predicted_df['Longitude'].unique())
print("Year:", predicted_df['Year'].unique())
print("Month:", predicted_df['Month'].unique())

# Merge observed and predicted data on common columns
merged_df = pd.merge(observed_df, predicted_df, on=['Station Name', 'Latitude', 'Longitude', 'Year', 'Month'], how='inner', suffixes=('_obs', '_pred'))

# Check the merged data
print("Merged data:\n", merged_df.head())
print("Merged data shape:", merged_df.shape)

# Check missing values in merged data
print("Missing values in merged data:\n", merged_df.isnull().sum())




Cleaned Station Name unique values in observed data: ['Bahawalpur' 'Faisalabad' 'Hyderabad' 'Islamabad' 'Karachi' 'Lahore'
 'Larkana' 'Multan' 'Peshawar' 'Quetta' 'Sargodha' 'Sialkot']
Cleaned Station Name unique values in predicted data: ['Karachi' 'Hyderabad' 'Larkana' 'Quetta' 'Bahawalpur' 'Faisalabad'
 'Islamabad' 'Lahore' 'Multan' 'Peshawar' 'Sargodha' 'Sialkot']
Observed data types:
 Station Name      object
Latitude         float64
Longitude        float64
Year               int64
Month            float64
Observed_Tmax    float64
Observed_Tmin    float64
Observed_Pr      float64
dtype: object
Predicted data types:
 Year                int64
Month               int64
Station ID         object
Station Name       object
Latitude          float64
Longitude         float64
Tmax              float64
Tmin              float64
Pr                float64
Predicted_Tmax    float64
Predicted_Tmin    float64
Predicted_Pr      float64
dtype: object
Unique values in observed data:
Station Name

In [None]:
from google.colab import files
uploaded = files.upload()

Saving Pr_Tmax_Tmin_SSP_1_2.6_MRI-ESM2-0.csv to Pr_Tmax_Tmin_SSP_1_2.6_MRI-ESM2-0 (1).csv
Saving Pr_Tmax_Tmin_SSP_2_4.5_MRI-ESM2-0.csv to Pr_Tmax_Tmin_SSP_2_4.5_MRI-ESM2-0 (1).csv
Saving Pr_Tmax_Tmin_SSP_3_7.0_MRI-ESM2-0.csv to Pr_Tmax_Tmin_SSP_3_7.0_MRI-ESM2-0 (1).csv
Saving Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0.csv to Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0 (1).csv


In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

# Performance functions
def correlation_coefficient(y_true, y_pred):
    return np.corrcoef(y_true, y_pred)[0, 1]

def nash_sutcliffe_efficiency(y_true, y_pred):
    return 1 - (np.sum((y_true - y_pred) ** 2) / np.sum((y_true - np.mean(y_true)) ** 2))

def root_mean_squared_error(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

def kling_gupta_efficiency(y_true, y_pred):
    r = np.corrcoef(y_true, y_pred)[0, 1]
    beta = np.mean(y_pred) / np.mean(y_true)
    alpha = np.std(y_pred) / np.std(y_true)
    return 1 - np.sqrt((r - 1) ** 2 + (alpha - 1) ** 2 + (beta - 1) ** 2)

def modified_index_of_agreement(y_true, y_pred):
    mean_obs = np.mean(y_true)
    obs_diff = y_true - mean_obs
    pred_diff = y_pred - mean_obs
    return 1 - (np.sum((y_true - y_pred) ** 2) / np.sum((np.abs(pred_diff) + np.abs(obs_diff)) ** 2))

# Load observed data
observed_df = pd.read_excel('sorted_Obserevd_data_based_on_requriment.xls')

# Convert month names to numerical values
month_to_num = {'JAN': 1, 'FEB': 2, 'MAR': 3, 'APR': 4, 'MAY': 5, 'JUN': 6,
                'JUL': 7, 'AUG': 8, 'SEP': 9, 'OCT': 10, 'NOV': 11, 'DEC': 12}
observed_df['Month'] = observed_df['Month'].map(month_to_num)

# Remove rows with 'ANNUAL' if they exist
observed_df = observed_df.dropna(subset=['Month'])

# Strip trailing spaces and special characters from 'Station Name'
observed_df['Station Name'] = observed_df['Station Name'].str.strip()

# List of prediction files
prediction_files = ['Pr_Tmax_Tmin_SSP_1_2.6_MRI-ESM2-0 (1).csv', 'Pr_Tmax_Tmin_SSP_2_4.5_MRI-ESM2-0 (1).csv','Pr_Tmax_Tmin_SSP_3_7.0_MRI-ESM2-0 (1).csv','Pr_Tmax_Tmin_SSP_5_8.5_MRI-ESM2-0 (1).csv']

# Initialize dictionary to store combined performance metrics
combined_performance_metrics = {
    'Tmax': {'CC': [], 'NSE': [], 'RMSE': [], 'KGE': [], 'md': []},
    'Tmin': {'CC': [], 'NSE': [], 'RMSE': [], 'KGE': [], 'md': []},
    'Precipitation': {'CC': [], 'NSE': [], 'RMSE': [], 'KGE': [], 'md': []}
}

# Process each prediction file
for file in prediction_files:
    # Load predicted data
    predicted_df = pd.read_csv(file)

    # Strip trailing spaces and special characters
    predicted_df['Station Name'] = predicted_df['Station Name'].str.strip()

    # Merge observed and predicted data on common columns
    merged_df = pd.merge(observed_df, predicted_df, on=['Station Name', 'Latitude', 'Longitude', 'Year', 'Month'], how='inner', suffixes=('_obs', '_pred'))

    # Extract relevant columns for performance evaluation
    observed_tmax = merged_df['Observed_Tmax']
    predicted_tmax = merged_df['Tmax']
    observed_tmin = merged_df['Observed_Tmin']
    predicted_tmin = merged_df['Tmin']
    observed_precip = merged_df['Observed_Pr']
    predicted_precip = merged_df['Pr']

    # Calculate performance metrics for Tmax
    combined_performance_metrics['Tmax']['CC'].append(correlation_coefficient(observed_tmax, predicted_tmax))
    combined_performance_metrics['Tmax']['NSE'].append(nash_sutcliffe_efficiency(observed_tmax, predicted_tmax))
    combined_performance_metrics['Tmax']['RMSE'].append(root_mean_squared_error(observed_tmax, predicted_tmax))
    combined_performance_metrics['Tmax']['KGE'].append(kling_gupta_efficiency(observed_tmax, predicted_tmax))
    combined_performance_metrics['Tmax']['md'].append(modified_index_of_agreement(observed_tmax, predicted_tmax))

    # Calculate performance metrics for Tmin
    combined_performance_metrics['Tmin']['CC'].append(correlation_coefficient(observed_tmin, predicted_tmin))
    combined_performance_metrics['Tmin']['NSE'].append(nash_sutcliffe_efficiency(observed_tmin, predicted_tmin))
    combined_performance_metrics['Tmin']['RMSE'].append(root_mean_squared_error(observed_tmin, predicted_tmin))
    combined_performance_metrics['Tmin']['KGE'].append(kling_gupta_efficiency(observed_tmin, predicted_tmin))
    combined_performance_metrics['Tmin']['md'].append(modified_index_of_agreement(observed_tmin, predicted_tmin))

    # Calculate performance metrics for Precipitation
    combined_performance_metrics['Precipitation']['CC'].append(correlation_coefficient(observed_precip, predicted_precip))
    combined_performance_metrics['Precipitation']['NSE'].append(nash_sutcliffe_efficiency(observed_precip, predicted_precip))
    combined_performance_metrics['Precipitation']['RMSE'].append(root_mean_squared_error(observed_precip, predicted_precip))
    combined_performance_metrics['Precipitation']['KGE'].append(kling_gupta_efficiency(observed_precip, predicted_precip))
    combined_performance_metrics['Precipitation']['md'].append(modified_index_of_agreement(observed_precip, predicted_precip))

# Calculate average performance metrics
average_performance_metrics = {}
for key in combined_performance_metrics:
    average_performance_metrics[key] = {metric: np.mean(values) for metric, values in combined_performance_metrics[key].items()}

# Print average performance metrics
print("Average Performance Metrics:")
for category, metrics in average_performance_metrics.items():
    print(f"  {category}:")
    for metric, value in metrics.items():
        print(f"    {metric}: {value:.4f}")

Average Performance Metrics:
  Tmax:
    CC: 0.8584
    NSE: -0.7451
    RMSE: 9.0682
    KGE: 0.2389
    md: 0.7979
  Tmin:
    CC: 0.9242
    NSE: 0.6788
    RMSE: 4.5673
    KGE: 0.7498
    md: 0.9333
  Precipitation:
    CC: 0.1279
    NSE: -0.2135
    RMSE: 74.8978
    KGE: -0.0347
    md: 0.3466
