In [31]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from datetime import datetime

In [32]:
# Load both CSV files
bottom_data = pd.read_csv('location 2 bottom.csv')
surface_data = pd.read_csv('location 2 surface.csv')



In [33]:
# Print column names to verify
print("Bottom Data Columns:", bottom_data.columns.tolist())
print("Surface Data Columns:", surface_data.columns.tolist())

Bottom Data Columns: ['Year/Month', 'Temp', 'pH', 'TDS', 'Conductivity', 'Turbidity', 'DO', 'BOD', 'COD', 'Hardness', 'Alkine', 'NO3', 'PO4', 'SO2']
Surface Data Columns: ['Year/Month', 'Temp', 'pH', 'TDS', 'Conductivity', 'Turbidity', 'DO', 'BDO', 'CDO', 'Hardnees', 'Alkanity', 'NO3  as N', 'Total PO43', 'SO42-']


In [34]:
# Cell 2: Preprocess Data
def preprocess_data(df):
    # Make a copy of the dataframe
    df = df.copy()
    
    # Convert Year/Month to datetime
    df['Date'] = pd.to_datetime(df['Year/Month'], format='%Y/%m')
    
    # Extract month as a cyclical feature
    df['Month_sin'] = np.sin(2 * np.pi * df['Date'].dt.month/12)
    df['Month_cos'] = np.cos(2 * np.pi * df['Date'].dt.month/12)
    
    # Extract year as a numerical feature
    df['Year'] = df['Date'].dt.year - df['Date'].dt.year.min()
    
    # Handle different column names in bottom and surface data
    # For NO3
    if 'NO3' in df.columns:
        df['NO3_cleaned'] = df['NO3']
    elif 'NO3  as N' in df.columns:
        df['NO3_cleaned'] = df['NO3  as N']
        
    # For SO4
    if 'SO2' in df.columns:
        df['SO4_cleaned'] = df['SO2']
    elif 'SO42-' in df.columns:
        df['SO4_cleaned'] = df['SO42-']
        
    # Ensure pH column is consistent
    df['pH_cleaned'] = df['pH']
    
    # Fill any NaN values with the mean of the column
    df['NO3_cleaned'] = df['NO3_cleaned'].fillna(df['NO3_cleaned'].mean())
    df['SO4_cleaned'] = df['SO4_cleaned'].fillna(df['SO4_cleaned'].mean())
    df['pH_cleaned'] = df['pH_cleaned'].fillna(df['pH_cleaned'].mean())
    
    return df

# Preprocess both datasets
bottom_processed = preprocess_data(bottom_data)
surface_processed = preprocess_data(surface_data)

# Combine datasets
combined_data = pd.concat([bottom_processed, surface_processed], axis=0)

# Select input features and target variables
input_features = ['Temp', 'Month_sin', 'Month_cos', 'Year']
target_features = ['NO3_cleaned', 'SO4_cleaned', 'pH_cleaned']

# Create input and output arrays
X = combined_data[input_features].values
y = combined_data[target_features].values

# Normalize the data
X_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()

X_normalized = X_scaler.fit_transform(X)
y_normalized = y_scaler.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X_normalized, y_normalized, test_size=0.2, random_state=42
)


In [35]:
# Cell 3: Create Neural Network Model
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(4,)),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dense(3)  # Output layer for NO3, SO4, and pH
    ])
    return model

model = create_model()


In [36]:
# Compile the model
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae']
)

# Define early stopping
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

# Train the model
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=1
)

# Evaluate the model
test_loss, test_mae = model.evaluate(X_test, y_test, verbose=0)
print(f"\nTest Mean Absolute Error: {test_mae:.4f}")

# Function to make predictions
def predict_water_quality(temperature, date_str):
    # Convert date string to features
    date = datetime.strptime(date_str, '%Y/%m')
    month_sin = np.sin(2 * np.pi * date.month/12)
    month_cos = np.cos(2 * np.pi * date.month/12)
    year = date.year - bottom_processed['Date'].dt.year.min()
    
    # Create input array
    input_data = np.array([[temperature, month_sin, month_cos, year]])
    
    # Normalize input
    input_normalized = X_scaler.transform(input_data)
    
    # Make prediction
    prediction_normalized = model.predict(input_normalized)
    
    # Denormalize prediction
    prediction = y_scaler.inverse_transform(prediction_normalized)
    
    return {
        'NO3': float(prediction[0][0]),
        'SO4': float(prediction[0][1]),
        'pH': float(prediction[0][2])
    }

# Example usage:
example_prediction = predict_water_quality(25.0, '2024/06')
print("\nExample Prediction:")
print(example_prediction)

# Print some actual data ranges for verification
print("\nData Ranges:")
print(f"NO3 range: {combined_data['NO3_cleaned'].min():.2f} to {combined_data['NO3_cleaned'].max():.2f}")
print(f"SO4 range: {combined_data['SO4_cleaned'].min():.2f} to {combined_data['SO4_cleaned'].max():.2f}")
print(f"pH range: {combined_data['pH_cleaned'].min():.2f} to {combined_data['pH_cleaned'].max():.2f}")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100

Test Mean Absolute Error: 0.0995

Example Prediction:
{'NO3': 1.5245970487594604, 'SO4': 4.966458320617676, 'pH': 6.558625221252441}

Data Ranges:
NO3 range: 0.00 to 12.11
SO4 range: 0.00 to 33.00
pH range: 0.00 to 9.44
