Subset.py

In [3]:
import pandas as pd

def subset(file_paths=None):
    """Read one or more CSV files and stack their rows into one DataFrame.

    Parameters
    ----------
    file_paths : str, os.PathLike, or iterable of those, optional
        Location(s) of the CSV files to load. A single path is accepted as a
        convenience and treated as a one-element list. Any iterable
        (list, tuple, generator, ...) of paths is consumed once, in order.

    Returns
    -------
    pandas.DataFrame
        The rows of every file, in the order the paths were given, with a
        fresh 0..n-1 index (``ignore_index=True``).

    Raises
    ------
    ValueError
        If no paths are supplied (``None`` or an empty iterable).
    """
    import os  # local import: only needed for the path-like check below

    # A ``None`` default avoids sharing one mutable list between calls.
    if file_paths is None:
        paths = []
    elif isinstance(file_paths, (str, os.PathLike)):
        # Without this, a bare string would be iterated character by character.
        paths = [file_paths]
    else:
        paths = list(file_paths)

    if not paths:
        # pd.concat would raise ValueError("No objects to concatenate") here;
        # raise the same exception type with a message that names the cause.
        raise ValueError("subset() needs at least one CSV file path; got none.")

    dataframes = [pd.read_csv(path) for path in paths]
    return pd.concat(dataframes, ignore_index=True)

In [4]:
# Mount Google Drive into the Colab filesystem at /content/drive. The CSV
# paths used by dataset() point below /content/drive/MyDrive, so this cell
# has to run before any data is loaded.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Dataset.py

In [2]:
from sklearn.model_selection import train_test_split
def dataset(file_paths=None, test_size=0.2, random_state=42):
    """Load the feature CSVs, split them into train/test sets and save both.

    Parameters
    ----------
    file_paths : list of str, optional
        CSV files handed to ``subset``. When omitted, the single Google Drive
        file listed below is used.
    test_size : float, optional
        Forwarded to ``train_test_split``; the default 0.2 gives an
        80% / 20% train/test split.
    random_state : int, optional
        Forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    tuple
        ``(training_data, test_data)`` as returned by ``train_test_split``.

    Side effects
    ------------
    Writes ``TrainingData.csv``, ``TestData.csv`` and ``features_info.txt``
    using relative paths (i.e. into the current working directory) and prints
    two confirmation messages.
    """
    if file_paths is None:
        file_paths = [
            '/content/drive/MyDrive/combined_features_with_date1.csv',
            # '/content/drive/MyDrive/combined_features_with_date2.csv',
            # '/content/drive/MyDrive/combined_features_with_date3.csv'
        ]

    full_data = subset(file_paths=file_paths)

    # Split the loaded rows into training and testing portions.
    training_data, test_data = train_test_split(
        full_data, test_size=test_size, random_state=random_state
    )

    # Export both portions without the pandas index column.
    training_data.to_csv("TrainingData.csv", index=False)
    test_data.to_csv("TestData.csv", index=False)

    # Record every column name of the loaded frame, one per line. Note this
    # lists all columns present in the CSVs, not only model inputs.
    with open("features_info.txt", "w") as f:
        f.write("Features used:\n")
        f.writelines(f"{feature}\n" for feature in full_data.columns)

    print("Datasets have been successfully split and saved as 'TrainingData.csv' and 'TestData.csv'.")
    print("Features info saved as 'features_info.txt'.")

    return training_data, test_data


neuralnet.py

In [5]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

# Define the model-building function
def build_model(input_size):
    """Assemble and compile the feed-forward regression network.

    Parameters
    ----------
    input_size : int
        Number of input features; used as the length of the input shape.

    Returns
    -------
    tf.keras.Sequential
        A model made of an input layer, two ReLU dense layers (64 then 32
        units) and a one-unit dense output layer with no activation given.
        It is compiled with the 'adam' optimizer, 'mse' loss and 'mae' as
        the reported metric.
    """
    layers = tf.keras.layers

    # Layers are created in stacking order: input, hidden layers, output.
    stack = [
        layers.Input(shape=(input_size,)),
        *[layers.Dense(units, activation='relu') for units in (64, 32)],
        layers.Dense(1),
    ]

    network = tf.keras.Sequential(stack)
    network.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return network

def train_nn(TrainingData, epochs=10, batch_size=32):
    """Fit the network from ``build_model`` on a training DataFrame.

    Parameters
    ----------
    TrainingData : pandas.DataFrame
        Must contain a 'streamflow' column (the target). A 'date' column is
        optional and is discarded if present. Every remaining column is used
        as an input feature.
    epochs : int, optional
        Number of training epochs passed to ``model.fit``.
    batch_size : int, optional
        Batch size passed to ``model.fit``.

    Returns
    -------
    tuple
        ``(model, scaler)``: the fitted model and the ``StandardScaler`` that
        was fitted on the feature columns.
    """
    # Remove the optional 'date' column, then every row with a missing value.
    # Both calls return new frames, so the caller's DataFrame is not modified.
    cleaned = TrainingData.drop(columns=['date'], errors='ignore').dropna()

    # Features are everything except the target column.
    feature_matrix = cleaned.drop(columns=["streamflow"]).values
    targets = cleaned["streamflow"].values

    # Only the features are standardized; the target is passed to the model
    # unscaled.
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(feature_matrix)

    # The network's input width is the number of feature columns.
    model = build_model(scaled_features.shape[1])
    fit_options = dict(
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.2,
        verbose=1,
    )
    model.fit(scaled_features, targets, **fit_options)
    print("Model training complete.")

    return model, scaler

# Example usage
if __name__ == "__main__":
    # Use only a subset of the data for initial experimentation.
    # DataFrame.sample raises ValueError when asked for more rows than the
    # frame holds (sampling without replacement), so the request is capped at
    # the number of rows actually available.
    train_data = pd.read_csv('/content/TrainingData.csv').pipe(
        lambda frame: frame.sample(min(10000, len(frame)), random_state=1)
    )

    NNModel, scaler = train_nn(train_data)


Epoch 1/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 9.5850 - mae: 1.5623 - val_loss: 26.6379 - val_mae: 1.7102
Epoch 2/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 6.5904 - mae: 1.3626 - val_loss: 25.8568 - val_mae: 1.6641
Epoch 3/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 6.4158 - mae: 1.3415 - val_loss: 25.3278 - val_mae: 1.6057
Epoch 4/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5.9126 - mae: 1.2632 - val_loss: 24.9514 - val_mae: 1.5752
Epoch 5/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 6.1643 - mae: 1.3344 - val_loss: 24.7768 - val_mae: 1.5634
Epoch 6/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4.5635 - mae: 1.1736 - val_loss: 24.3696 - val_mae: 1.5664
Epoch 7/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4.