# In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD, RMSprop, Adam, Adagrad, Adadelta
from keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger
import matplotlib.pyplot as plt
from datetime import timedelta
import seaborn as sns

# Load data
# Reads the workbook from the current working directory; the code below
# relies on it providing a 'date' column and a 'q' column.
data = pd.read_excel('discharge.xlsx')

# Convert the date column to datetime format
data['date'] = pd.to_datetime(data['date'])

# Drop the second column (Unnamed: 1) if it exists
# errors='ignore' makes the drop a no-op when the column is absent.
data = data.drop(columns=['Unnamed: 1'], errors='ignore')

# Forward-fill missing values in the 'q' column.
# The result is assigned back instead of calling fillna(..., inplace=True)
# on the selected column: the chained in-place form is deprecated and does
# not modify `data` when pandas Copy-on-Write is enabled.
# NOTE: forward fill cannot fill gaps at the very start of the series, so
# leading NaNs (if any) are left as they are.
data['q'] = data['q'].ffill()

# Feature Engineering
# Calendar features derived from the parsed 'date' column.
data['month'] = data['date'].dt.month
data['day_of_year'] = data['date'].dt.dayofyear
# Weekend flag (weekday 5 or 6), stored as 0/1 integers rather than booleans.
# A bool column selected together with numeric columns makes DataFrame.values
# fall back to an object-dtype array; an integer flag keeps the extracted
# feature matrix numeric.
data['is_weekend'] = (data['date'].dt.weekday >= 5).astype(int)

# Chronological 80/20 split: the cut-off is placed 80% of the way through
# the calendar span covered by the 'date' column (not 80% of the row count).
first_date = data['date'].min()
last_date = data['date'].max()
total_days = (last_date - first_date).days
train_days = int(0.8 * total_days)
split_date = first_date + timedelta(days=train_days)

# Rows dated strictly before the cut-off form the training set; rows on or
# after the cut-off form the test set.
in_training_period = data['date'] < split_date
in_testing_period = data['date'] >= split_date
train_data = data[in_training_period]
test_data = data[in_testing_period]

# Build the input matrices (X) and target column vectors (y) as NumPy arrays.
# NOTE(review): 'q' appears both as an input feature and as the target for
# the same row, so the target value is directly visible to the model.
# Confirm whether a lagged/shifted 'q' was intended here.
feature_columns = ['q', 'month', 'day_of_year', 'is_weekend']
target_column = 'q'

X_train = train_data[feature_columns].values
X_test = test_data[feature_columns].values

# Targets are reshaped to a single column: shape (n_rows, 1).
y_train = train_data[target_column].values.reshape(-1, 1)
y_test = test_data[target_column].values.reshape(-1, 1)