In [None]:
from time import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import Dense, Input, LSTM
from keras.models import Sequential
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Load data and index it by timestamp so rows can be selected by date range.
df = pd.read_excel('/content/New data.xlsx')  # Replace with the correct path
df['Datetime'] = pd.to_datetime(df['Datetime'])
df.set_index('Datetime', inplace=True)

# List of target columns (the variables the model is trained to predict)
target_columns = ['Liquid (BLPD)', 'OIL (BOPD)', 'GAS (MCF)', 'WATER (BWPD)', 'GOR (MCF)',
                  'WHTP', 'DSP', 'THP', 'Pressure DS Choke', 'Temperature DS Choke', 'CHP', 'TAP','BCP']

# Filter data for training (June-August).
# Take an explicit copy of the label-based slice: assigning new columns to a
# bare slice of `df` makes pandas emit SettingWithCopyWarning on each of the
# three assignments below.
df_train = df.loc['2000-06-01':'2000-08-31'].copy()
df_train['Day'] = df_train.index.day
df_train['Month'] = df_train.index.month
df_train['Hour'] = df_train.index.hour

# Normalize data: each target column is scaled to [0, 1] using the min/max of
# the June-August window.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df_train[target_columns])

# The calendar features get their own scaler so the target scaler can later be
# used on its own to invert predictions.
time_scaler = MinMaxScaler(feature_range=(0, 1))
scaled_time = time_scaler.fit_transform(df_train[['Day', 'Month', 'Hour']])

# Combine target and time features: targets first, the three time features
# as the last columns.
scaled_data_with_time = np.hstack([scaled_data, scaled_time])

def create_dataset(data, time_step=24, n_time_features=3):
    """Build sliding-window samples for one-step-ahead sequence prediction.

    Each sample ``X[i]`` is the ``time_step`` consecutive rows starting at
    row ``i`` (all columns); its label ``y[i]`` is the row that immediately
    follows the window, with the trailing ``n_time_features`` columns removed.

    Args:
        data: 2-D array of shape ``(n_rows, n_columns)`` whose last
            ``n_time_features`` columns are auxiliary (time) features.
        time_step: Number of rows in each input window.
        n_time_features: Number of trailing columns to exclude from ``y``.
            Defaults to 3, the value that was previously hard-coded.

    Returns:
        Tuple ``(X, y)`` with shapes ``(n_windows, time_step, n_columns)`` and
        ``(n_windows, n_columns - n_time_features)``, where
        ``n_windows = n_rows - time_step``. If ``data`` is too short to form a
        single window, both arrays are empty but keep those trailing shapes.

    Raises:
        ValueError: If ``n_time_features`` is negative.
    """
    if n_time_features < 0:
        raise ValueError("n_time_features must be non-negative")

    data = np.asarray(data)
    n_columns = data.shape[1]
    # Clamp at zero so "more time features than columns" yields no targets,
    # matching what a negative-stop slice such as `:-3` does on narrow input.
    n_targets = max(n_columns - n_time_features, 0)
    n_windows = max(len(data) - time_step, 0)

    if n_windows == 0:
        return (
            np.empty((0, time_step, n_columns), dtype=data.dtype),
            np.empty((0, n_targets), dtype=data.dtype),
        )

    X = np.stack([data[start:start + time_step] for start in range(n_windows)])
    # Row `start + time_step` is the label for window `start`, so all labels
    # are simply the rows from `time_step` onward; copy to detach from `data`.
    y = data[time_step:, :n_targets].copy()
    return X, y

# Create sequences for training
time_step = 24
X, y = create_dataset(scaled_data_with_time, time_step)
# shuffle=False keeps the windows in their original order, so the test set is
# the final 20% of the windows rather than a random sample.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Build LSTM model.
# The input shape is declared with an explicit Input layer (the form Keras
# recommends for Sequential models) instead of passing `input_shape` to the
# first LSTM layer, which triggers a Keras UserWarning.
lstm_model = Sequential()
lstm_model.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
lstm_model.add(LSTM(units=64, return_sequences=True))
lstm_model.add(LSTM(units=64))
# One output unit per target column; time features are inputs only.
lstm_model.add(Dense(units=len(target_columns)))
lstm_model.compile(optimizer='adam', loss='mean_squared_error')

# Train model and measure the wall-clock time spent in fit()
start_time = time()
lstm_model.fit(X_train, y_train, epochs=100, batch_size=32)
training_time = time() - start_time
print(f"Training time: {training_time:.2f} seconds")

# Predict test set
y_test_pred = lstm_model.predict(X_test)

# Calculate evaluation metrics.
# NOTE: y_test and y_test_pred are both MinMax-scaled, so these values are in
# scaled units averaged over all target columns, not in physical units.
mae = mean_absolute_error(y_test, y_test_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))

# Print metrics
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Root Mean Square Error (RMSE): {rmse:.4f}")

# Predict September-October values
# Timestamps assigned to the 60 forecast steps (one per day from 1 September).
# NOTE(review): every recursive step is labelled as one calendar day; confirm
# that the training rows are also spaced one day apart.
prediction_dates = pd.date_range(start='2000-09-01', periods=60, freq='D')

# Real calendar features for each forecast timestamp, scaled with the scaler
# fitted on the training Day/Month/Hour columns. Feeding zeros instead would
# tell the model every future step has the training-minimum day, month and hour.
future_time = pd.DataFrame(
    {
        'Day': prediction_dates.day,
        'Month': prediction_dates.month,
        'Hour': prediction_dates.hour,
    },
    index=prediction_dates,
)
scaled_future_time = np.asarray(time_scaler.transform(future_time))

# Seed the rolling window with the last `time_step` rows of the training data.
last_sequence = scaled_data_with_time[-time_step:]
n_features = last_sequence.shape[1]
predictions = []
for time_row in scaled_future_time:
    input_seq = last_sequence.reshape(1, time_step, n_features)
    # verbose=0: avoid printing one progress bar per forecast step.
    predicted_values = lstm_model.predict(input_seq, verbose=0)
    # Ensure no negative predictions.
    # NOTE(review): this clamps in scaled space, where 0 corresponds to each
    # column's training minimum rather than to 0 in original units.
    predicted_values = np.maximum(predicted_values, 0)
    predictions.append(predicted_values)
    # Slide the window: drop the oldest row and append the prediction together
    # with the scaled time features of the timestamp it belongs to.
    next_row = np.hstack([predicted_values, time_row.reshape(1, -1)])
    last_sequence = np.vstack([last_sequence[1:], next_row])

# Convert predictions to original scale
predictions = np.array(predictions).reshape(-1, len(target_columns))
predictions = scaler.inverse_transform(predictions)

# Create DataFrame with predicted values
predicted_df = pd.DataFrame(predictions, index=prediction_dates, columns=target_columns)
predicted_df.to_csv('future_predictions.csv')

# Output DataFrame
print(predicted_df)


Epoch 1/100


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train['Day'] = df_train.index.day
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train['Month'] = df_train.index.month
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train['Hour'] = df_train.index.hour
  super().__init__(**kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 0.1881
Epoch 2/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - loss: 0.0257
Epoch 3/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - loss: 0.0173
Epoch 4/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 0.0142
Epoch 5/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - loss: 0.0127
Epoch 6/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - loss: 0.0134
Epoch 7/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 0.0125
Epoch 8/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 0.0109
Epoch 9/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 0.0110
Epoch 10/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 0.0097
E