In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

# Normalize data
def normalize_data(data):
    scaler = MinMaxScaler()
    scaler.fit(data)
    return scaler.transform(data), scaler

# Prepare input-output pairs
def prepare_data(data, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:i+sequence_length])
        y.append(data[i+sequence_length])
    return np.array(X), np.array(y)

# Load your DataFrame (replace df with your DataFrame)
# df = pd.read_csv('your_file.csv')  # If reading from CSV file

# Assume df is your DataFrame
# Convert your DataFrame to numpy array
data = df.values.astype(float)

# Parameters
sequence_length = 50  # Length of input sequences

# Normalize data
data_normalized, scaler = normalize_data(data)

# Prepare input-output pairs
X, y = prepare_data(data_normalized, sequence_length)

# Split data into training and testing sets
split_index = int(0.8 * len(X))
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# Build Bi-LSTM model
model = Sequential([
    Bidirectional(LSTM(50, activation='relu', input_shape=(sequence_length, data.shape[1]))),
    Dense(data.shape[1])
])

# Compile model
model.compile(optimizer='adam', loss='mse')

# Train model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), verbose=2)

# Evaluate model
loss = model.evaluate(X_test, y_test, verbose=0)
print('Test Loss:', loss)

# Plot training and validation loss
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
plt.show()

# Make predictions
predictions_normalized = model.predict(X_test)

# Inverse normalization
predictions = scaler.inverse_transform(predictions_normalized)
y_test_actual = scaler.inverse_transform(y_test)

# Plot predictions against actual values
plt.plot(y_test_actual[:, 0], label='actual')
plt.plot(predictions[:, 0], label='predicted')
plt.legend()
plt.show()


In [40]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

In [41]:
# Normalize data
def normalize_data(data):
    scaler = MinMaxScaler()
    scaler.fit(data)
    return scaler.transform(data), scaler

# Prepare input-output pairs
def prepare_data(data, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:i+sequence_length])
        y.append(data[i+sequence_length])
    return np.array(X), np.array(y)

In [42]:
file_path = "C:\\Users\\RGIPT\\Desktop\\Drill_Bit\\drill_bit\\NOAA_Dataset_2010_to_2016.csv"


# Open the CSV file
df = pd.read_csv(file_path)

In [None]:
df['1_day_lag'] = df['netsolar'].iloc[-2142088:].tolist() + [None] * (len(df) - 2142088)

In [None]:
df['2_day_lag'] = df['netsolar'].iloc[-2140648:].tolist() + [None] * (len(df) - 2140648)

In [None]:
df['3_day_lag'] = df['netsolar'].iloc[-2139208:].tolist() + [None] * (len(df) - 2139208)

In [None]:
new = df.iloc[:-4320]
new

In [43]:
def format_index(data):
    """Create UTC localized DatetimeIndex for the dataframe.
    Parameters
    ----------
    data: Dataframe
        Must contain columns 'year', 'jday', 'hour' and
        'minute'.
    Return
    ------
    data: Dataframe
        Dataframe with a DatetimeIndex localized to UTC.
    """
    year = data.year.apply(str)
    jday = data.jday.apply(lambda x: '{:03d}'.format(x))
    hours = data.hour.apply(lambda x: '{:02d}'.format(x))
    minutes = data.minute.apply(lambda x: '{:02d}'.format(x))
    index = pd.to_datetime(year + jday + hours + minutes, format="%Y%j%H%M")
    data.index = index
    data = data.tz_localize('UTC')

    return data

In [44]:
df_new =format_index(new)

In [45]:
df_new.drop(columns=['year', 'month', 'day', 'minute', 'dt'], inplace=True)

In [46]:
df_new.interpolate(method='linear', inplace=True)

In [None]:
null_dict = {}
null_dict['Bondville_IL'] = df_new.isna().sum()
df_null = pd.DataFrame(null_dict)
df_null

In [47]:
df_new_h = df_new[['netsolar']]
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense
from datetime import datetime

In [48]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense
from datetime import datetime

In [49]:
time_series_data = df_new[['netsolar']].values

# Define function to create input sequences and corresponding outputs
def create_sequences(data, seq_length):
    sequences = []
    next_values = []
    for i in range(len(data) - seq_length):
        sequences.append(data[i:i+seq_length])
        next_values.append(data[i+seq_length])
    return np.array(sequences), np.array(next_values)

# Define sequence length and split data into input sequences and outputs
seq_length = 10  # adjust this according to your preference
X, y = create_sequences(time_series_data, seq_length)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [50]:
# Define BiLSTM model
model = Sequential()
model.add(Bidirectional(LSTM(units=50, activation='relu'), input_shape=(seq_length, 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# Reshape input data to fit the model input shape
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

  super().__init__(**kwargs)


In [51]:
# Fit model
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))


Epoch 1/100
[1m53480/53480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m180s[0m 3ms/step - loss: 1287.9452 - val_loss: 939.3898
Epoch 2/100
[1m53480/53480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m167s[0m 3ms/step - loss: 980.0408 - val_loss: 950.4987
Epoch 3/100
[1m53480/53480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 3ms/step - loss: 949.9277 - val_loss: 959.2139
Epoch 4/100
[1m53480/53480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 3ms/step - loss: 961.3642 - val_loss: 1018.4305
Epoch 5/100
[1m53480/53480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 3ms/step - loss: 986.4617 - val_loss: 2348.2095
Epoch 6/100
[1m53480/53480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 3ms/step - loss: 1020.6947 - val_loss: 963.6129
Epoch 7/100
[1m53480/53480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 3ms/step - loss: 996.9278 - val_loss: 1052.6577
Epoch 8/100
[1m53480/53480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145

<keras.src.callbacks.history.History at 0x1ccbd65a4d0>

In [52]:
# Evaluate model
loss = model.evaluate(X_test, y_test)
print("Test Loss:", loss)

# Make predictions
predictions = model.predict(X_test)



[1m13370/13370[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 1ms/step - loss: 935.6636
Test Loss: 937.0733642578125
[1m13370/13370[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 1ms/step


In [53]:
# Calculate R2 score
r2 = r2_score(y_test, predictions)
print("R2 Score:", r2)

R2 Score: 0.978625688518909


In [54]:
# Save the model in the native Keras format
model.save('bi-LSTM.keras')
