In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping

# Load the dataset
file_path = 'C:/Users/dell xps 15/Desktop/dataset/online_retail.csv'
data = pd.read_csv(file_path)
# head must be *called*; printing the bare attribute only shows the bound method
print(data.head())

# Keep only the columns used for modelling
data = data[['InvoiceDate', 'UnitPrice', 'CustomerID', 'Country',
             'InvoiceNo', 'StockCode', 'Description', 'Quantity']]

# The regression target is UnitPrice; every other selected column is a feature
target_column = 'UnitPrice'
data = data.dropna(subset=[target_column])  # rows without a target cannot be trained on
X = data.drop(columns=[target_column]).copy()  # Features
y = data[target_column]  # Target (UnitPrice)

# MinMaxScaler only accepts numeric input, so every feature is converted to a
# number first (the raw timestamp strings are what raised
# "could not convert string to float").
# Timestamps become seconds elapsed since the earliest invoice; values that
# cannot be parsed become NaT here and are replaced by the sentinel below.
invoice_dates = pd.to_datetime(X['InvoiceDate'], errors='coerce')
X['InvoiceDate'] = (invoice_dates - invoice_dates.min()).dt.total_seconds()

# Remaining text columns are replaced by integer category codes.
# pd.factorize assigns -1 to missing values.
# NOTE(review): integer codes impose an arbitrary order on the categories;
# consider an embedding or target encoding if these columns turn out to matter.
for column in X.columns:
    if not pd.api.types.is_numeric_dtype(X[column]):
        codes, _ = pd.factorize(X[column])
        X[column] = codes

# Any NaN left in a numeric column gets the same -1 "unknown" sentinel so the
# network never sees NaN inputs.
X = X.fillna(-1).astype('float64')

# Choose the train/test rows before scaling so the scaler can be fitted on the
# training rows only (fitting on all rows leaks test-set min/max into training).
# Splitting the row positions with the same random_state yields the same
# permutation as splitting the arrays directly.
train_idx, test_idx = train_test_split(
    np.arange(len(X)), test_size=0.2, random_state=42
)

# Scale the features using statistics from the training rows only
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(X.iloc[train_idx])
X_scaled = scaler.transform(X)

# Reshape X to fit LSTM input requirements (3D input: [samples, time steps, features])
X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

# Materialise the training and test sets from the chosen row positions
X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

# Build the LSTM model: two stacked LSTM layers, each followed by dropout,
# then a small dense head ending in a single regression output.
time_steps, n_features = X_train.shape[1], X_train.shape[2]

model = Sequential([
    # First LSTM layer returns the full sequence so the next LSTM can consume it
    LSTM(units=50, return_sequences=True, input_shape=(time_steps, n_features)),
    Dropout(0.2),  # regularization against overfitting
    # Second LSTM layer collapses the sequence to its final state
    LSTM(units=50, return_sequences=False),
    Dropout(0.2),
    # Dense layer followed by BatchNormalization of its activations
    Dense(units=25),
    BatchNormalization(),
    # Output layer for regression
    Dense(units=1),
])

# Compile the model with the Adam optimizer and an MSE objective
model.compile(optimizer='adam', loss='mean_squared_error')

# Use EarlyStopping to stop training when validation loss stops improving and
# roll back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model.
# Validation data is carved out of the training set (Keras takes the last 10%
# of the arrays passed to fit). The test set must not drive early stopping or
# weight selection, otherwise the test loss reported below is optimistically
# biased.
history = model.fit(
    X_train,
    np.asarray(y_train),  # plain array so validation_split can slice it
    epochs=50,
    batch_size=32,
    validation_split=0.1,
    callbacks=[early_stopping],
)

# Evaluate the model on the held-out test set, which training never saw
test_loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")

# --- Test split: predict, flatten to 1-D, and pair with the actual values ---
predictions_test = model.predict(X_test)
y_test_values = y_test.to_numpy()
predicted_values_test = predictions_test.flatten()
test_columns = {
    'Actual Test': y_test_values,
    'Predicted Test': predicted_values_test,
}
comparison_df_test = pd.DataFrame(test_columns)

# --- Training split: same steps, used to compare fit quality against test ---
predictions_train = model.predict(X_train)
y_train_values = y_train.to_numpy()
predicted_values_train = predictions_train.flatten()
train_columns = {
    'Actual Train': y_train_values,
    'Predicted Train': predicted_values_train,
}
comparison_df_train = pd.DataFrame(train_columns)

def _plot_actual_vs_predicted(frame, actual_col, predicted_col, split_name,
                              predicted_color, quantity):
    """Draw actual and predicted values of one data split on a single figure.

    Args:
        frame: DataFrame holding the two columns to plot.
        actual_col: Name of the column with the observed values.
        predicted_col: Name of the column with the model's predictions.
        split_name: Human-readable split name used in the title (e.g. 'Test').
        predicted_color: Matplotlib color for the prediction line.
        quantity: Name of the plotted quantity, used in the title and y-label.
    """
    plt.figure(figsize=(12, 6))
    plt.plot(frame[actual_col], label=actual_col, color='blue')
    plt.plot(frame[predicted_col], label=predicted_col,
             color=predicted_color, linestyle='--')
    plt.title(f'Actual vs Predicted {quantity} ({split_name} Set)')
    plt.xlabel('Data Points')
    plt.ylabel(quantity)
    plt.legend()
    plt.grid(True)
    plt.show()


# Plot the actual vs predicted values for test and train.
# Labels come from target_column so they name the quantity actually being
# predicted (the previous hard-coded "Weekly Sales" did not match the target).
_plot_actual_vs_predicted(comparison_df_test, 'Actual Test', 'Predicted Test',
                          'Test', 'red', target_column)
_plot_actual_vs_predicted(comparison_df_train, 'Actual Train', 'Predicted Train',
                          'Train', 'green', target_column)

# Save the model after training
# model.save('improved_lstm_model.h5')


<bound method NDFrame.head of        InvoiceNo StockCode                          Description  Quantity  \
0         536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER         6   
1         536365     71053                  WHITE METAL LANTERN         6   
2         536365    84406B       CREAM CUPID HEARTS COAT HANGER         8   
3         536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE         6   
4         536365    84029E       RED WOOLLY HOTTIE WHITE HEART.         6   
...          ...       ...                                  ...       ...   
541904    581587     22613          PACK OF 20 SPACEBOY NAPKINS        12   
541905    581587     22899         CHILDREN'S APRON DOLLY GIRL          6   
541906    581587     23254        CHILDRENS CUTLERY DOLLY GIRL          4   
541907    581587     23255      CHILDRENS CUTLERY CIRCUS PARADE         4   
541908    581587     22138        BAKING SET 9 PIECE RETROSPOT          3   

                InvoiceDate  UnitPrice  Custo

ValueError: could not convert string to float: '2010-12-01 08:26:00'