In [14]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Load the preprocessed dataset (assuming you have already performed data preprocessing steps)
df = pd.read_csv('./Watches_Bags_Accessories.csv', encoding='windows-1252')

# Normalize the data
scaler = MinMaxScaler()

columns = ['Rating in Stars', 'Rating Count', 'Voucher', 'Current Price', 'Original Price']

label_encoder = LabelEncoder()

# Encode 'Category' column
df['Rating in Stars'] = label_encoder.fit_transform(df['Rating in Stars'])
df['Rating Count'] = label_encoder.fit_transform(df['Rating Count'])
df['Voucher'] = label_encoder.fit_transform(df['Voucher'])
df['Current Price'] = label_encoder.fit_transform(df['Current Price'])
df['Original Price'] = label_encoder.fit_transform(df['Original Price'])

scaled_data = scaler.fit_transform(df[columns].astype(float))

# Define the number of time steps (e.g., past 12 months) for LSTM input
n_steps = 12
X, y = [], []
for i in range(n_steps, len(df)):
    X.append(scaled_data[i - n_steps:i, :])
    y.append(scaled_data[i, 3])  # Assuming 'Current Price' is the target variable

X, y = np.array(X), np.array(y)

# Split data into training and testing sets
split_ratio = 0.8
split_index = int(split_ratio * len(X))

X_train, X_test, y_train, y_test = X[:split_index], X[split_index:], y[:split_index], y[split_index:]

# Build the LSTM model
model = Sequential()
model.add(LSTM(units=50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1)

# Evaluate the model
y_pred = model.predict(X_test)
y_pred = scaler.inverse_transform(np.concatenate((np.zeros((len(y_pred), 4)), y_pred), axis=1))[:, 3]  # Inverse scaling

mae = mean_absolute_error(df['Current Price'].iloc[split_index + n_steps:], y_pred)
mse = mean_squared_error(df['Current Price'].iloc[split_index + n_steps:], y_pred)
r2 = r2_score(df['Current Price'].iloc[split_index + n_steps:], y_pred)

print(f'Mean Absolute Error: {mae}')
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Plot training and validation loss
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()

# Visualize actual vs predicted prices
plt.plot(df.index[split_index + n_steps:], df['Current Price'].iloc[split_index + n_steps:], label='Actual Price', color='blue')
plt.plot(df.index[split_index + n_steps:], y_pred, label='Predicted Price', color='red')
plt.xlabel('Time')
plt.ylabel('Price')
plt.title('Actual vs Predicted Price')
plt.legend()
plt.show()


ValueError: could not convert string to float: '4.6/5'