<a href="https://colab.research.google.com/github/Ganesh-Navadeep/Predictive-Analytics/blob/main/Shopping_suggestions_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Embedding, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# 1. Load the dataset
events_df = pd.read_csv("/content/events.csv")  # Make sure the file is in the working directory

# 2. Preprocess
events_df = events_df[['timestamp', 'visitorid', 'event', 'itemid']]
events_df = events_df.sort_values(by=['visitorid', 'timestamp'])

# Map event types to codes
event_mapping = {'view': 1, 'addtocart': 2, 'transaction': 3}
events_df['event_code'] = events_df['event'].map(event_mapping)

# Group by user session
user_sequences = events_df.groupby('visitorid').agg(list)

# Create input sequences and labels
sequences = []
labels = []

for events, items in zip(user_sequences['event_code'], user_sequences['itemid']):
    if len(events) < 2:
        continue
    seq = ['{}_{}'.format(i, e) for i, e in zip(items[:-1], events[:-1])]
    label = 1 if events[-1] == 3 else 0
    sequences.append(seq)
    labels.append(label)

# 3. Encode and pad sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sequences)
X = tokenizer.texts_to_sequences(sequences)
X = pad_sequences(X, padding='post')
y = np.array(labels)

# 4. Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. Build the LSTM model
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=64, input_length=X.shape[1]))
model.add(LSTM(64))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 6. Train
model.fit(X_train, y_train, validation_split=0.2, epochs=3, batch_size=256, verbose=1)

# 7. Evaluate
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# 8. Print metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print("\nClassification Report:\n", report)



Epoch 1/3
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m426s[0m 3s/step - accuracy: 0.9824 - loss: 0.1512 - val_accuracy: 0.9807 - val_loss: 0.0956
Epoch 2/3
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m435s[0m 3s/step - accuracy: 0.9829 - loss: 0.0909 - val_accuracy: 0.9807 - val_loss: 0.0965
Epoch 3/3
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m439s[0m 3s/step - accuracy: 0.9846 - loss: 0.0830 - val_accuracy: 0.9807 - val_loss: 0.0961
[1m393/393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 120ms/step
Accuracy: 0.9822
Precision: 0.0000

Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99     12328
           1       0.00      0.00      0.00       223

    accuracy                           0.98     12551
   macro avg       0.49      0.50      0.50     12551
weighted avg       0.96      0.98      0.97     12551



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
