# In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Dot, Reshape, Dense
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Step 1: Preprocessing and data loading

# Load historical customer data. This section reads the 'user_id', 'item_id'
# and 'rating' columns of the file.
customer_data = pd.read_csv('synthetic_data.csv')

# Step 2: Collaborative Filtering

# Build the user-item matrix: one row per user, one column per item, each
# cell holding the rating. pivot_table aggregates duplicate (user, item)
# pairs with its default aggfunc (mean); pairs with no rating become 0.
user_item_matrix = customer_data.pivot_table(
    index='user_id', columns='item_id', values='rating', fill_value=0
)

# Pairwise cosine similarity between the users' rating vectors. Row/column i
# corresponds to user_item_matrix.index[i].
user_similarity = cosine_similarity(user_item_matrix, user_item_matrix)

# Get the top N similar users for each user.
# Keys are user ids; values are arrays of ROW POSITIONS into
# user_item_matrix (not user ids), ordered from most to least similar.
N = 5
top_similar_users = {}
for i, user in enumerate(user_item_matrix.index):
    # Rank every user by descending similarity to user i.
    ranked = user_similarity[i].argsort()[::-1]
    # Drop the user itself by position rather than assuming it is ranked
    # first: another user with an identical rating vector ties at 1.0 and
    # may sort ahead of it. Filtering before slicing also guarantees N
    # neighbours (the previous slice-then-drop kept only N - 1).
    neighbours = ranked[ranked != i]
    top_similar_users[user] = neighbours[:N]

# Step 3: Content-based Filtering

# customer_data holds one row per interaction, so the same item (and its
# description) can appear many times. Keep the first row of each item so
# that row i of the TF-IDF / similarity matrices describes exactly one item.
# drop_duplicates preserves first-appearance order, the same order that
# customer_data['item_id'].unique() yields.
item_catalog = customer_data.drop_duplicates(subset='item_id')

# Compute the item feature matrix using TF-IDF vectors of the descriptions
vectorizer = TfidfVectorizer()
item_matrix = vectorizer.fit_transform(item_catalog['item_description'])

# Compute similarity between items using cosine similarity
# (shape: number of unique items x number of unique items).
item_similarity = cosine_similarity(item_matrix, item_matrix)

# Get top N similar items for each item.
# Keys are item ids; values are arrays of ROW POSITIONS into item_similarity
# (i.e. positions in item_catalog), ordered from most to least similar.
top_similar_items = {}
for i, item in enumerate(item_catalog['item_id']):
    ranked = item_similarity[i].argsort()[::-1]
    # Exclude the item itself by position (items with identical descriptions
    # tie at 1.0), then keep N neighbours rather than N - 1.
    neighbours = ranked[ranked != i]
    top_similar_items[item] = neighbours[:N]

# Step 4: Neural Network Model for Hybrid Filtering

# Define input layers: one integer id per sample for the user and the item.
user_input = Input(shape=(1,))
item_input = Input(shape=(1,))

# Embedding layers
embedding_dim = 50  # Adjust the embedding dimension as needed
# Embedding tables are indexed by id, so every id fed to the model must lie
# in [0, input_dim). The +1 leaves headroom for 1-based contiguous ids.
# NOTE(review): assumes ids are (or are encoded to) small contiguous
# integers before training — confirm against the data.
num_users = len(customer_data['user_id'].unique()) + 1
num_items = len(customer_data['item_id'].unique()) + 1
user_embedding = Embedding(input_dim=num_users, output_dim=embedding_dim)(user_input)
item_embedding = Embedding(input_dim=num_items, output_dim=embedding_dim)(item_input)

# Flattening the embedding layers: (batch, 1, embedding_dim) -> (batch, embedding_dim)
user_embedding = Reshape(target_shape=(embedding_dim,))(user_embedding)
item_embedding = Reshape(target_shape=(embedding_dim,))(item_embedding)

# Dot product between user and item embeddings
dot_product = Dot(axes=1)([user_embedding, item_embedding])

# Output layer: squash the interaction score into (0, 1)
output = Dense(units=1, activation='sigmoid')(dot_product)

# Create and compile the model
model = Model(inputs=[user_input, item_input], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy')

from sklearn.preprocessing import LabelEncoder

# Encode user_id column with unique integer values (0 .. n_users - 1)
label_encoder = LabelEncoder()
customer_data['user_id'] = label_encoder.fit_transform(customer_data['user_id'])

customer_data['item_id'] = customer_data['item_id'].astype('int')
customer_data['purchase'] = customer_data['purchase'].astype('float')
print(customer_data)
X = pd.DataFrame(customer_data[['user_id','item_id']])
y = pd.DataFrame(customer_data['purchase'])

# Raw item ids are not guaranteed to be contiguous or to start at 0, yet the
# item Embedding table only has one row per valid index. Map them to
# 0 .. n_items - 1 for the model. The 'item_id' column itself is left
# untouched so that lookups keyed by the original ids keep working; ids that
# are already 0 .. n_items - 1 map to themselves.
item_encoder = LabelEncoder()
item_index = item_encoder.fit_transform(customer_data['item_id'])

# Train the model
# NOTE(review): binary_crossentropy presumes 'purchase' is a 0/1 label — confirm.
model.fit(
    [customer_data['user_id'].to_numpy(), item_index],
    customer_data['purchase'].to_numpy(),
    batch_size=128,
    epochs=10,
    validation_split=0.2,
)

# Step 5: Data Visualization

# Heatmap of the user-item matrix (rows: users, columns: items),
# drawn on an explicitly created Axes and shown without a colour bar.
_, ratings_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(user_item_matrix, cmap='Blues', cbar=False, ax=ratings_ax)
ratings_ax.set_title('User-Item Matrix Heatmap')
ratings_ax.set_xlabel('Item ID')
ratings_ax.set_ylabel('User ID')
plt.show()

# Heatmap of the item-item similarity matrix; the new Axes stays the
# current one, so pyplot-level calls that follow still target it.
_, similarity_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(item_similarity, cmap='Blues', cbar=False, ax=similarity_ax)
plt.title