#### Task 1
 Setup and Data Preparation
 

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder


# Load only the first 500,000 rows to keep things fast.
# `nrows` makes pandas stop parsing after that many rows, instead of reading
# the entire CSV into memory and only then discarding everything but the head.
df = pd.read_csv("../data/transactions_train.csv", nrows=500000)

# Preprocess the raw IDs into consecutive integer codes that the embedding layer can index.

# Create one encoder per ID column (users and articles)
user_encoder = LabelEncoder()
article_encoder = LabelEncoder()

# Fit each encoder on its column and map the IDs to integer codes
df['user_code'] = user_encoder.fit_transform(df['customer_id'])
df['article_code'] = article_encoder.fit_transform(df['article_id'])

# Number of distinct users and articles (used later to size the embedding tables)
n_users = df['user_code'].nunique()
n_articles = df['article_code'].nunique()

print(f"Number of unique users: {n_users}")
print(f"Number of unique articles: {n_articles}")


# Create the training data
# Positive examples (the ones we have): one row per transaction,
# with the user code and the article code side by side.
X = df[['user_code', 'article_code']].values
# Every row is an observed purchase, so the target is an array of 1s.
y = np.ones(len(df))

# In a full project, we would add "negative samples" here (target=0) so that the
# model can also learn which user/article pairs did NOT result in a purchase.
# For today, we will skip this step to focus on the model architecture.







Number of unique users: 119904
Number of unique articles: 24942


#### Task 2
Building the Model Architecture

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Width of each learned embedding vector
embedding_dim = 32

# --- User pathway: user code -> embedding lookup -> flat vector ---
user_input = layers.Input(name='user_input', shape=(1,))
user_embedding = layers.Embedding(
    input_dim=n_users,
    output_dim=embedding_dim,
    name='user_embedding',
)(user_input)
user_vec = layers.Flatten(name='flatten_user')(user_embedding)

# --- Article pathway: article code -> embedding lookup -> flat vector ---
article_input = layers.Input(name='article_input', shape=(1,))
article_embedding = layers.Embedding(
    input_dim=n_articles,
    output_dim=embedding_dim,
    name='article_embedding',
)(article_input)
article_vec = layers.Flatten(name='flatten_article')(article_embedding)

# --- Merge both pathways and stack the dense head ---
concatenated = layers.Concatenate()([user_vec, article_vec])
dense_1 = layers.Dense(units=128, activation='relu')(concatenated)
dense_2 = layers.Dense(units=64, activation='relu')(dense_1)
# A single sigmoid unit yields a value in the 0-1 range, read as a probability
output = layers.Dense(units=1, activation='sigmoid')(dense_2)

# Two inputs (user, article) -> one output
model = keras.Model(
    inputs=[user_input, article_input],
    outputs=output,
)

# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary
model.summary()
