In [16]:
import numpy as np
import sklearn
import tensorflow as tf
import pandas as pd

from keras.src.layers import Dense
from tensorflow import keras
from keras import layers
from tensorflow.keras.layers import Embedding, Flatten, Dense, Dot, Input
from tensorflow.keras.models import Sequential, Model


# Load The Data

In [8]:
users = pd.read_csv("data/ratings.csv")
users.head(5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,110,1.0,1425941529
1,1,147,4.5,1425942435
2,1,858,5.0,1425941523
3,1,1221,5.0,1425941546
4,1,1246,5.0,1425941556


# Collaborative Filtering Model

### When you think collaborative filtering, think of statements like:
- Users who liked similar items also liked...
- Items similar to this item

### 🧠 “Behavioral Similarity”
The system learns from what users did, not what items are about.

> It doesn’t care what genre the item is — it just learns from the pattern of user behavior.

### 🔍 How It Works:
- Looks at user-item interactions (ratings, likes, views)
- Learns latent similarities between users or items
- Powered by embeddings, matrix factorization, or neural models

finds patterns in behavior



In [28]:
num_users = users['userId'].max()
num_items = users['movieId'].max()

user_ids = users['userId'] - 1 
movie_ids = users['movieId'] - 1

user_input = Input(shape=(1,), name='userId')
item_input = Input(shape=(1,), name='movieId')

embedding_dim = 32
user_embedding = Embedding(input_dim=num_users, output_dim=embedding_dim, name='user_embedding')(user_input)
item_embedding = Embedding(input_dim=num_items, output_dim=embedding_dim, name='item_embedding')(item_input)

print(user_embedding.shape)

user_vec = Flatten()(user_embedding) # Creates flatten object than calls it on user_embedding.
item_vec = Flatten()(item_embedding)

user_vec.shape


(None, 1, 32)


(None, 32)

In [29]:
dot_product = Dot(axes=1)([user_vec, item_vec])

model = Model(inputs=[user_input, item_input], outputs=dot_product)
model.compile(loss='mse', optimizer='adam')
model.summary()

In [30]:
model.fit(
    x =[user_ids, movie_ids],
    y = users['rating'],
    epochs = 10,
    batch_size = 64,
    validation_split = 0.2
)

Epoch 1/10
[1m   776/325304[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m12:48:00[0m 142ms/step - loss: 13.6467

KeyboardInterrupt: 

# Content-Based Filtering Model

When you think of content-based filtering, think of statements like:
- Because you liked horror
- Because you searched laptops

### 🧠 “Attribute Similarity”
The system uses the metadata or features of items (or users) directly.

> It recommends items with similar features to what you liked, not because other users liked them.

### 📦 How It Works:
- Uses item (or user) attributes: genres, categories, descriptions
- Builds a user profile from liked item features
- Compares feature vectors (e.g., via cosine similarity)

finds patterns in features

Collaborative filtering learns from who likes what, no matter what it is.

Content-based filtering learns from what the thing is, no matter who liked it.

In [None]:
user_NN = Sequential([
    layers.Input(shape = num_user_features),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(32)
])

item_NN = Sequential([
    layers.Input(shape= num_item_features),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(32)
])

vu = user_NN(input_user)
vu = tf.linalg.l2_normalize(vu, axis=1)

vm = item_NN(input_item)
vm = tf.linalg.l2_normalize(vm, axis=1)

output = layers.Dot(axes=1)([vu, vm])

model = Model([input_user, input_item], output)

cost_fn = keras.losses.MeanSquaredError()

# Training

# Testing