In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Embedding, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

# Load MovieLens dataset (UPDATE THE FILE PATH BELOW)
file_path = "/content/ratings.csv"  # <-- Change this!
df = pd.read_csv(file_path)

# Sort by user, then by timestamp, so that the per-user lists built below
# follow the order in which each user rated the movies.
df = df.sort_values(by=['userId', 'timestamp'])

# One list of raw movie IDs per user (the Series is indexed by userId).
# This is built BEFORE the 'movieId' column is remapped, so the lists keep
# the raw IDs from the CSV.
user_sequences = df.groupby('userId')['movieId'].apply(list)

# Convert movie IDs to a contiguous range 1..N (0 is reserved for padding).
# Sorting the unique IDs makes the ID -> index assignment independent of set
# iteration order, so the same ratings file always yields the same mapping.
unique_movies = sorted(df['movieId'].unique().tolist())
movie_to_index = {movie: i for i, movie in enumerate(unique_movies, start=1)}
# Derive the inverse from the forward map so the two can never disagree.
index_to_movie = {i: movie for movie, i in movie_to_index.items()}
df['movieId'] = df['movieId'].map(movie_to_index)

# Build supervised samples with a sliding window over each user's history:
# X holds `sequence_length` consecutive movie indices, y holds the index of
# the movie that comes right after that window.
sequence_length = 5
X, y = [], []

for history in user_sequences:
    encoded = [movie_to_index[movie_id] for movie_id in history if movie_id in movie_to_index]
    # Users with at most `sequence_length` movies yield an empty range and
    # therefore contribute no samples.
    window_starts = range(len(encoded) - sequence_length)
    for start in window_starts:
        target_pos = start + sequence_length
        X.append(encoded[start:target_pos])
        y.append(encoded[target_pos])

# Every window already has exactly `sequence_length` items, so this call
# mainly turns the list of lists into a 2-D integer array.
X = pad_sequences(X, maxlen=sequence_length, padding='pre')
y = np.array(y)

# Hold out 20% of the samples for evaluation (fixed seed for repeatability).
# NOTE(review): windows of one user overlap and the split is random, so a
# movie that is a target in the test set can appear as an input in the
# training set -- consider a per-user or time-based split; confirm intent.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build LSTM model
vocab_size = len(movie_to_index) + 1  # +1 for padding index
embedding_dim = 50

# Embedding -> LSTM -> softmax over the whole movie vocabulary.
# `input_length` is intentionally not passed to Embedding: the argument is
# deprecated in Keras 3 (it only triggers a warning there) and the input
# shape is inferred from the data on the first call to fit().
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    LSTM(64, return_sequences=False),  # keep only the final hidden state
    Dense(vocab_size, activation='softmax'),  # Predicting next movie ID
])

# Targets are integer class indices (not one-hot), hence the sparse loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model (reduced epochs for quick testing)
history = model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))

# Print model summary (called after fit(), so the model is already built)
model.summary()

# Function to predict next movie for a given sequence
def recommend_next_movie(movie_sequence):
    """Predict the movie most likely to follow ``movie_sequence``.

    Args:
        movie_sequence: Iterable of raw movie IDs. IDs that are not keys of
            ``movie_to_index`` are encoded as 0, the padding index. Inputs
            shorter than ``sequence_length`` are left-padded with 0; longer
            inputs are cut to ``sequence_length`` by ``pad_sequences`` using
            its default truncation mode.

    Returns:
        The raw movie ID with the highest predicted score, or the string
        ``"Unknown Movie"`` when no real movie can be returned.
    """
    encoded = [movie_to_index.get(m, 0) for m in movie_sequence]  # Convert to index
    model_input = pad_sequences([encoded], maxlen=sequence_length, padding='pre')
    scores = model.predict(model_input)[0]

    # Output slot 0 corresponds to the padding index, not to a movie. It is
    # excluded from the argmax so the function does not answer
    # "Unknown Movie" whenever padding happens to score highest.
    if scores.shape[-1] <= 1:
        return "Unknown Movie"
    predicted_index = int(np.argmax(scores[1:])) + 1
    return index_to_movie.get(predicted_index, "Unknown Movie")

# Demo: feed the first `sequence_length` movies of the first user in
# `user_sequences` to the model and print its suggestion.
first_user_history = user_sequences.iloc[0]
example_sequence = list(first_user_history)[:sequence_length]
predicted_movie = recommend_next_movie(example_sequence)
print("Predicted next movie ID:", predicted_movie)



Epoch 1/5
11877/11877 ━━━━━━━━━━━━━━━━━━━━ 109s 9ms/step - accuracy: 0.0030 - loss: 8.2477 - val_accuracy: 0.0076 - val_loss: 7.5790
Epoch 2/5
11877/11877 ━━━━━━━━━━━━━━━━━━━━ 105s 9ms/step - accuracy: 0.0090 - loss: 7.3715 - val_accuracy: 0.0125 - val_loss: 7.4243
Epoch 3/5
11877/11877 ━━━━━━━━━━━━━━━━━━━━ 149s 9ms/step - accuracy: 0.0145 - loss: 7.0967 - val_accuracy: 0.0155 - val_loss: 7.3826
Epoch 4/5
11877/11877 ━━━━━━━━━━━━━━━━━━━━ 104s 9ms/step - accuracy: 0.0203 - loss: 6.8786 - val_accuracy: 0.0183 - val_loss: 7.3778
Epoch 5/5
11877/11877 ━━━━━━━━━━━━━━━━━━━━ 150s 9ms/step - accuracy: 0.0258 - loss: 6.7063 - val_accuracy: 0.0201 - val_loss: 7.4009


1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 175ms/step
Predicted next movie ID: 3160


# New section