In [22]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from sklearn.model_selection import train_test_split


In [23]:
# Read the interaction table and list its column names as a quick schema check.
df = pd.read_csv(filepath_or_buffer="Final_DataSet.csv")
print(df.columns)

Index(['user_id', 'video_id', 'interaction', 'mood_label'], dtype='object')


In [None]:
# NOTE(review): this cell rebinds `df`, replacing the frame an earlier cell
# loaded from "Final_DataSet.csv" — confirm which file is the intended source.
file_path = "video_dataset_final.csv"  # Update with actual file path

# Keep only the three columns the model consumes.
df = pd.read_csv(file_path)[["user_id", "video_id", "interaction"]]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
train_df, test_df = train_test_split(df, random_state=42, test_size=0.2)

# Unpack each split into parallel arrays: user IDs, video IDs, interaction targets.
train_users, train_videos, train_interactions = (
    train_df[column].values for column in ("user_id", "video_id", "interaction")
)
test_users, test_videos, test_interactions = (
    test_df[column].values for column in ("user_id", "video_id", "interaction")
)


In [None]:
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Neural collaborative filtering model: one embedding per user and per video,
# concatenated and passed through a small MLP that outputs a score in (0, 1).

# Embedding lookups require 0 <= index < input_dim, so each table must be sized
# from the LARGEST id present, not just the number of distinct ids. Counting
# distinct ids under-sizes the table whenever ids are not contiguous
# (e.g. {0, 5, 900}). Taking max(largest id, nunique) + 1 keeps the original
# size for contiguous ids and stays in range for sparse ones.
# Assumes ids are non-negative integers — TODO confirm against the dataset.
num_users = max(int(df["user_id"].max()), df["user_id"].nunique()) + 1
num_videos = max(int(df["video_id"].max()), df["video_id"].nunique()) + 1

# Define input layers: each sample carries a single user id and a single video id.
user_input = Input(shape=(1,), name="user_input")
video_input = Input(shape=(1,), name="video_input")

# 16-dimensional learned representation for every user / video id.
user_embedding = Embedding(input_dim=num_users, output_dim=16, name="user_embedding")(user_input)
video_embedding = Embedding(input_dim=num_videos, output_dim=16, name="video_embedding")(video_input)

# Drop the length-1 sequence axis so the two embeddings can be concatenated.
user_flatten = Flatten()(user_embedding)
video_flatten = Flatten()(video_embedding)

concat_layer = Concatenate()([user_flatten, video_flatten])

# MLP tower: 64 -> 32 -> 16 -> 8 units, with dropout after the two widest layers.
dense1 = Dense(64, activation="relu")(concat_layer)
dropout1 = Dropout(0.2)(dense1)

dense2 = Dense(32, activation="relu")(dropout1)
dropout2 = Dropout(0.2)(dense2)

dense3 = Dense(16, activation="relu")(dropout2)
dense4 = Dense(8, activation="relu")(dense3)

# Sigmoid keeps the predicted interaction score inside (0, 1).
output = Dense(1, activation="sigmoid", name="output_layer")(dense4)

model = Model(inputs=[user_input, video_input], outputs=output)
# Trained as a regression on the interaction value (MSE loss, MAE reported).
model.compile(optimizer=Adam(learning_rate=0.001), loss="mse", metrics=["mae"])

model.summary()


In [None]:
# Fit the model on the training split for 10 epochs with mini-batches of 32;
# validation_split=0.2 reserves 20% of the training arrays for validation metrics.
history = model.fit(
    x=[train_users, train_videos],
    y=train_interactions,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)


Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 0.2498 - mae: 0.4998 - val_loss: 0.2479 - val_mae: 0.4979
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 240ms/step - loss: 0.2474 - mae: 0.4974 - val_loss: 0.2463 - val_mae: 0.4963
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step - loss: 0.2455 - mae: 0.4954 - val_loss: 0.2447 - val_mae: 0.4947
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step - loss: 0.2436 - mae: 0.4935 - val_loss: 0.2431 - val_mae: 0.4930
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 185ms/step - loss: 0.2417 - mae: 0.4916 - val_loss: 0.2414 - val_mae: 0.4913
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 174ms/step - loss: 0.2397 - mae: 0.4896 - val_loss: 0.2397 - val_mae: 0.4896
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 183ms/step - loss: 0.2376 - ma

In [27]:
# Score every (user, video) pair in the held-out test split.
predictions = model.predict([test_users, test_videos])

# Line the scores up with their ids and true labels, best-scored pairs first.
recommendations = pd.DataFrame(
    {
        "user_id": test_users,
        "video_id": test_videos,
        "actual_interaction": test_interactions,
        "predicted_score": predictions.reshape(-1),
    }
).sort_values(by="predicted_score", ascending=False)

# Show the ten highest-scored pairs.
print(recommendations.head(10))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 217ms/step
   user_id  video_id  actual_interaction  predicted_score
4        0        21                   1         0.522647
2        0         0                   1         0.521853
3        0         8                   1         0.521842
1        0         4                   1         0.521385
5        0        13                   1         0.520620
0        0        19                   1         0.520304


In [33]:
def recommend_videos(user_id, top_n=5):
    """Rank every known video for one user and return the best-scored ones.

    Reads the notebook-level ``df`` (for the distinct ``video_id`` values) and
    the notebook-level ``model`` (for scoring).

    Parameters
    ----------
    user_id
        Id of the user to score; it is paired with every distinct video id.
    top_n : int, default 5
        Number of rows to keep from the top of the ranking.

    Returns
    -------
    pandas.DataFrame
        Columns ``video_id`` and ``predicted_score``, sorted by score in
        descending order and truncated to ``top_n`` rows.
    """
    # Candidate set: each distinct video id present in the dataset.
    candidate_videos = df["video_id"].unique()

    # Pair the same user id with every candidate so the model scores them in one call.
    repeated_user = np.array([user_id for _ in candidate_videos])
    scores = model.predict([repeated_user, candidate_videos])

    ranked = pd.DataFrame(
        {
            "video_id": candidate_videos,
            "predicted_score": scores.reshape(-1),
        }
    ).sort_values(by="predicted_score", ascending=False)

    # Keep only the highest-scored rows.
    return ranked.head(top_n)

# Example: print the top 5 recommendations for user_id 2
print(recommend_videos(top_n=5, user_id=2))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
    video_id  predicted_score
7         12         0.518991
5         22         0.518842
0          0         0.517090
15         9         0.516835
20         1         0.516527


In [29]:
import pickle

# Serialize the trained model object to "ncf_model.pkl" with pickle.
# NOTE(review): Keras documents `model.save(...)` as its native persistence
# path; consider it if nothing downstream requires a pickle file. Only unpickle
# this file from a trusted source — loading a pickle can execute arbitrary code.
with open("ncf_model.pkl", mode="wb") as model_file:
    pickle.dump(model, model_file)
