<a href="https://colab.research.google.com/github/JagadeeshGorji/CodTech-Week-4/blob/main/Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

In [3]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("tanujdhiman/post-recommendation-system")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/post-recommendation-system


In [4]:
# Load
df = pd.read_csv("/kaggle/input/post-recommendation-system/post_data.csv")
df

Unnamed: 0,user_id,post_id,time_stamp,Valuable
0,5eece14ffc13ae660900008b,136781766,01-01-2019 13:30,1
1,5eece14efc13ae660900003c,43094523,01-01-2019 13:33,2
2,5eece14efc13ae6609000025,42428071,01-01-2019 13:43,2
3,5eece14ffc13ae66090001d4,76472880,01-01-2019 13:54,2
4,5eece14ffc13ae66090000ac,202721843,01-01-2019 14:00,3
...,...,...,...,...
71795,5eece14ffc13ae660900018c,615389604,12/31/2019 12:37 AM,1
71796,5eece14ffc13ae660900010c,348689108,12/31/2019 12:50 PM,1
71797,5eece14ffc13ae6609000190,619052165,12/31/2019 12:51 AM,2
71798,5eece14efc13ae6609000067,426384418,12/31/2019 12:51 PM,1


In [5]:
df.columns = df.columns.str.strip()


In [6]:
if 'Valuable' in df.columns:
    print("The 'Valuable' column exists in the dataset.")
else:
    print("The 'Valuable' column does not exist in the dataset.")
if 'user_id' in df.columns:
    print("The 'user_id' column exists in the dataset.")
else:
    print("The 'user_id' column does not exist in the dataset.")
if 'post_id' in df.columns:
    print("The 'post_id' column exists in the dataset.")
else:
    print("The 'post_id' column does not exist in the dataset.")

The 'Valuable' column exists in the dataset.
The 'user_id' column exists in the dataset.
The 'post_id' column exists in the dataset.


In [7]:
# Encode IDs
user2idx = {id: i for i, id in enumerate(df['user_id'].unique())}
item2idx = {id: i for i, id in enumerate(df['post_id'].unique())}

df['user'] = df['user_id'].map(user2idx)
df['item'] = df['post_id'].map(item2idx)

# Train/test split
train, test = train_test_split(df[['user', 'item', 'Valuable']], test_size=0.2)


In [8]:
class MF(nn.Module):
    def __init__(self, num_users, num_items, latent_dim=50):
        super(MF, self).__init__()
        self.user_emb = nn.Embedding(num_users, latent_dim)
        self.item_emb = nn.Embedding(num_items, latent_dim)

    def forward(self, u, i):
        return (self.user_emb(u) * self.item_emb(i)).sum(1)


In [9]:
model = MF(len(user2idx), len(item2idx))
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

u = torch.tensor(train['user'].values)
i = torch.tensor(train['item'].values)
r = torch.tensor(train['Valuable'].values, dtype=torch.float)

for epoch in range(100):
    model.train()
    optimizer.zero_grad()
    pred = model(u, i)
    loss = loss_fn(pred, r)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}: loss = {loss.item():.4f}")


Epoch 1: loss = 61.1596
Epoch 2: loss = 58.1405
Epoch 3: loss = 55.2477
Epoch 4: loss = 52.4794
Epoch 5: loss = 49.8333
Epoch 6: loss = 47.3068
Epoch 7: loss = 44.8970
Epoch 8: loss = 42.6006
Epoch 9: loss = 40.4144
Epoch 10: loss = 38.3347
Epoch 11: loss = 36.3578
Epoch 12: loss = 34.4799
Epoch 13: loss = 32.6971
Epoch 14: loss = 31.0055
Epoch 15: loss = 29.4013
Epoch 16: loss = 27.8804
Epoch 17: loss = 26.4392
Epoch 18: loss = 25.0737
Epoch 19: loss = 23.7804
Epoch 20: loss = 22.5556
Epoch 21: loss = 21.3959
Epoch 22: loss = 20.2978
Epoch 23: loss = 19.2582
Epoch 24: loss = 18.2740
Epoch 25: loss = 17.3421
Epoch 26: loss = 16.4598
Epoch 27: loss = 15.6242
Epoch 28: loss = 14.8329
Epoch 29: loss = 14.0834
Epoch 30: loss = 13.3733
Epoch 31: loss = 12.7006
Epoch 32: loss = 12.0630
Epoch 33: loss = 11.4586
Epoch 34: loss = 10.8857
Epoch 35: loss = 10.3423
Epoch 36: loss = 9.8270
Epoch 37: loss = 9.3381
Epoch 38: loss = 8.8741
Epoch 39: loss = 8.4338
Epoch 40: loss = 8.0158
Epoch 41: loss

In [10]:
def recommend(user_id, top_k=5):
    model.eval()
    u_idx = torch.tensor([user2idx[user_id]] * len(item2idx))
    i_idx = torch.tensor(list(item2idx.values()))
    scores = model(u_idx, i_idx).detach().numpy()
    top_items = sorted(zip(item2idx.keys(), scores), key=lambda x: -x[1])[:top_k]
    return [item for item, _ in top_items]

# Example
print("Top picks:", recommend(user_id='5eece14efc13ae6609000067'))

Top picks: [np.int64(103202313), np.int64(217310220), np.int64(328725259), np.int64(692529126), np.int64(286112666)]
