In [20]:
# Mount Google Drive at /content/drive so later cells can read and write files
# under that path. `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [21]:
import pandas as pd

# Path to the training CSV ("train_v2.csv") under MyDrive on the mounted Google Drive
file_path = "/content/drive/MyDrive/train_v2.csv"

# NOTE(review): nothing is loaded in this cell; train() below reads the same path directly with pd.read_csv.


In [22]:
import pandas as pd
from torch.utils.data import Dataset

import torch
from sklearn import preprocessing
from torch.utils.data import DataLoader
from sklearn import model_selection
import torch.nn as nn
from sklearn import metrics
import numpy as np

class JobDataset(Dataset):
    """Map-style dataset of (user, job, proposal) triples.

    The three arguments are parallel indexable sequences; element ``i`` of
    each one describes the same interaction. Indexing returns a dict of
    scalar tensors: ``user`` and ``job`` as ``torch.long``, ``proposal`` as
    ``torch.float``.
    """

    def __init__(self, user, job, proposal):
        self.user, self.job, self.proposal = user, job, proposal

    def __len__(self):
        # The user sequence defines the dataset length.
        return len(self.user)

    def __getitem__(self, item):
        # (output key, backing sequence, tensor dtype) for every field of a sample.
        fields = (
            ("user", self.user, torch.long),
            ("job", self.job, torch.long),
            ("proposal", self.proposal, torch.float),
        )
        return {
            key: torch.tensor(values[item], dtype=dtype)
            for key, values, dtype in fields
        }

class RecsysModel(nn.Module):
    """Dot-product recommender over learned user and job embeddings.

    Args:
        num_users: number of rows in the user embedding table.
        num_jobs: number of rows in the job embedding table.
        embedding_dim: width of both embedding tables (default 32).
    """

    def __init__(self, num_users, num_jobs, embedding_dim=32):
        super().__init__()
        self.user_embed = nn.Embedding(num_users, embedding_dim)
        self.job_embed = nn.Embedding(num_jobs, embedding_dim)
        # Registered (and therefore part of the state_dict) but not called in forward().
        self.out = nn.Linear(embedding_dim, 1)

    def forward(self, user, job):
        """Return the dot product of each user/job embedding pair.

        The embeddings are multiplied element-wise and summed over dim 1 with
        ``keepdim=True``, so index tensors of shape (N,) give an (N, 1) result.
        """
        interaction = self.user_embed(user) * self.job_embed(job)
        return interaction.sum(dim=1, keepdim=True)

def train():
    """Train the dot-product recommender on train_v2.csv and persist its artefacts.

    Steps: read the interaction CSV from Google Drive, label-encode the
    ``user`` and ``job`` columns, write the encoder classes and the entity
    counts to CSV, hold out 10% of the rows for validation, train
    ``RecsysModel`` for 10 epochs with MSE loss and Adam (lr=1e-3) while
    printing the mean train/validation loss per epoch, and finally save the
    model ``state_dict``.
    """
    frame = pd.read_csv('/content/drive/MyDrive/train_v2.csv')

    # Replace raw user/job identifiers with contiguous integer codes.
    user_encoder = preprocessing.LabelEncoder()
    job_encoder = preprocessing.LabelEncoder()
    frame['user'] = user_encoder.fit_transform(frame['user'])
    frame['job'] = job_encoder.fit_transform(frame['job'])

    n_users = len(user_encoder.classes_)
    n_jobs = len(job_encoder.classes_)

    # Persist what inference needs to rebuild the encoders and size the model.
    pd.DataFrame({'classes': user_encoder.classes_}).to_csv(
        '/content/drive/MyDrive/newTrain/csvs/lbl_user_classes_02.csv', index=False
    )
    pd.DataFrame({'classes': job_encoder.classes_}).to_csv(
        '/content/drive/MyDrive/newTrain/csvs/lbl_job_classes_02.csv', index=False
    )
    pd.DataFrame({'num_users': [n_users], 'num_jobs': [n_jobs]}).to_csv(
        '/content/drive/MyDrive/newTrain/csvs/num_users_jobs_02.csv', index=False
    )

    # Plain random 90/10 split with a fixed seed (no stratification on proposal).
    train_part, valid_part = model_selection.train_test_split(
        frame, test_size=0.1, random_state=42
    )

    def as_dataset(part):
        # Wrap the three columns of one split as a JobDataset.
        return JobDataset(
            user=part.user.values, job=part.job.values, proposal=part.proposal.values
        )

    train_set = as_dataset(train_part)
    valid_set = as_dataset(valid_part)

    model = RecsysModel(num_users=n_users, num_jobs=n_jobs)

    train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
    valid_loader = DataLoader(valid_set, batch_size=64, shuffle=False)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    def batch_loss(batch):
        # Forward one batch; the target is reshaped to (N, 1) to match the model output.
        predicted = model(batch['user'], batch['job'])
        return criterion(predicted, batch['proposal'].view(-1, 1))

    num_epochs = 10
    for epoch in range(num_epochs):
        # Optimisation pass over the training split.
        model.train()
        running_train = 0.0
        for batch in train_loader:
            optimizer.zero_grad()
            loss = batch_loss(batch)
            loss.backward()
            optimizer.step()
            running_train += loss.item()
        train_loss = running_train / len(train_loader)

        # Evaluation pass over the held-out split, without gradient tracking.
        model.eval()
        running_val = 0.0
        with torch.no_grad():
            for batch in valid_loader:
                running_val += batch_loss(batch).item()
        val_loss = running_val / len(valid_loader)

        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

    # Only the weights are stored; the architecture is rebuilt at load time.
    torch.save(model.state_dict(), '/content/drive/MyDrive/newTrain/models/trained_model_02.pth')

if __name__ == "__main__":
    train()


Epoch 1/10, Train Loss: 73.6616, Validation Loss: 86.8847
Epoch 2/10, Train Loss: 75.8126, Validation Loss: 86.8623
Epoch 3/10, Train Loss: 72.8465, Validation Loss: 86.8312
Epoch 4/10, Train Loss: 66.2741, Validation Loss: 86.8044
Epoch 5/10, Train Loss: 68.1539, Validation Loss: 86.7787
Epoch 6/10, Train Loss: 66.0811, Validation Loss: 86.7511
Epoch 7/10, Train Loss: 66.4566, Validation Loss: 86.7232
Epoch 8/10, Train Loss: 75.0649, Validation Loss: 86.6968
Epoch 9/10, Train Loss: 73.6374, Validation Loss: 86.6701
Epoch 10/10, Train Loss: 72.5559, Validation Loss: 86.6470


In [23]:
import pandas as pd
import torch
from sklearn import preprocessing
from torch.utils.data import DataLoader

# Restore the label encoders and the user/job counts written by train().
lbl_user = preprocessing.LabelEncoder()
lbl_job = preprocessing.LabelEncoder()
# classes_ has to be a NumPy array, as it is after fit(): with a pandas Series,
# `value in classes_` tests the index labels (0..n-1) instead of the stored IDs.
lbl_user.classes_ = pd.read_csv('/content/drive/MyDrive/newTrain/csvs/lbl_user_classes_02.csv')['classes'].to_numpy()
lbl_job.classes_ = pd.read_csv('/content/drive/MyDrive/newTrain/csvs/lbl_job_classes_02.csv')['classes'].to_numpy()
# Single-row frame with the columns 'num_users' and 'num_jobs'.
num_users_jobs = pd.read_csv('/content/drive/MyDrive/newTrain/csvs/num_users_jobs_02.csv')

class Tester:
    """Score every known job for a user with a trained ``RecsysModel``.

    Relies on the module-level ``lbl_user`` (restored user encoder) and
    ``num_users_jobs`` (frame holding the user/job counts) defined above.
    """

    def __init__(self, model_path):
        """Rebuild the model, load its weights from ``model_path`` and index the known users."""
        self.model = RecsysModel(
            num_users=int(num_users_jobs['num_users'][0]),
            num_jobs=int(num_users_jobs['num_jobs'][0]),
        )
        # map_location keeps loading working on a CPU-only runtime even when
        # the checkpoint was written from a GPU session.
        self.model.load_state_dict(torch.load(model_path, map_location='cpu'))
        self.model.eval()
        self._user_lookup = self._build_user_lookup(lbl_user.classes_)

    @staticmethod
    def _build_user_lookup(classes):
        """Map each known user ID to its encoded index.

        Every ID is registered both as stored and in its ``str()`` form, so a
        numeric ID read back from CSV still matches the text typed at an
        ``input()`` prompt. The position in ``classes`` is the encoded label
        because the encoder assigns labels in ``classes_`` order.
        """
        lookup = {}
        for position, raw_id in enumerate(classes):
            lookup.setdefault(raw_id, position)
            lookup.setdefault(str(raw_id), position)
        return lookup

    def _resolve_user(self, user_id):
        """Return the encoded index for ``user_id``, or ``None`` when it is unknown."""
        if user_id in self._user_lookup:
            return self._user_lookup[user_id]
        return self._user_lookup.get(str(user_id))

    def test(self, user_ids, job_ids):
        """Return one ``{"user_id", "job_scores"}`` dict per requested user.

        ``job_scores`` holds a score for every job index for known users and
        is an empty list for unknown ones. ``job_ids`` is accepted for
        interface compatibility but not used: all jobs are always scored.
        """
        recommendations = []

        for user_id in user_ids:
            user_idx = self._resolve_user(user_id)
            job_scores = [] if user_idx is None else self.predict_jobs(user_idx)
            recommendations.append({"user_id": user_id, "job_scores": job_scores})

        return recommendations

    def predict_jobs(self, user_idx):
        """Return a 1-D NumPy array with the model score of every job for ``user_idx``."""
        user_idx_tensor = torch.tensor([user_idx], dtype=torch.long)
        job_ids_tensor = torch.arange(int(num_users_jobs['num_jobs'][0]), dtype=torch.long)

        with torch.no_grad():
            # squeeze(-1) drops only the trailing score axis, so a catalogue
            # with a single job still yields a 1-D array rather than a 0-d one.
            job_scores = self.model(user_idx_tensor, job_ids_tensor).squeeze(-1).numpy()

        return job_scores

def main():
    """Prompt for user IDs until 'exit' and print every job ranked by predicted score."""
    model_path = '/content/drive/MyDrive/newTrain/models/trained_model_02.pth'  # Specify the path to your trained model
    tester = Tester(model_path)

    while True:
        # Strip stray whitespace so "722 " still matches user 722.
        user_id = input("Enter the user ID (or 'exit' to quit): ").strip()
        if user_id.lower() == 'exit':
            break

        # Scores are produced for all jobs, in the encoder's class order.
        job_ids = lbl_job.classes_

        recommendations = tester.test([user_id], job_ids)

        print(f"Job recommendations for user {user_id}:")

        for recommended_jobs in recommendations:
            print(f"User: {recommended_jobs['user_id']}")

            job_scores = recommended_jobs['job_scores']
            # len() works for both the empty list and a NumPy array; `not array`
            # raises ValueError for arrays with more than one element.
            if len(job_scores) == 0:
                print("No recommendations")
                continue

            sorted_jobs = sorted(
                zip(job_ids, job_scores),
                key=lambda x: x[1],
                reverse=True
            )

            for job, score in sorted_jobs:
                print(f"Job: {job}, Score: {score}")

if __name__ == "__main__":
    main()


Enter the user ID (or 'exit' to quit): 722
Job recommendations for user 722:
User: 722
No recommendations
Enter the user ID (or 'exit' to quit): exit
