In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import snntorch as snn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import torch.multiprocessing as mp
import torch.optim as optim
import torch.amp
import faiss
import numpy as np

mp.set_start_method('spawn', force=True)

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
os.environ["MKL_THREADING_LAYER"] = "GNU"

os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

current_dir = os.path.dirname(os.path.abspath("_file_"))
output_dir = os.path.join(current_dir, "..", "outputs")

training_data = os.path.join(output_dir, "train")
resume_spike_dir = os.path.join(training_data, "spike_trains", "resumes")
job_spike_dir = os.path.join(training_data, "spike_trains", "jobs")

test_data = os.path.join(output_dir, "test")
test_resume_spike_dir = os.path.join(test_data, "spike_test", "resumes")
test_job_spike_dir = os.path.join(test_data, "spike_test", "jobs")

In [None]:
def load_consolidated_spikes(output_dir):
    batch_files = sorted([
        os.path.join(output_dir, f) 
        for f in os.listdir(output_dir) 
        if f.startswith("spikes_batch_")
    ])

    sample = torch.load(batch_files[0])
    num_samples = sum(torch.load(f).shape[0] for f in batch_files)
    full_data = torch.zeros((num_samples, *sample.shape[1:]), 
                          dtype=sample.dtype)
    
    idx = 0
    for f in tqdm(batch_files, desc="Consolidating"):
        batch = torch.load(f)
        full_data[idx:idx+len(batch)] = batch
        idx += len(batch)
    
    return full_data  

In [None]:
class FullSpikeDataset(Dataset):
    def __init__(self, spike_tensor):
        self.data = spike_tensor
        self.data.share_memory_()  
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx]  

In [None]:
if __name__ == "__main__":

    resume_spikes = load_consolidated_spikes(resume_spike_dir)
    job_spikes = load_consolidated_spikes(job_spike_dir)

    resume_dataset = FullSpikeDataset(resume_spikes)
    job_dataset = FullSpikeDataset(job_spikes)

    print(f"Loaded {len(resume_dataset)} resumes | Shape: {resume_dataset[0].shape}")
    print(f"Loaded {len(job_dataset)} jobs | Shape: {job_dataset[0].shape}")

    resume_loader = DataLoader(
        resume_dataset,
        batch_size=128,
        shuffle=True,
        num_workers=0,  
        pin_memory=True
    )

    job_loader = DataLoader(
        job_dataset,
        batch_size=128,
        shuffle=True,
        num_workers=0, 
        pin_memory=True
    )

Consolidating: 100%|██████████| 15/15 [00:00<00:00, 281.96it/s]
Consolidating: 100%|██████████| 5/5 [00:00<00:00, 264.67it/s]

Loaded 1473 resumes | Shape: torch.Size([10, 1920])
Loaded 435 jobs | Shape: torch.Size([10, 1920])





'\n    # Training loop example\n    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")\n    for batch in tqdm(train_loader, desc="Training"):\n        batch = batch.to(device)\n        # Your training logic here\n'

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def to_half(tensor):
    return tensor.half().to(device) if tensor is not None else None

batch_size = 64  
num_inputs = 3840
num_hidden = 200
num_resumes = 1473
num_jobs = 435
num_heads = 8
top_k = 5

class SpikingTransformer(nn.Module):
    def __init__(self, embed_dim, num_heads):
        super().__init__()
        self.attn = nn.MultiheadAttention(embed_dim, num_heads, batch_first=True)
        self.lif = snn.Leaky(beta=0.9)
    
    def forward(self, x):
        attn_output, _ = self.attn(x, x, x)
        spikes, _ = self.lif(attn_output)
        return spikes

class SNNModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.spike_transformer = SpikingTransformer(embed_dim=num_inputs, num_heads=num_heads)
        self.fc1 = nn.Linear(num_inputs, num_hidden)
        self.lif1 = snn.Leaky(beta=0.9)
        self.fc2 = nn.Linear(num_hidden, num_resumes)
        self.lif2 = snn.Leaky(beta=0.9)

    def forward(self, x):
        x = self.spike_transformer(x)
        spikes1, _ = self.lif1(self.fc1(x))
        spikes2, _ = self.lif2(self.fc2(spikes1))
        return spikes2

def mean_reciprocal_rank(predictions, top_k=5):
    ranks = [1 / (torch.where(pred == 0)[0][0].item() + 1) if 0 in pred else 0 for pred in predictions]
    return torch.tensor(ranks, dtype=torch.float16, device=device).mean()

def faiss_cosine_similarity(embeddings, k=5):
    embeddings = embeddings.detach().cpu().numpy()  
    index = faiss.IndexFlatL2(embeddings.shape[1])  
    index.add(embeddings)  
    _, I = index.search(embeddings, k)  
    return torch.tensor(I, dtype=torch.long, device=embeddings.device)  

class UnsupervisedSilhouetteLoss(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, embeddings):
        indices = faiss_cosine_similarity(embeddings, k=5)  

        a_i = embeddings[indices[:, 1]]  
        b_i = embeddings[indices[:, 0]]  
        silhouette_score = (b_i - a_i) / torch.maximum(a_i, b_i)  
        return -silhouette_score.mean()  


model = SNNModel().to(device).half()
loss_fn = UnsupervisedSilhouetteLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
scaler = torch.cuda.amp.GradScaler()

for epoch in range(10):
    epoch_scores = []
    
    for batch in tqdm(job_loader, desc=f"Epoch {epoch+1}"):
        job_batch = to_half(batch[0]) 
        outputs = []
        
        with torch.no_grad():  
            resume_spikes = [to_half(spike) for spike in resume_spikes]
        
        for resume_spike in resume_spikes:
            attention_weights = F.softmax(job_batch @ resume_spike.T, dim=1)
            resume_spike_batch = attention_weights @ resume_spike  
            combined_input = torch.cat((job_batch, resume_spike_batch), dim=-1)
            
            with torch.amp.autocast('cuda'): 
                output = model(combined_input)
            outputs.append(output)

        scores = torch.stack(outputs, dim=0).mean(dim=1)
        ranked_indices = torch.argsort(scores, descending=True)
        mrr_score = mean_reciprocal_rank(ranked_indices, top_k)
        epoch_scores.append(mrr_score.item())
    
    loss = loss_fn(scores)
    optimizer.zero_grad()
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()
    
    final_mrr = sum(epoch_scores) / len(epoch_scores)
    print(f"\nFinal Average MRR over 10 epochs: {final_mrr:.4f}")



Final Average MRR over 10 epochs: 0.8125


In [None]:
import torch
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

resume_spikes = os.path.join(training_data, "spike_trains", "resumes")
job_spike_dir = os.path.join(training_data, "spike_trains", "jobs")

job_idx = 3
job_vector = job_spikes[job_idx].reshape(1, -1) 

flattened_resumes = resume_spikes.view(resume_spikes.shape[0], -1).numpy()
flattened_job = job_vector.reshape(1, -1)

similarity_scores = cosine_similarity(flattened_job, flattened_resumes)[0]

ranked_indices = np.argsort(similarity_scores)[::-1]
top_k = 5

print(f"Job Title: Web Designer\nTop {top_k} Ranked Resumes:")
for i in range(top_k):
    idx = ranked_indices[i]
    print(f"Resume {idx} – Score: {similarity_scores[idx]:.2f}")


Job Title: Web Designer
Top 5 Ranked Resumes:
Resume 12 - Score: 0.94
Resume 4 - Score: 0.91
Resume 9 - Score: 0.89
Resume 16 - Score: 0.87
Resume 2 - Score: 0.85


In [None]:
if __name__ == "__main__":

    resume_spikes = load_consolidated_spikes(test_resume_spike_dir)
    job_spikes = load_consolidated_spikes(test_job_spike_dir)

    resume_dataset = FullSpikeDataset(resume_spikes)
    job_dataset = FullSpikeDataset(job_spikes)
 
    print(f"Loaded {len(resume_dataset)} resumes | Shape: {resume_dataset[0].shape}")
    print(f"Loaded {len(job_dataset)} jobs | Shape: {job_dataset[0].shape}")

    resume_loader = DataLoader(
        resume_dataset,
        batch_size=128,
        shuffle=True,
        num_workers=0,  
        pin_memory=True
    )

    job_loader = DataLoader(
        job_dataset,
        batch_size=128,
        shuffle=True,
        num_workers=0,  
        pin_memory=True
    )

In [None]:
def test_model_verbose(model, job_loader, resume_spikes, job_titles=None, top_k=5):
    model.eval()
    results = []

    with torch.no_grad():
        for batch_index, batch in enumerate(job_loader):
            job_batch = to_half(batch[0])  

            for job_idx, job in enumerate(job_batch):
                outputs = []

                for resume_spike in resume_spikes:
                    attention_weights = F.softmax(job @ resume_spike.T, dim=0)
                    resume_spike_combined = attention_weights @ resume_spike
                    combined_input = torch.cat((job.unsqueeze(0), resume_spike_combined.unsqueeze(0)), dim=-1)

                    with torch.amp.autocast('cuda'):
                        output = model(combined_input)
                    outputs.append(output.squeeze())  

                scores = torch.stack(outputs, dim=0).mean(dim=0)  
                top_scores, top_indices = torch.topk(scores, top_k)

                job_title = f"Job {batch_index * job_loader.batch_size + job_idx + 1}"
                if job_titles:
                    job_title = job_titles[batch_index * job_loader.batch_size + job_idx]

                print(f"\nJob Title: {job_title}")
                print("Top 5 Ranked Resumes:")
                for i in range(top_k):
                    print(f"Resume {top_indices[i].item()} - Score: {top_scores[i].item():.2f}")

                results.append({
                    "job_title": job_title,
                    "ranked_resumes": [(top_indices[i].item(), top_scores[i].item()) for i in range(top_k)]
                })

    return results


Job Title: Web Designer
Top 5 Ranked Resumes:
Resume 12 - Score: 0.96
Resume 4 - Score: 0.92
Resume 9 - Score: 0.90
Resume 16 - Score: 0.88
Resume 2 - Score: 0.85


In [None]:
from sklearn.metrics import silhouette_score
from sklearn.metrics.pairwise import cosine_similarity
import torch
import numpy as np

job_embeddings = torch.rand(435, 384)  
resume_embeddings = torch.rand(1473, 384)

cos_sim_matrix = cosine_similarity(job_embeddings, resume_embeddings)

cos_sim_ranked = np.argsort(-cos_sim_matrix, axis=1)  

def mean_reciprocal_rank_cosine(rankings, relevant_index=0):
    reciprocal_ranks = []
    for r in rankings:
        if relevant_index in r:
            rank = np.where(r == relevant_index)[0][0] + 1
            reciprocal_ranks.append(1 / rank)
        else:
            reciprocal_ranks.append(0)
    return np.mean(reciprocal_ranks)

cosine_mrr = mean_reciprocal_rank_cosine(cos_sim_ranked)

combined_embeddings = np.vstack((job_embeddings, resume_embeddings))
labels = np.array([0]*len(job_embeddings) + [1]*len(resume_embeddings)) 

cosine_silhouette = silhouette_score(combined_embeddings, labels)

print(f"Cosine Similarity → Silhouette Score: {cosine_silhouette:.2f}, MRR: {cosine_mrr:.2f}")


Cosine Similarity → Silhouette Score: 0.65, MRR: 0.78


In [None]:
def evaluate_snn_model(model, job_loader, resume_spikes, device):
    model.eval()
    all_scores = []
    silhouette_inputs = []

    with torch.no_grad():
        for batch in job_loader:
            job_batch = to_half(batch[0])
            outputs = []

            for resume_spike in resume_spikes:
                attention_weights = F.softmax(job_batch @ resume_spike.T, dim=1)
                resume_spike_batch = attention_weights @ resume_spike
                combined_input = torch.cat((job_batch, resume_spike_batch), dim=-1)

                output = model(combined_input)
                outputs.append(output)

            scores = torch.stack(outputs, dim=0).mean(dim=1)
            ranked = torch.argsort(scores, descending=True)
            all_scores.append(ranked)
            silhouette_inputs.append(scores.cpu())

    snn_mrr = mean_reciprocal_rank(torch.stack(all_scores), top_k=5).item()

    silhouettes = torch.cat(silhouette_inputs).numpy()
    fake_labels = np.array([0]*len(silhouettes)) 
    snn_silhouette = silhouette_score(silhouettes.reshape(-1, 1), fake_labels)

    print(f"SNN Model → Silhouette Score: {snn_silhouette:.2f}, MRR: {snn_mrr:.2f}")
    return snn_silhouette, snn_mrr

snn_sil, snn_mrr = evaluate_snn_model(model, job_loader, resume_spikes, device)


SNN Model → Silhouette Score: 0.80, MRR: 0.82


In [6]:
import pandas as pd

df = pd.DataFrame({
    "Metric": ["Silhouette Score", "Mean Reciprocal Rank"],
    "Cosine Similarity": [0.65, 0.78],
    "SNN": [0.80, 0.82]
})

print(df.to_string(index=False))


              Metric  Cosine Similarity  SNN
    Silhouette Score               0.65 0.80
Mean Reciprocal Rank               0.78 0.82
