In [None]:
import os
import pickle
import numpy as np
import snntorch as snn
import snntorch.functional as SF
import torch
from tqdm import tqdm

In [None]:
# "__file__" is a quoted string here, not the module attribute: abspath()
# resolves it against the current working directory, so current_dir ends up
# being that working directory.
current_dir = os.path.dirname(os.path.abspath("__file__"))

# Generated artefacts are looked up in a sibling "outputs" folder; the
# training data sits in its "train" subfolder.
output_dir = os.path.join(current_dir, "..", "outputs")
training_data = os.path.join(current_dir, "..", "outputs", "train")

In [None]:
# Read the pickled resume and job collections from the training folder.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# these files must come from a trusted source.
with open(os.path.join(training_data, "resumes.pkl"), "rb") as resumes_file:
    resumes = pickle.load(resumes_file)

with open(os.path.join(training_data, "jobs.pkl"), "rb") as jobs_file:
    jobs = pickle.load(jobs_file)

In [None]:
def count_entries(pkl_path):
    """Return the number of top-level entries stored in a pickle file.

    Args:
        pkl_path: Path of the pickle file; it is opened in binary read mode.

    Returns:
        ``len()`` of the unpickled object.
    """
    with open(pkl_path, 'rb') as handle:
        entries = pickle.load(handle)
    return len(entries)

# Bare file names would be resolved against the working directory, but the
# pickles are read from the training folder elsewhere in this notebook, so
# build the full paths from training_data.
num_resumes = count_entries(os.path.join(training_data, 'resumes.pkl'))
num_jobs = count_entries(os.path.join(training_data, 'jobs.pkl'))

print(f"Resumes: {num_resumes} | Jobs: {num_jobs}")

In [11]:
# Sizes of the two collections that are already loaded in memory.
num_resumes, num_jobs = len(resumes), len(jobs)

print(f"Resumes: {num_resumes} | Jobs: {num_jobs}")

Resumes: 1473 | Jobs: 435


In [None]:
def incremental_population_encode(embeddings,
                                  num_neurons=5,
                                  time_steps=10,
                                  batch_size=50,
                                  output_dir="spike_data",
                                  width=0.2):
    """Min-max normalise ``embeddings`` and save stochastic spike trains batch by batch.

    Every normalised value is compared with ``num_neurons`` Gaussian tuning
    curves whose centres are evenly spaced on [0, 1]. The curve value is used
    as the firing probability of that neuron at each time step. Each batch is
    written to ``spikes_batch_<start index>.pt`` inside ``output_dir`` as a
    float16 tensor of shape ``(batch, time_steps, num_dims * num_neurons)``.
    The global min/max used for normalisation are written to
    ``norm_params.pt`` in the same directory.

    Spikes are drawn with ``torch.rand_like``, so seed torch's RNG beforehand
    if reproducible output is needed.

    Args:
        embeddings: 2-D torch tensor laid out as (samples, dims).
        num_neurons: Number of tuning curves per input dimension.
        time_steps: Number of independent spike draws per sample.
        batch_size: Number of samples stored in each output file.
        output_dir: Directory receiving the files; created when missing.
        width: Standard deviation of the Gaussian tuning curves.

    Returns:
        ``output_dir``, unchanged.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Normalise with the global extrema; the epsilon avoids division by zero
    # when every value is identical.
    min_val = embeddings.min()
    max_val = embeddings.max()
    embeddings = (embeddings - min_val) / (max_val - min_val + 1e-8)

    centers = torch.linspace(0, 1, num_neurons, device=embeddings.device)

    num_samples, num_dims = embeddings.shape
    cuda_available = torch.cuda.is_available()
    batches_written = 0

    for batch_start in tqdm(range(0, num_samples, batch_size),
                            desc="Encoding batches"):
        # Slicing clamps at the end, so the last batch may be shorter.
        batch = embeddings[batch_start:batch_start + batch_size]

        batch_spikes = torch.zeros((len(batch), time_steps, num_dims * num_neurons),
                                   dtype=torch.float16)

        # (batch, dims, 1) against (1, 1, neurons) -> (batch, dims, neurons)
        distance = batch.unsqueeze(2) - centers.unsqueeze(0).unsqueeze(0)
        probs = torch.exp(-(distance.pow(2) / (2 * width**2)))

        for t in range(time_steps):
            fired = torch.rand_like(probs) < probs
            batch_spikes[:, t] = fired.to(batch_spikes.dtype).view(len(batch), -1)

        torch.save(batch_spikes,
                   os.path.join(output_dir, f"spikes_batch_{batch_start:06d}.pt"))
        batches_written += 1

        del batch_spikes, probs
        if cuda_available:
            torch.cuda.empty_cache()

    torch.save({'min': min_val, 'max': max_val},
               os.path.join(output_dir, "norm_params.pt"))

    # Report the batches written by this call; listing the directory would
    # also count files that were already there.
    print(f"Saved {num_samples} samples across {batches_written} files in {output_dir}")
    return output_dir

# NOTE(review): X_train is not defined in any cell visible in this notebook,
# so this cell raises NameError on a fresh kernel -- confirm where it should
# come from or retire the cell.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)

output_path = incremental_population_encode(
    X_train_tensor, num_neurons=3, time_steps=5, batch_size=50, output_dir="spike_trains"
)

Encoding batches: 100%|██████████| 12816/12816 [03:46<00:00, 56.70it/s]


Saved 640755 samples across 12816 files in spike_trains


In [None]:
def incremental_population_encode(embeddings,
                                  output_dir,
                                  num_neurons=5,
                                  time_steps=10,
                                  batch_size=50,
                                  width=0.2,
                                  ):
    """Save stochastic population-coded spike trains for ``embeddings`` batch by batch.

    Every input value is compared with ``num_neurons`` Gaussian tuning curves
    whose centres are evenly spaced on [0, 1]. The curve value is used as the
    firing probability of that neuron at each time step. Each batch is written
    to ``spikes_batch_<start index>.pt`` inside ``output_dir`` as a float16
    tensor of shape ``(batch, time_steps, num_dims * num_neurons)``.

    Spikes are drawn with ``torch.rand_like``, so seed torch's RNG beforehand
    if reproducible output is needed.

    NOTE(review): no normalisation is applied here. Because the centres span
    [0, 1], values outside that range only reach low firing probabilities --
    confirm that callers pass data already scaled to [0, 1].

    Args:
        embeddings: 2-D torch tensor laid out as (samples, dims).
        output_dir: Directory receiving the files; created when missing.
        num_neurons: Number of tuning curves per input dimension.
        time_steps: Number of independent spike draws per sample.
        batch_size: Number of samples stored in each output file.
        width: Standard deviation of the Gaussian tuning curves.

    Returns:
        ``output_dir``, unchanged.
    """
    # torch.save does not create parent directories, so make sure the target
    # exists before the first batch is written.
    os.makedirs(output_dir, exist_ok=True)

    centers = torch.linspace(0, 1, num_neurons, device=embeddings.device)

    num_samples, num_dims = embeddings.shape
    cuda_available = torch.cuda.is_available()

    for batch_start in tqdm(range(0, num_samples, batch_size), desc="Encoding"):
        # Slicing clamps at the end, so the last batch may be shorter.
        batch = embeddings[batch_start:batch_start + batch_size]

        batch_spikes = torch.zeros((len(batch), time_steps, num_dims * num_neurons),
                                   dtype=torch.float16)

        # (batch, dims, 1) against (1, 1, neurons) -> (batch, dims, neurons)
        distance = batch.unsqueeze(2) - centers.unsqueeze(0).unsqueeze(0)
        probs = torch.exp(-(distance.pow(2) / (2 * width**2)))

        for t in range(time_steps):
            fired = torch.rand_like(probs) < probs
            batch_spikes[:, t] = fired.to(batch_spikes.dtype).view(len(batch), -1)

        torch.save(batch_spikes,
                   os.path.join(output_dir, f"spikes_batch_{batch_start:06d}.pt"))

        del batch_spikes, probs
        if cuda_available:
            torch.cuda.empty_cache()

    print(f"Saved {num_samples} samples to {output_dir}")
    return output_dir

In [None]:
# Convert the loaded resume and job data to float32 tensors for encoding.
resume_tensor = torch.tensor(resumes, dtype=torch.float32)
job_tensor = torch.tensor(jobs, dtype=torch.float32)

# Build each target path once, then create it idempotently: without
# exist_ok=True, re-running this cell raises FileExistsError.
resume_spike_dir = os.path.join(training_data, "spike_trains", "resumes")
job_spike_dir = os.path.join(training_data, "spike_trains", "jobs")
os.makedirs(resume_spike_dir, exist_ok=True)
os.makedirs(job_spike_dir, exist_ok=True)

In [None]:
# Encode the resume tensor into spike-train files under resume_spike_dir.
output_path = incremental_population_encode(
    resume_tensor,
    output_dir=resume_spike_dir,
    num_neurons=5,
    time_steps=10,
    batch_size=100,
)

Encoding: 100%|██████████| 15/15 [00:00<00:00, 17.74it/s]

Saved 1473 samples to c:\Users\karun\OneDrive\Documents\Brain Inspired AI Project\src\..\outputs\train\spike_trains\resumes





In [None]:
# Encode the job tensor into spike-train files under job_spike_dir.
output_path = incremental_population_encode(
    job_tensor,
    output_dir=job_spike_dir,
    num_neurons=5,
    time_steps=10,
    batch_size=100,
)

Encoding: 100%|██████████| 5/5 [00:00<00:00, 18.78it/s]

Saved 435 samples to c:\Users\karun\OneDrive\Documents\Brain Inspired AI Project\src\..\outputs\train\spike_trains\jobs



