In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from transformers import T5Tokenizer, TFT5ForConditionalGeneration
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy






In [3]:
data=pd.read_csv('JOB DATA.csv')
data

Unnamed: 0,Job Title,Job Description,skills,Responsibilities
0,Digital Marketing Specialist,Social Media Managers oversee an organizations...,"Social media platforms (e.g., Facebook, Twitte...","Manage and grow social media accounts, create ..."
1,Web Developer,Frontend Web Developers design and implement u...,"HTML, CSS, JavaScript Frontend frameworks (e.g...","Design and code user interfaces for websites, ..."
2,Operations Manager,Quality Control Managers establish and enforce...,Quality control processes and methodologies St...,Establish and enforce quality control standard...
3,Network Engineer,"Wireless Network Engineers design, implement, ...",Wireless network design and architecture Wi-Fi...,"Design, configure, and optimize wireless netwo..."
4,Event Manager,A Conference Manager coordinates and manages c...,Event planning Conference logistics Budget man...,Specialize in conference and convention planni...
...,...,...,...,...
134,Account Executive,A Key Account Executive manages strategic acco...,Key account management Sales strategy developm...,"Focus on strategic accounts, serving as the pr..."
135,Speech Therapist,Adult Speech Therapists work with adults to ad...,Adult speech therapy Speech-language assessmen...,Focus on speech and language therapy for adult...
136,HR Coordinator,Benefits Coordinators administer employee bene...,Employee benefits administration Benefits comp...,"Administer employee benefits programs, respond..."
137,Architectural Designer,Interior Designers plan and design interior sp...,Interior design Space planning Color theory Ma...,"Focus on interior spaces, selecting finishes, ..."


In [4]:
class JobDataset(tf.keras.utils.Sequence):
    def __init__(self, job_titles, job_skills, tokenizer, max_length, batch_size):
        self.job_titles = job_titles
        self.job_skills = job_skills
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.batch_size = batch_size

    def __len__(self):
        return len(self.job_titles) // self.batch_size

    def __getitem__(self, idx):
        batch_job_titles = self.job_titles[idx * self.batch_size: (idx + 1) * self.batch_size]
        batch_job_skills = self.job_skills[idx * self.batch_size: (idx + 1) * self.batch_size]

        batch_input_texts = [f"job title: {title}" for title in batch_job_titles]
        batch_target_texts = batch_job_skills

        batch_input_encodings = self.tokenizer(batch_input_texts, padding=True, truncation=True, return_tensors="tf", max_length=self.max_length)
        batch_target_encodings = self.tokenizer(batch_target_texts, padding=True, truncation=True, return_tensors="tf", max_length=self.max_length)

        return {
            "input_ids": batch_input_encodings["input_ids"],
            "attention_mask": batch_input_encodings["attention_mask"],
            "labels": batch_target_encodings["input_ids"]
        }

In [5]:
job_titles = data['Job Title'].tolist()  
job_descriptions = data['Job Description'].tolist()
job_skills = data['skills'].tolist()  


In [6]:
max_length = 128
batch_size = 8
tokenizer = T5Tokenizer.from_pretrained("t5-small")
tokenizer.pad_token = tokenizer.eos_token
train_dataset = JobDataset(job_titles, job_skills, tokenizer, max_length, batch_size)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [7]:
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")

loss_object = SparseCategoricalCrossentropy(from_logits=True)
optimizer = Adam(learning_rate=1e-4)





All PyTorch model weights were used when initializing TFT5ForConditionalGeneration.

All the weights of TFT5ForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


In [8]:
@tf.function
def train_step(batch):
    with tf.GradientTape() as tape:
        outputs = model(input_ids=batch["input_ids"], attention_mask=batch["attention_mask"], labels=batch["labels"], training=True)
        loss = loss_object(tf.reshape(batch["labels"], (-1,)), tf.reshape(outputs.logits, (-1, outputs.logits.shape[-1])))

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    return loss


In [9]:
num_epochs = 2

for epoch in range(num_epochs):
    total_loss = 0
    for batch in train_dataset:
        batch_loss = train_step(batch)
        total_loss += batch_loss

    avg_loss = total_loss / len(train_dataset)
    print(f"Epoch {epoch + 1}, Average Loss: {avg_loss:.4f}")

Epoch 1, Average Loss: 4.1200
Epoch 2, Average Loss: 3.2417


In [17]:
job_data_dict = dict(zip(job_titles, zip(job_descriptions, job_skills)))
user_input = input("Enter a job title: ")
print("\n")
user_job_title = user_input.strip().title()
if user_job_title in job_data_dict:
    
    job_description, job_skills_responsibilities = job_data_dict[user_job_title]
    print(f"Job Description for {user_job_title}:\n{job_description}\n")
    print(f"Skills needed for {user_job_title}:")
    print(job_skills_responsibilities)
else:
    print("Job title not found in the dataset.")

Enter a job title: Software Engineer


Job Description for Software Engineer:
A Backend Developer is tasked with building and maintaining the server-side logic and database infrastructure of web applications. They work on the core functionality, data storage, and server communication to ensure the application runs smoothly and efficiently.

Skills needed for Software Engineer:
Proficiency in one or more backend programming languages (e.g., Java, Python, Node.js, Ruby) API development Database management (SQL or NoSQL) Server management and deployment Security best practices Knowledge of web frameworks (e.g., Express, Django)
