## Import Required Packages

In [1]:
import csv
import json

## Extract Skills from Job Descriptions

In [2]:
from typing import Dict, List


def extract_skills_from_csv(csv_file: str) -> Dict[str, List[str]]:
    """
    Extracts skills from a CSV file containing job titles and associated skills.

    The file must start with a header row that names a "Job Title" column and a
    "Skill" column. Each "Skill" cell holds a comma-separated list of skills.
    Whitespace around titles and skills is trimmed, empty skill entries are
    dropped, and rows whose title is blank are skipped. When the same title
    appears on several rows, the last row wins.

    Args:
    csv_file (str): Path to the CSV file (UTF-8, with or without a BOM).

    Returns:
    Dict[str, List[str]]: A dictionary where keys are job titles and values are lists of skills.

    Raises:
    KeyError: If a data row is read and the header lacks "Job Title" or "Skill".
    """
    job_skills: Dict[str, List[str]] = {}

    # newline="" lets the csv module handle line endings and quoted newlines itself.
    # "utf-8-sig" reads plain UTF-8 and also strips the BOM some spreadsheet
    # exports prepend, which would otherwise corrupt the first header name.
    with open(csv_file, "r", newline="", encoding="utf-8-sig") as file:
        reader = csv.DictReader(file)

        for row in reader:
            # DictReader fills missing cells of short rows with None, hence `or ""`.
            title = (row["Job Title"] or "").strip()
            if not title:
                continue

            # Split on bare commas and trim each piece, so "a,b", "a, b" and
            # "a ,  b" all produce the same two skills.
            raw_skills = row["Skill"] or ""
            pieces = (piece.strip() for piece in raw_skills.split(","))

            # Drop empty entries produced by blank cells or trailing commas.
            job_skills[title] = [piece for piece in pieces if piece]

    return job_skills

## Generate Intent Patterns

In [3]:
def generate_json_structure(
    job_skills: Dict[str, List[str]]
) -> Dict[str, List[Dict[str, List[str]]]]:
    """
    Builds the intents document for a mapping of job titles to skills.

    One intent is produced per job title, in the iteration order of
    ``job_skills``. Each intent carries a "tag" (the lower-cased title with
    spaces replaced by underscores), a fixed set of question "patterns" that
    mention the title, and a fixed set of "responses" that embed the skills as
    a dash-bulleted list, one skill per line.

    Args:
    job_skills (Dict[str, List[str]]): A dictionary where keys are job titles and values are lists of skills.

    Returns:
    Dict[str, List[Dict[str, List[str]]]]: A dictionary with a single "intents" key holding the list of intents.
    """

    def _intent_for(title: str, skill_names: List[str]) -> dict:
        # Render the skills once; every response reuses the same bullet block.
        bullets = "\n".join(f"- {name}" for name in skill_names)

        questions = [
            f"What skills are required for {title}?",
            f"Skills for {title}",
            f"What are the prerequisites for {title}?",
            f"List the required skills for {title}",
            f"Which skills do I need to become a {title}?",
        ]
        answers = [
            f"To become a {title}, you'll need the following skills:\n{bullets}.",
            f"The essential skills for {title} include:\n{bullets}.",
            f"Here are the skills required to excel as a {title}:\n{bullets}.",
            f"Here's a list of skills you'll need for {title}:\n{bullets}.",
        ]

        return {
            "tag": title.lower().replace(" ", "_"),
            "patterns": questions,
            "responses": answers,
        }

    return {
        "intents": [
            _intent_for(title, skill_names)
            for title, skill_names in job_skills.items()
        ]
    }

## Save the JSON data

In [4]:
# Input: CSV of job titles and their skills. Output: intents JSON file.
csv_file = "../data/keywords.csv"
json_file = "../data/intents.json"

# Stage 1: read the CSV into a {job title: [skills]} mapping.
job_skills = extract_skills_from_csv(csv_file)

# Stage 2: turn that mapping into the intents/responses document.
intents_json = generate_json_structure(job_skills)

# Stage 3: serialise with two-space indentation and write it to disk.
serialized_intents = json.dumps(intents_json, indent=2)
with open(json_file, "w") as outfile:
    outfile.write(serialized_intents)

print("Conversion completed. JSON file has been generated.")

Conversion completed. JSON file has been generated.
