In [2]:
import pandas as pd
import nltk
import ast
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from kanren import run, var, Relation, facts

# Set your NLTK path

# Uncomment these lines only once to download NLTK resources
# nltk.download('punkt')
# nltk.download('wordnet')
# nltk.download('stopwords')

# Load and preprocess the dataset
def load_dataset(file_path):
    df = pd.read_csv(file_path)
    df['hard_skill'] = df['hard_skill'].apply(ast.literal_eval)
    df['soft_skill'] = df['soft_skill'].apply(ast.literal_eval)
    return df

# NLP Processing
class SkillExtractor:
    def __init__(self):
        self.lemmatizer = WordNetLemmatizer()
        self.stop_words = set(stopwords.words('english'))

    def process_text(self, text):
        tokens = word_tokenize(text.lower())
        tokens = [t for t in tokens if t.isalpha() and t not in self.stop_words]
        tokens = [self.lemmatizer.lemmatize(t) for t in tokens]
        return tokens

    def extract_skills(self, text, dataset_skills):
        processed_tokens = self.process_text(text)
        extracted_skills = []
        for token in processed_tokens:
            for skill in dataset_skills:
                skill_tokens = [self.lemmatizer.lemmatize(s.lower()) for s in skill.split()]
                if any(fuzz.ratio(token, st) > 80 for st in skill_tokens):
                    extracted_skills.append(skill)
        return list(set(extracted_skills))

# Kanren-based Expert System with scoring fix
def career_guidance_system(dataset, input_text):
    skill_extractor = SkillExtractor()

    # Collect all unique skills for matching
    all_hard_skills = set()
    all_soft_skills = set()
    for _, row in dataset.iterrows():
        all_hard_skills.update(row['hard_skill'])
        all_soft_skills.update(row['soft_skill'])

    # Extract skills from input
    extracted_hard_skills = skill_extractor.extract_skills(input_text, all_hard_skills)
    extracted_soft_skills = skill_extractor.extract_skills(input_text, all_soft_skills)
    
    print(f"Extracted Hard Skills: {extracted_hard_skills}")
    print(f"Extracted Soft Skills: {extracted_soft_skills}")

    # Define Kanren relations
    career_match = Relation()

    # Populate facts from dataset
    for _, row in dataset.iterrows():
        facts(career_match, (row['candidate_field'], tuple(row['hard_skill']), tuple(row['soft_skill']), row['label']))

    # Kanren query
    field = var()
    hard_skills_var = var()
    soft_skills_var = var()
    label_var = var()
    results = run(0, field, career_match(field, hard_skills_var, soft_skills_var, label_var))

    # Define scoring function
    def calculate_score(extracted_hard, extracted_soft, dataset_hard, dataset_soft, label_val):
        hard_match = len(set(extracted_hard) & set(dataset_hard))
        soft_match = len(set(extracted_soft) & set(dataset_soft))
        score = hard_match * 2 + soft_match
        if label_val == 1:
            score += 2
        return score

    # Score and find best match
    best_field = None
    max_score = -1

    for f in set(results):
        rows = dataset[dataset['candidate_field'] == f]
        for _, row in rows.iterrows():
            score = calculate_score(
                extracted_hard_skills,
                extracted_soft_skills,
                row['hard_skill'],
                row['soft_skill'],
                row['label']
            )
            if score > max_score:
                max_score = score
                best_field = f

    return best_field if best_field else "No suitable field found"

# Main runner
def run_career_guidance(input_text, dataset_path):
    dataset = load_dataset(dataset_path)
    recommended_field = career_guidance_system(dataset, input_text)
    print(f"A suitable candidate field for you would be {recommended_field}.")


In [3]:
# Example usage
if __name__ == "__main__":
    dataset_path = "Career Guidance Expert System.csv"
    input_text = "I am skilled in software development, data analysis, and have strong communication skills."
    run_career_guidance(input_text, dataset_path)

LookupError: 
**********************************************************************
  Resource [93mstopwords[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('stopwords')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mcorpora/stopwords[0m

  Searched in:
    - 'C:\\Users\\abdel/nltk_data'
    - 'c:\\Abdelrahman\\Materiels ASU\\cognitive science\\Project\\myenv\\nltk_data'
    - 'c:\\Abdelrahman\\Materiels ASU\\cognitive science\\Project\\myenv\\share\\nltk_data'
    - 'c:\\Abdelrahman\\Materiels ASU\\cognitive science\\Project\\myenv\\lib\\nltk_data'
    - 'C:\\Users\\abdel\\AppData\\Roaming\\nltk_data'
    - 'C:\\nltk_data'
    - 'D:\\nltk_data'
    - 'E:\\nltk_data'
**********************************************************************
