# # Career and Industry Suggestion System

# This notebook suggests the best-matching career domain and industry based on the user's described work experience and skills.


# Import Libraries 

In [54]:
import csv
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

# ### Ensure Necessary NLTK Data Files Are Downloaded

In [None]:
# Ensure necessary NLTK data files are downloaded.
# 'punkt' is the tokenizer model used by older NLTK releases, while
# 'punkt_tab' is the one word_tokenize loads on NLTK >= 3.9, so both are
# fetched to keep tokenization working regardless of the installed version.
for resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource)

# ### Function to Read CSV into Dictionary

In [None]:
def read_csv_to_dict(file_path):
    """
    Reads a two-column CSV file and converts it into a dictionary.

    The first row is treated as a header and skipped. Blank rows and rows
    with fewer than two columns are ignored. If the same key appears on
    several rows, the last row wins.

    Parameters:
        file_path (str): The path to the CSV file.

    Returns:
        dict: A dictionary with the first column as keys and the second
            column, split on whitespace, as a list of keywords. Empty if the
            file has no data rows.
    """
    data_dict = {}
    # newline='' lets the csv module do its own newline handling, which is
    # required for quoted fields that contain line breaks.
    with open(file_path, 'r', newline='') as file:
        reader = csv.reader(file)
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            if len(row) < 2:
                # Blank or malformed line: nothing to map, so skip it
                # instead of failing with IndexError.
                continue
            data_dict[row[0]] = row[1].split()
    return data_dict


# ### Function to Tokenize and Filter Text

In [None]:
def tokenize_and_filter(text):
    """
    Splits text into lowercase word tokens and drops English stopwords.

    Parameters:
        text (str): The input text to tokenize.

    Returns:
        list: The tokens of the lowercased text, in order, excluding any
            token found in NLTK's English stopword list.
    """
    # Lowercase before tokenizing so comparison with the stopword list works
    lowered = text.lower()
    words = word_tokenize(lowered)

    english_stopwords = set(stopwords.words('english'))

    kept = []
    for word in words:
        if word in english_stopwords:
            continue
        kept.append(word)
    return kept


# ### Function to Analyze Text

In [None]:
def analyze_text(tokens, categories):
    """
    Computes a keyword-match score for each category.

    A category's score is the number of its keywords that occur among the
    tokens. A keyword listed more than once in a category is counted each
    time it is listed.

    Parameters:
        tokens (iterable): The hashable tokens (e.g. strings) to analyze.
        categories (dict): A dictionary with categories as keys and lists of keywords as values.

    Returns:
        dict: A dictionary with categories as keys and integer scores as values.
    """
    # Build the lookup set once. This gives constant-time membership tests
    # instead of rescanning the token list for every keyword, and it means a
    # one-shot iterable of tokens is not exhausted after the first category.
    token_set = set(tokens)

    return {
        category: sum(keyword in token_set for keyword in keywords)
        for category, keywords in categories.items()
    }

# ### Function to Suggest Career and Industry

In [None]:
def suggest_career_and_industry(user_text, work_exp, career_domains, industries):
    """
    Suggests the best career domain and industry based on the user's input text and work experience.

    The user text is tokenized, each career domain and industry is scored by
    keyword matches, and the highest-scoring entry of each is chosen. On a
    tie (including the case where nothing matches and every score is 0) the
    first entry in the dictionary's iteration order is returned.

    Parameters:
        user_text (str): The user's input text describing their work experience and skills.
        work_exp (dict): A dictionary with domains as keys and years of experience as values.
        career_domains (dict): A dictionary of career domains and associated keywords.
        industries (dict): A dictionary of industries and associated keywords.

    Returns:
        tuple: The suggested career domain and industry. Either element is
            None when the corresponding dictionary is empty.
    """
    # Tokenize and filter user text
    filtered_tokens = tokenize_and_filter(user_text)

    # Analyze text for career domains and industries
    career_scores = analyze_text(filtered_tokens, career_domains)
    industry_scores = analyze_text(filtered_tokens, industries)

    # Determine the best career and industry; default=None avoids an opaque
    # "max() arg is an empty sequence" error when a category file had no rows.
    best_career = max(career_scores, key=career_scores.get, default=None)
    best_industry = max(industry_scores, key=industry_scores.get, default=None)

    # Adjust career suggestion based on work experience: five or more years
    # in non-tech enterprises upgrades the suggestion to entrepreneurship.
    if (best_career == 'non-tech-enterprises'
            and work_exp.get('non-tech-enterprises', 0) >= 5):
        best_career = 'entrepreneurship'

    return best_career, best_industry


# ### Main Function to Execute the Workflow

In [55]:
def _prompt_non_negative_int(prompt):
    """Prompts repeatedly until the user enters a whole number that is zero or greater."""
    while True:
        raw = input(prompt)
        try:
            value = int(raw)
        except ValueError:
            print("Please enter a whole number.")
            continue
        if value < 0:
            print("Please enter a number that is zero or greater.")
            continue
        return value


def main():
    """
    Runs the interactive career and industry suggestion workflow.

    Loads the keyword tables from 'career_domains.csv' and 'industries.csv'
    in the current working directory, collects the user's free-text
    description and per-domain years of experience from standard input, and
    prints the suggested career option and industry.
    """
    # ### Load Data from CSV Files
    career_domains = read_csv_to_dict('career_domains.csv')
    industries = read_csv_to_dict('industries.csv')

    # ### Get User Input
    user_text = input("Enter your work experience and skills: ")
    num_domains = _prompt_non_negative_int("Enter the number of domains you have worked in: ")

    work_exp = {}
    for i in range(num_domains):
        # Strip stray whitespace so the domain name can match a known key exactly
        domain = input(f'Enter domain {i+1} of work experience: ').strip()
        experience = _prompt_non_negative_int(
            f"Enter the number of years of work experience in domain {i+1}: "
        )
        work_exp[domain] = experience

    # ### Perform Analysis and Suggest Career Options
    best_career, best_industry = suggest_career_and_industry(user_text, work_exp, career_domains, industries)

    # ### Print the Suggested Career Option and Industry
    print("Based on your input, the suggested career option is:", best_career)
    print("The suggested industry is:", best_industry)

# ### Execute the Main Function
# Run the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Enter your work experience and skills: agriculture
Enter the number of domains you have worked in: 3
Enter the 1 domain of work experience: agriculture
Enter the number of years of work experience in 1 domain: 5
Enter the 2 domain of work experience: bussiness
Enter the number of years of work experience in 2 domain: 3
Enter the 3 domain of work experience: marketing
Enter the number of years of work experience in 3 domain: 3
Based on your input, the suggested career option is: agritech
The suggested industry is: advertising & marketing
