In [1]:
import pandas as pd
from colorama import Fore, Style, init
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import pandas as pd
from colorama import Fore, Style
from better_profanity import profanity


In [2]:
# Understanding the dataset: load the careers sheet and summarise it per discipline.
# NOTE(review): this is an absolute, machine-specific path — point it at your
# own copy of the workbook before running the notebook elsewhere.
file_path = '/Users/louisvsbigmac/Documents/Office 365/xlsx/Creative Careers.xlsx'

# Open the workbook through a context manager so the underlying file handle
# is closed as soon as the sheet has been parsed (the bare pd.ExcelFile(...)
# call left it open for the lifetime of the kernel).
with pd.ExcelFile(file_path) as spreadsheet:
    df = spreadsheet.parse('Sheet1')

# One row per job, so the row count is the job count.
num_jobs = len(df)
jobs_per_discipline = df['Discipline'].value_counts()
print(f'Total number of jobs: {num_jobs}')
print('Number of jobs per discipline:')
print(jobs_per_discipline)

Total number of jobs: 464
Number of jobs per discipline:
Discipline
Moving Image & Theatre           103
Arts & Crafts                     76
Data and R&D                      60
Culture & Heritage                54
Marketing                         49
Fashion & Textiles                35
Music & Audio                     35
Games & 3D                        22
Architecture & Interiors          15
Journalism & Creative Writing     15
Name: count, dtype: int64


In [3]:
# Further previewing the dataset: print every job as a
# Discipline > Process > Function > Job tree.
grouped = df.groupby(['Discipline', 'Process', 'Function'])['Job'].apply(list).reset_index()
for discipline, discipline_df in grouped.groupby('Discipline'):
    print(f"Discipline: {discipline}")
    for _, row in discipline_df.iterrows():
        # These locals are intentionally NOT called `process` / `function`:
        # at notebook (module) scope a loop variable named `process` would
        # overwrite the `process` module imported from fuzzywuzzy in the
        # first cell and break any later call to process.extract(...).
        process_name = row['Process']
        function_name = row['Function']
        jobs = row['Job']
        print(f"  Process: {process_name}")
        print(f"    Function: {function_name}")
        for job in jobs:
            print(f"      Job: {job}")
    # Blank line between disciplines for readability.
    print()

Discipline: Architecture & Interiors
  Process: Creatives
    Function: Architects
      Job: Architect
      Job: Interior architect
      Job: Landscape architect
      Job: Naval architect
  Process: Creatives
    Function: Artists/Photographers
      Job: Modelling artist
  Process: Creatives
    Function: Builders
      Job: Furniture maker
      Job: Model maker
  Process: Creatives
    Function: Designers
      Job: Furniture designer
      Job: Interior designer
      Job: Kitchen and bathroom designer
  Process: Creatives
    Function: Tailors
      Job: Upholsterer
  Process: Facilitators
    Function: Technicians/Engineers/Recorders
      Job: Architectural technician
      Job: Build engineer
      Job: Furniture restorer
  Process: Researchers
    Function: Technologists/Researchers
      Job: Architectural technologist

Discipline: Arts & Crafts
  Process: Creatives
    Function: Artists/Photographers
      Job: Calligrapher
      Job: Concept artist
      Job: Drawer/ske

In [5]:
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import pandas as pd
from colorama import Fore, Style
from better_profanity import profanity
# Words that should cause a user prompt to be rejected.
# TODO: extend this list — more offensive words are needed.
custom_offensive_words = [
    'racist',
    'idiot',
    'stupid',
    'dumb',
    'moron',
    'jerk',
    'asshole',
    'bastard',
    'dickhead',
    'shithead',
    'retard',
    'loser',
    'freak',
    'creep',
    'scumbag',
]

# Hand the list to the better_profanity filter used by recommend_jobs().
# NOTE(review): confirm against the better_profanity docs whether
# load_censor_words() replaces the library's built-in word list or extends it.
profanity.load_censor_words(custom_offensive_words)


# Keyword -> job-title lookup used for exact-keyword prompts.
# Keys are lowercase; each value lists the job titles suggested for that keyword.
# TODO: more keyword-to-job mappings are needed.
keyword_to_job = {
    'wood': [
        'Carpenter',
        'Woodworker',
        'Furniture maker',
    ],
    'data': [
        'Data analyst',
        'Data scientist',
        'Statistician',
    ],
    'architecture': [
        'Architect',
        'Architectural technologist',
    ],
    'science': [
        'Scientist',
        'Researcher',
        'Data scientist',
        'Biologist',
        'Chemist',
    ],
    'engineering': [
        'Engineer',
        'Mechanical engineer',
        'Electrical engineer',
        'Civil engineer',
    ],
}


def fuzzy_match(prompt, choices, score_threshold=80, limit=10):
    """Return the fuzzy matches for ``prompt`` that reach ``score_threshold``.

    Parameters
    ----------
    prompt : str
        Text to look for.
    choices : sequence of str
        Candidate strings to compare against.
    score_threshold : int, default 80
        Minimum score (inclusive) a candidate must reach to be kept.
    limit : int or None, default 10
        Maximum number of best-scoring candidates requested from
        ``process.extract`` before the threshold filter is applied.
        Previously hard-coded to 10; the default keeps that behaviour.
        The value is forwarded unchanged to ``process.extract``.

    Returns
    -------
    list
        The entries returned by ``process.extract`` (element 0 is the matched
        choice, element 1 its score) whose score is >= ``score_threshold``.
    """
    # partial_ratio scores on the best-matching substring, so a short prompt
    # can match inside a longer job title.
    candidates = process.extract(prompt, choices, limit=limit, scorer=fuzz.partial_ratio)
    return [match for match in candidates if match[1] >= score_threshold]

def recommend_jobs(prompt, df):
    """Recommend rows of ``df`` whose discipline, process, function or job match ``prompt``.

    The prompt is stripped and lower-cased, then handled in this order:

    1. If the profanity filter flags it, a warning message is returned.
    2. If it is exactly a key of ``keyword_to_job`` and any of the mapped job
       titles exist in ``df['Job']``, those rows are returned.
    3. Otherwise each of the four columns is fuzzy-matched against the prompt
       (via ``fuzzy_match``) and the union of all matching rows is returned.

    Parameters
    ----------
    prompt : str
        Free-text query typed by the user.
    df : pandas.DataFrame
        Job table with string columns 'Discipline', 'Process', 'Function', 'Job'.

    Returns
    -------
    pandas.DataFrame or str
        The matching rows (columns Discipline, Process, Function, Job, in the
        row order of ``df``), or a human-readable message when the prompt is
        rejected or nothing matches.
    """
    result_columns = ['Discipline', 'Process', 'Function', 'Job']

    # Strip as well as lower-case so "wood " still hits the keyword lookup.
    prompt = prompt.strip().lower()

    # Check if the prompt contains offensive words
    if profanity.contains_profanity(prompt):
        return "The prompt contains offensive language. Please try again with appropriate language."

    # Exact keyword lookup; falls through to fuzzy matching when none of the
    # mapped job titles is present in the dataset.
    if prompt in keyword_to_job:
        related_jobs = {job.lower() for job in keyword_to_job[prompt]}
        keyword_hits = df[df['Job'].str.lower().isin(related_jobs)]
        if not keyword_hits.empty:
            return keyword_hits[result_columns]

    # Boolean row mask accumulated across the four columns. Using a mask
    # (instead of a set of index labels) keeps the result in the DataFrame's
    # own row order, so the output is deterministic.
    selected = pd.Series(False, index=df.index)
    for column in result_columns:
        # Lower-case once per column and reuse it for both the candidate list
        # and the row lookup.
        lowered = df[column].str.lower()
        # Drop missing cells so only real strings are offered to the fuzzy
        # matcher as choices.
        choices = lowered.dropna().unique()
        matched_values = {match[0] for match in fuzzy_match(prompt, choices)}
        if matched_values:
            selected |= lowered.isin(matched_values)

    # If no matches are found, return a message indicating no matches
    if not selected.any():
        return f"No jobs found matching the prompt: {prompt}"

    return df.loc[selected, result_columns]

# Simple command-line interface for user interaction
def main():
    """Run the interactive prompt loop of the job recommendation app.

    Repeatedly reads a prompt from standard input, passes it together with the
    module-level ``df`` to ``recommend_jobs`` and prints the outcome with
    colorama colours. The loop ends when the user types ``exit`` (any case,
    surrounding whitespace ignored) or when input is interrupted / exhausted.
    """
    print(Fore.CYAN + "Welcome to the Job Recommendation App!" + Style.RESET_ALL)
    print("Enter a prompt to find appropriate jobs (e.g., 'Manager', 'Technologist'): ")

    while True:
        try:
            prompt = input(Fore.YELLOW + "Enter your prompt (or 'exit' to quit): " + Style.RESET_ALL).strip()
        except (EOFError, KeyboardInterrupt):
            # Treat Ctrl-C / end of input like an explicit 'exit' so the loop
            # finishes with the goodbye message instead of a traceback.
            prompt = 'exit'

        if prompt.lower() == 'exit':
            print(Fore.CYAN + "Thank you for using the Job Recommendation App. Goodbye!" + Style.RESET_ALL)
            break

        # Nothing typed: ask again rather than searching for an empty string.
        if not prompt:
            continue

        recommendations = recommend_jobs(prompt, df)

        # recommend_jobs returns a plain string for "rejected" / "no match";
        # show it on its own, without the "Recommended Jobs" header.
        if isinstance(recommendations, str):
            print(Fore.RED + "\n" + recommendations + Style.RESET_ALL)
            continue

        print(Fore.GREEN + "\nRecommended Jobs:" + Style.RESET_ALL)
        for _, row in recommendations.iterrows():
            print(Fore.BLUE + f"Discipline: {row['Discipline']}" + Style.RESET_ALL)
            print(Fore.MAGENTA + f"  Process: {row['Process']}" + Style.RESET_ALL)
            print(Fore.YELLOW + f"    Function: {row['Function']}" + Style.RESET_ALL)
            print(Fore.CYAN + f"      Job: {row['Job']}" + Style.RESET_ALL)
            print(Fore.WHITE + "-"*40 + Style.RESET_ALL)  # Separator for better readability

if __name__ == "__main__":
    # Read the careers sheet into the module-level `df` that main() searches,
    # then start the interactive prompt loop.
    # NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
    file_path = '/Users/louisvsbigmac/Documents/Office 365/xlsx/Creative Careers.xlsx'
    df = pd.read_excel(io=file_path, sheet_name='Sheet1')
    main()


[36mWelcome to the Job Recommendation App![0m
Enter a prompt to find appropriate jobs (e.g., 'Manager', 'Technologist'): 
Enter your prompt (or 'exit' to quit): car
[32m
Recommended Jobs:[0m
[34mDiscipline: Arts & Crafts[0m
[35m  Process: Creatives[0m
[33m    Function: Builders[0m
[36m      Job: Wax carver[0m
[37m----------------------------------------[0m
[34mDiscipline: Culture & Heritage[0m
[35m  Process: Directors[0m
[33m    Function: Managers/Supervisors/Coordinators[0m
[36m      Job: Career service manager[0m
[37m----------------------------------------[0m
[34mDiscipline: Culture & Heritage[0m
[35m  Process: Reactors[0m
[33m    Function: Archivers/Restorers/Conservers[0m
[36m      Job: Healthcare librarian[0m
[37m----------------------------------------[0m
[34mDiscipline: Arts & Crafts[0m
[35m  Process: Creatives[0m
[33m    Function: Builders[0m
[36m      Job: Theatre carpenter[0m
[37m----------------------------------------[0m
[34mDis