In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.metrics.pairwise import cosine_similarity
from typing import Dict, Tuple


In [2]:
df = pd.read_csv('../../Datasets/Final Datasets/model_dataset.csv')
df.head()

Unnamed: 0,O*NET-SOC Code,Title,Attribute Type,Element ID,Element Name,Scale Name,Data Value
0,11-1011.00,Chief Executives,Knowledge,2.C.1.a,Administration and Management,Importance,4.78
1,11-1011.00,Chief Executives,Knowledge,2.C.1.a,Administration and Management,Level,6.5
2,11-1011.00,Chief Executives,Knowledge,2.C.1.b,Administrative,Importance,2.42
3,11-1011.00,Chief Executives,Knowledge,2.C.1.b,Administrative,Level,2.69
4,11-1011.00,Chief Executives,Knowledge,2.C.1.c,Economics and Accounting,Importance,4.04


In [3]:

df.info()

df.describe()

df.isnull().sum()

df.columns

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 391425 entries, 0 to 391424
Data columns (total 7 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   O*NET-SOC Code  391425 non-null  object 
 1   Title           391425 non-null  object 
 2   Attribute Type  391425 non-null  object 
 3   Element ID      391425 non-null  object 
 4   Element Name    391425 non-null  object 
 5   Scale Name      391425 non-null  object 
 6   Data Value      391425 non-null  float64
dtypes: float64(1), object(6)
memory usage: 20.9+ MB


Index(['O*NET-SOC Code', 'Title', 'Attribute Type', 'Element ID',
       'Element Name', 'Scale Name', 'Data Value'],
      dtype='object')

## Model Building Plan

The plan is to build a recommendation system that recommends users different positions based on their Knowledge, Skills, Abilities, Education, Experience, Training, Interests, Work Values, Work Styles, and Work Context. These are all the attributes that each job/position has. 

There is a data value associated with each attribute for each position. The value is between 0 and 100. 0 means the position does not require the attribute and 100 means the position requires the attribute. 


In [4]:
df['Attribute Type'].unique()

array(['Knowledge', 'Skill', 'Ability', 'Education', 'Experience',
       'Training', 'Interest', 'Work Values', 'Work Styles',
       'Work Activity', 'Work Context'], dtype=object)

In [5]:
job_matrix = df.pivot_table(values='Data Value', 
                            index=['O*NET-SOC Code', 'Title'], 
                            columns=['Attribute Type', 'Element Name', 'Scale Name'])
job_matrix = job_matrix.fillna(0)

def get_recommendations(user_profile, n=5):
    user_vector = pd.Series(user_profile, index=job_matrix.columns).fillna(0)
    similarities = cosine_similarity(user_vector.values.reshape(1, -1), job_matrix.values)
    top_indices = similarities[0].argsort()[::-1][:n]
    recommendations = job_matrix.index[top_indices]
    
    # Format the output
    for code, title in recommendations:
        print(f"O*NET-SOC Code: {code}, Title: {title}")

def get_unique_attributes(job_matrix):
    return [col[:2] for col in job_matrix.columns.tolist()]

def get_user_input(unique_attributes):
    user_profile = {}
    print("Please rate your level of expertise/interest for the following attributes:")
    print("Use a scale of 0-5, where 0 is no expertise/interest and 5 is high expertise/interest.")
    
    for attr_type, attr_name in unique_attributes:
        while True:
            try:
                importance = float(input(f"{attr_type} - {attr_name} (Importance): "))
                level = float(input(f"{attr_type} - {attr_name} (Level): "))
                if 0 <= importance <= 5 and 0 <= level <= 5:
                    user_profile[(attr_type, attr_name, 'Importance')] = importance
                    user_profile[(attr_type, attr_name, 'Level')] = level
                    break
                else:
                    print("Please enter a value between 0 and 5.")
            except ValueError:
                print("Please enter a valid number.")
    
    return user_profile

def format_user_input(user_profile: Dict[Tuple[str, str, str], float], job_matrix) -> pd.Series:
    formatted_profile = pd.Series(index=job_matrix.columns, dtype=float)
    for key, value in user_profile.items():
        if key in formatted_profile.index:
            formatted_profile[key] = value
    return formatted_profile.fillna(0)

# Update your get_recommendations function
def get_recommendations(user_profile, job_matrix, n=5):
    user_vector = format_user_input(user_profile, job_matrix)
    similarities = cosine_similarity(user_vector.values.reshape(1, -1), job_matrix.values)
    top_indices = similarities[0].argsort()[::-1][:n]
    recommendations = job_matrix.index[top_indices]
    
    print("Recommended jobs:")
    for code, title in recommendations:
        print(f"O*NET-SOC Code: {code}, Title: {title}")

def create_sample_user_profile(unique_attributes):
    user_profile = {}
    for attr_type, attr_name in unique_attributes:
        # Generate random values between 0 and 5 for testing
        importance = round(np.random.uniform(0, 5), 2)
        level = round(np.random.uniform(0, 5), 2)
        user_profile[(attr_type, attr_name, 'Importance')] = importance
        user_profile[(attr_type, attr_name, 'Level')] = level
    return user_profile

# Modify the main execution part
# Main execution
unique_attributes = get_unique_attributes(job_matrix)
# user_profile = get_user_input(unique_attributes)  # Comment out this line
user_profile = create_sample_user_profile(unique_attributes)  # Use this instead
get_recommendations(user_profile, job_matrix)

# Optionally, print out the user profile for verification
print("\nSample User Profile:")
for key, value in user_profile.items():
    print(f"{key}: {value}")

Recommended jobs:
O*NET-SOC Code: 11-9013.00, Title: Farmers, Ranchers, and Other Agricultural Managers
O*NET-SOC Code: 17-3023.00, Title: Electrical and Electronic Engineering Technologists and Technicians
O*NET-SOC Code: 17-3026.01, Title: Nanotechnology Engineering Technologists and Technicians
O*NET-SOC Code: 17-2111.00, Title: Health and Safety Engineers, Except Mining Safety Engineers and Inspectors
O*NET-SOC Code: 17-2121.00, Title: Marine Engineers and Naval Architects

Sample User Profile:
('Ability', 'Arm-Hand Steadiness', 'Importance'): 1.37
('Ability', 'Arm-Hand Steadiness', 'Level'): 1.3
('Ability', 'Auditory Attention', 'Importance'): 0.45
('Ability', 'Auditory Attention', 'Level'): 0.95
('Ability', 'Category Flexibility', 'Importance'): 4.48
('Ability', 'Category Flexibility', 'Level'): 3.11
('Ability', 'Control Precision', 'Importance'): 3.86
('Ability', 'Control Precision', 'Level'): 3.16
('Ability', 'Deductive Reasoning', 'Importance'): 3.42
('Ability', 'Deductive Rea

In [6]:
df['Title'].unique().shape

(923,)