In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the worksheet at 0-based position 2 of the skills workbook. skiprows=1
# skips the first row of the sheet, so the header is read from the row after it.
data = pd.read_excel('project_data/2023-33/skills.xlsx',sheet_name = 2, skiprows=1)
# Keep rows 1 through 832 of the loaded frame; row 0 and everything after 832 are dropped.
# NOTE(review): presumably this strips a summary row and trailing footnotes — confirm against the workbook.
data = data[1:833]
# Bare expression so the notebook shows the frame as the cell's output.
data

### Best Job

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
import tkinter as tk
from tkinter import ttk, messagebox

# Work on a copy so later column assignments cannot touch `data`.
df = data.copy()
# Every column from the ninth onward is treated as a skill column.
# NOTE(review): assumes the first eight columns are job metadata — confirm against the sheet layout.
skills_columns = df.columns[8:]
# Job-by-skill frame that find_similar_jobs compares the user's ratings against.
features = df[skills_columns]
# NOTE(review): this module-level scaler is never read by the code visible in this notebook.
scaler = MinMaxScaler()
def find_similar_jobs(skills):
    """Return the five jobs whose skill profile is most similar to the user's.

    Similarity is the cosine similarity between the user's rating vector and
    every row of the module-level ``features`` frame.

    Args:
        skills: Mapping of skill column name to numeric rating. The values are
            consumed in the mapping's own order, so the keys must be in the
            same order as the columns of ``features``.

    Returns:
        DataFrame holding up to five rows of the module-level ``df``, most
        similar first, with the job title, median wage, typical education,
        the computed ``Similarity`` score and the matrix code.
    """
    user_data = pd.DataFrame(skills, index=['user'])
    user_vector = user_data.values.flatten()
    similarity_scores = cosine_similarity(user_vector.reshape(1, -1), features)
    # argsort is ascending: take the last five positions and reverse them so
    # the best match comes first.
    top_5_indices = similarity_scores.argsort()[0][-5:][::-1]
    # Copy the selected rows before adding a column. Assigning straight into
    # the result of ``df.iloc[...]`` triggers pandas' SettingWithCopyWarning.
    top_5_jobs = df.iloc[top_5_indices].copy()
    top_5_jobs['Similarity'] = similarity_scores[0][top_5_indices]
    return top_5_jobs[['2023 National Employment Matrix title', 'Median annual wage, dollars, 2023[1]', 'Typical education needed for entry', 'Similarity', '2023 National Employment Matrix code']]

def get_user_input():
    """Prompt on the console for a 0-5 rating of every skill column.

    Each skill in the module-level ``skills_columns`` is asked for in turn.
    An answer that is not a number, or that lies outside 0-5, is rejected and
    the same skill is asked again, so one typo no longer aborts the whole
    questionnaire and out-of-range ratings never reach the similarity search.

    Returns:
        dict mapping each skill column name to the rating entered, as a float.
    """
    print("Please enter your details:")
    skills = {}
    for skill in skills_columns:
        while True:
            try:
                rating = float(input(f"Rate your {skill} (0-5): "))
            except ValueError:
                print("Please enter a number.")
                continue
            # The chained comparison is False for NaN, so "nan" is rejected too.
            if 0 <= rating <= 5:
                break
            print("Skill ratings must be between 0 and 5.")
        skills[skill] = rating
    return skills

# if __name__ == "__main__":
#     user_skills = get_user_input()
#     similar_jobs = find_similar_jobs(user_skills)
    
#     print("\nTop 5 recommended jobs based on your profile:")
#     print(similar_jobs.to_string(index=False))
def recommend_jobs():
    """Button callback: read the ratings, find similar jobs and display them.

    Reads one rating per skill from the module-level ``skill_inputs`` entry
    widgets, checks that every rating is within 0-5, asks
    ``find_similar_jobs`` for recommendations and writes one line per job
    into the module-level ``result_text`` variable.

    Nothing is raised to the caller: a ``ValueError`` is shown in an
    "Invalid Input" message box and any other exception in an "Error" one.
    """
    try:
        user_skills = {skill: float(skill_inputs[skill].get()) for skill in skills_columns}
        for value in user_skills.values():
            # Negated chained comparison on purpose: float() accepts "nan",
            # and ``value < 0 or value > 5`` is False for NaN, which would
            # let it slip through the range check.
            if not 0 <= value <= 5:
                raise ValueError("Skill ratings must be between 0 and 5.")

        recommended_jobs = find_similar_jobs(user_skills)

        # Build the complete text first and publish it with a single set(),
        # so a formatting failure cannot leave a half-written result on screen.
        lines = [
            f"{row['2023 National Employment Matrix title']} "
            f"(${row['Median annual wage, dollars, 2023[1]']:.2f}) - "
            f"{row['Typical education needed for entry']} - Similarity: {row['Similarity']:.2f}\n"
            for _, row in recommended_jobs.iterrows()
        ]
        result_text.set("".join(lines))
    except ValueError as e:
        messagebox.showerror("Invalid Input", str(e))
    except Exception as e:
        messagebox.showerror("Error", str(e))

# Build the tkinter window: a title label, one rating entry per skill, a button
# that runs recommend_jobs, and a label bound to result_text for the output.
root = tk.Tk()
root.title("Job Recommendation System")
root.geometry("600x400")

# Input section
tk.Label(root, text="Rate your skills (0-5):").pack(pady=10)

# Maps each skill column name to its Entry widget; recommend_jobs reads the
# ratings back out of this dict.
skill_inputs = {}
for skill in skills_columns:
    # One left-aligned row per skill: the skill name followed by a narrow entry box.
    frame = tk.Frame(root)
    frame.pack(anchor='w', padx=20)
    tk.Label(frame, text=f"{skill}: ").pack(side="left")
    entry = tk.Entry(frame, width=5)
    entry.pack(side="left")
    skill_inputs[skill] = entry 

# Recommendation button
tk.Button(root, text="Recommend Jobs", command=recommend_jobs).pack(pady=20)

# Output: recommend_jobs writes into result_text and the label below displays it.
result_text = tk.StringVar()
tk.Label(root, text="Recommended Jobs:", anchor="w").pack(fill="x", padx=20)
result_label = ttk.Label(root, textvariable=result_text, anchor="w", justify="left", wraplength=550)
result_label.pack(fill="both", padx=20, pady=10)
# Enter the tkinter event loop; this call returns only once the loop ends.
root.mainloop()

### Best Job Category

In [None]:
# Load the worksheet at 0-based position 1 of the same workbook, again skipping
# the first row of the sheet so the header comes from the row after it.
data1 = pd.read_excel('project_data/2023-33/skills.xlsx',sheet_name = 1, skiprows=1)
# Keep rows 1 through 22 of the loaded frame.
# NOTE(review): presumably these are the job-category rows — confirm against the workbook.
data1 = data1[1:23]

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# Rebinds the module-level df / skills_columns / features to the category sheet.
# Functions that read these globals (e.g. find_similar_jobs) see the category
# data from here on, until another cell rebinds them.
df = data1.copy()
# Every column from the ninth onward is treated as a skill column.
# NOTE(review): assumes the first eight columns are metadata — confirm against the sheet layout.
skills_columns = df.columns[8:]
features = df[skills_columns]
# NOTE(review): this module-level scaler is never read by the code visible in this notebook.
scaler = MinMaxScaler()

def find_similar_category(skills):
    """Pick the single category closest to the user's skill ratings.

    The ratings are compared with every row of the module-level ``features``
    frame using cosine similarity, and the best-scoring row of the
    module-level ``df`` is returned.

    Args:
        skills: Mapping of skill column name to numeric rating, in the same
            order as the columns of ``features``.

    Returns:
        One-row DataFrame with the title, the ``Similarity`` score and the
        matrix code of the best match.
    """
    rating_row = pd.DataFrame(skills, index=['user']).values.flatten().reshape(1, -1)
    scores = cosine_similarity(rating_row, features)

    # Positions sorted by ascending similarity; the final one is the best match.
    ascending_positions = scores.argsort()[0]
    best_position = ascending_positions[-1:][::-1]

    best_category = df.iloc[best_position].copy()
    best_category['Similarity'] = scores[0][best_position]

    wanted_columns = [
        '2023 National Employment Matrix title',
        'Similarity',
        '2023 National Employment Matrix code',
    ]
    return best_category[wanted_columns]

def get_user_input():
    """Prompt on the console for a 0-5 rating of every skill column.

    Each skill in the module-level ``skills_columns`` is asked for in turn.
    An answer that is not a number, or that lies outside 0-5, is rejected and
    the same skill is asked again, so one typo no longer aborts the whole
    questionnaire and out-of-range ratings never reach the similarity search.

    Returns:
        dict mapping each skill column name to the rating entered, as a float.
    """
    print("Please enter your details:")
    skills = {}
    for skill in skills_columns:
        while True:
            try:
                rating = float(input(f"Rate your {skill} (0-5): "))
            except ValueError:
                print("Please enter a number.")
                continue
            # The chained comparison is False for NaN, so "nan" is rejected too.
            if 0 <= rating <= 5:
                break
            print("Skill ratings must be between 0 and 5.")
        skills[skill] = rating
    return skills

# Stage 1: collect the ratings on the console and find the closest job category.
user_skills = get_user_input()
similar_category = find_similar_category(user_skills)

print("\nTop recommended job category based on your profile:")
print(similar_category.to_string(index=False))
# Stage 2: narrow the job-level sheet (`data`, loaded in the first cell) to the
# jobs whose matrix code starts with the same two characters as the chosen
# category's code.
matrix_code = similar_category.iloc[0]['2023 National Employment Matrix code']
data2 = data.copy()
# NOTE(review): assumes matrix codes are strings whose first two characters identify the category — confirm.
data2 = data2[data2['2023 National Employment Matrix code'].str[:2] == str(matrix_code[:2])]

# def find_similar_job(skills):
#     user_data = pd.DataFrame(skills, index=['user'])
#     user_vector = user_data.values.flatten()
#     similarity_scores = cosine_similarity(user_vector.reshape(1, -1), features)
#     top_indices = similarity_scores.argsort()[0][-1:][::-1]
#     top_jobs = df.iloc[top_indices].copy()
#     top_jobs['Similarity'] = similarity_scores[0][top_indices]
#     return top_jobs[['2023 National Employment Matrix title', 'Similarity', '2023 National Employment Matrix code']]

# similar_job = find_similar_job(user_skills)
# print("\nTop recommended jobs based on your profile:")
# print(similar_job.to_string(index=False))

def find_best_job(data2, user_skills):
    """Return the job in ``data2`` whose scaled skill profile best matches the user's.

    The skill columns of ``data2`` are min-max scaled, the user's ratings are
    scaled with the same fitted scaler, and the job with the highest cosine
    similarity to the scaled ratings is returned.

    Args:
        data2: DataFrame of candidate jobs. Columns from the ninth onward are
            treated as skill columns.
        user_skills: Mapping of skill column name to numeric rating. A skill
            column missing from the mapping is rated 0.

    Returns:
        Series with the best job's title, its ``Similarity`` score and its
        matrix code.

    Raises:
        ValueError: If ``data2`` contains no rows.
    """
    # Fail with a clear message instead of the obscure error the scaler raises
    # when asked to fit on zero rows.
    if len(data2) == 0:
        raise ValueError("No candidate jobs to compare against.")

    skill_columns = data2.columns[8:]
    aligned_user_skills = {col: user_skills.get(col, 0) for col in skill_columns}
    features = data2[skill_columns]
    scaler = MinMaxScaler()
    scaled_features = scaler.fit_transform(features)
    # Keep the ratings in a one-row frame with the same column names the
    # scaler was fitted on. Transforming a bare ndarray after fitting on a
    # DataFrame makes scikit-learn warn about missing feature names.
    user_frame = pd.DataFrame(aligned_user_skills, index=['user'])
    scaled_user_vector = scaler.transform(user_frame)
    similarity_scores = cosine_similarity(scaled_user_vector, scaled_features)
    # argsort is ascending, so the last position is the most similar job.
    top_index = similarity_scores.argsort()[0][-1]
    best_job = data2.iloc[top_index].copy()
    best_job['Similarity'] = similarity_scores[0][top_index]
    return best_job[['2023 National Employment Matrix title', 'Similarity', '2023 National Employment Matrix code']]

# Pick the best job inside the recommended category and report it.
best_job = find_best_job(data2, user_skills)
title = best_job['2023 National Employment Matrix title']
similarity = best_job['Similarity']
# Rebinds matrix_code from the category's code to the selected job's code.
matrix_code = best_job['2023 National Employment Matrix code']
print(f"\nBest recommended job based on your profile: {title} (Similarity: {similarity:.2f}, Code: {matrix_code})")

## Best Job integrated with UI

In [None]:
import sys
from PyQt5.QtWidgets import QApplication, QWidget, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit, QPushButton, QTextEdit, QMessageBox
from PyQt5.QtCore import Qt
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

# Point the module-level df / skills_columns / features back at the job-level
# sheet (`data`); find_similar_jobs and the Qt form below read these globals.
df = data.copy()
# Every column from the ninth onward is treated as a skill column.
# NOTE(review): assumes the first eight columns are job metadata — confirm against the sheet layout.
skills_columns = df.columns[8:]
features = df[skills_columns]

class JobRecommendationApp(QWidget):
    """Qt window that collects skill ratings and lists the most similar jobs.

    The form shows one input per entry of the module-level ``skills_columns``
    and delegates the similarity search to the module-level
    ``find_similar_jobs`` function.
    """

    def __init__(self):
        """Create the widget and build its user interface."""
        super().__init__()
        self.initUI()

    def initUI(self):
        """Lay out the rating inputs, the recommend button and the result box."""
        self.setWindowTitle('Job Recommendation System')
        self.setGeometry(100, 100, 600, 400)

        layout = QVBoxLayout()

        # Input section
        input_layout = QVBoxLayout()
        input_layout.addWidget(QLabel('Rate your skills (0-5):'))

        # Maps each skill column name to its QLineEdit; recommend_jobs reads
        # the ratings back out of this dict.
        self.skill_inputs = {}
        for skill in skills_columns:
            skill_layout = QHBoxLayout()
            skill_layout.addWidget(QLabel(f"{skill}:"))
            skill_input = QLineEdit()
            skill_input.setFixedWidth(50)
            skill_layout.addWidget(skill_input)
            # Trailing stretch keeps the label and input packed to the left.
            skill_layout.addStretch()
            input_layout.addLayout(skill_layout)
            self.skill_inputs[skill] = skill_input

        layout.addLayout(input_layout)

        # Recommendation button
        recommend_button = QPushButton('Recommend Jobs')
        recommend_button.clicked.connect(self.recommend_jobs)
        layout.addWidget(recommend_button)

        # Output section
        layout.addWidget(QLabel('Recommended Jobs:'))
        self.result_text = QTextEdit()
        self.result_text.setReadOnly(True)
        layout.addWidget(self.result_text)

        self.setLayout(layout)

    def recommend_jobs(self):
        """Button slot: validate the ratings and display the recommended jobs.

        Nothing is raised to the caller: a ``ValueError`` is shown in an
        "Invalid Input" dialog and any other exception in an "Error" dialog.
        """
        try:
            # Collect user skill ratings
            user_skills = {skill: float(self.skill_inputs[skill].text()) for skill in skills_columns}
            for value in user_skills.values():
                # Negated chained comparison on purpose: float() accepts
                # "nan", and ``value < 0 or value > 5`` is False for NaN,
                # which would let it slip through the range check.
                if not 0 <= value <= 5:
                    raise ValueError("Skill ratings must be between 0 and 5.")

            # Get recommendations
            recommended_jobs = find_similar_jobs(user_skills)

            # Display recommendations, one line per job
            result = ""
            for _, row in recommended_jobs.iterrows():
                result += (f"{row['2023 National Employment Matrix title']} "
                           f"(${row['Median annual wage, dollars, 2023[1]']:.2f}) - "
                           f"{row['Typical education needed for entry']} - Similarity: {row['Similarity']:.2f}\n")

            self.result_text.setText(result)
        except ValueError as e:
            QMessageBox.critical(self, "Invalid Input", str(e))
        except Exception as e:
            QMessageBox.critical(self, "Error", str(e))

def find_similar_jobs(skills):
    """Return the five jobs whose skill profile is most similar to the user's.

    Similarity is the cosine similarity between the user's rating vector and
    every row of the module-level ``features`` frame.

    Args:
        skills: Mapping of skill column name to numeric rating. The values are
            consumed in the mapping's own order, so the keys must be in the
            same order as the columns of ``features``.

    Returns:
        DataFrame holding up to five rows of the module-level ``df``, most
        similar first, with the job title, median wage, typical education,
        the computed ``Similarity`` score and the matrix code.
    """
    user_data = pd.DataFrame(skills, index=['user'])
    user_vector = user_data.values.flatten()
    similarity_scores = cosine_similarity(user_vector.reshape(1, -1), features)
    # argsort is ascending: take the last five positions and reverse them so
    # the best match comes first.
    top_5_indices = similarity_scores.argsort()[0][-5:][::-1]
    # Copy the selected rows before adding a column. Assigning straight into
    # the result of ``df.iloc[...]`` triggers pandas' SettingWithCopyWarning.
    top_5_jobs = df.iloc[top_5_indices].copy()
    top_5_jobs['Similarity'] = similarity_scores[0][top_5_indices]
    return top_5_jobs[['2023 National Employment Matrix title', 'Median annual wage, dollars, 2023[1]', 'Typical education needed for entry', 'Similarity', '2023 National Employment Matrix code']]

# Launch the Qt application when this code runs as the main module.
if __name__ == '__main__':
    app = QApplication(sys.argv)
    ex = JobRecommendationApp()
    ex.show()
    # Run the Qt event loop and hand its return code to sys.exit, which raises
    # SystemExit once the loop ends.
    sys.exit(app.exec_())
