In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the dataset
def load_data(file_path):
    """Read the job dataset CSV at *file_path* into a DataFrame.

    Args:
        file_path: Path (or any source accepted by ``pd.read_csv``) of the
            CSV file to load.

    Returns:
        The ``pandas.DataFrame`` parsed from the file.

    Raises:
        SystemExit: With exit status 1, after printing a diagnostic, when the
            file is missing, is empty, or cannot be read for any other reason.
    """
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        message = f"Error: The file at {file_path} was not found."
    except pd.errors.EmptyDataError:
        message = "Error: The provided CSV file is empty."
    except Exception as e:
        # Top-level boundary: report whatever went wrong, then stop.
        message = f"An error occurred: {e}"

    print(message)
    # Raise SystemExit directly instead of calling `exit()`: that helper is
    # installed by the `site` module for interactive use and is not guaranteed
    # to exist or to actually stop execution in every environment.
    raise SystemExit(1)

# Prepare the TF-IDF Vectorizer for the Title column
def prepare_vectorizer(job_data):
    """Fit a TF-IDF vectorizer on the ``Title`` column of *job_data*.

    Args:
        job_data: DataFrame containing a ``Title`` column.

    Returns:
        A ``(vectorizer, tfidf_matrix)`` tuple: the fitted ``TfidfVectorizer``
        (English stop words removed) and the TF-IDF matrix with one row per
        row of *job_data*, in the same order.
    """
    # Missing or non-string titles would make the vectorizer fail; replace
    # them with text while keeping one entry per row so that matrix rows stay
    # aligned with the positions in job_data.
    titles = job_data['Title'].fillna('').astype(str)

    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(titles)
    return vectorizer, tfidf_matrix

# Function to recommend jobs based on user skills and required job skills
def recommend_jobs(user_skills, required_skills, vectorizer, tfidf_matrix, job_data, top_n=5):
    """Return the jobs whose titles best match the given skills.

    The two skill strings are joined into one query, vectorized with
    *vectorizer*, and compared against *tfidf_matrix* by cosine similarity.

    Args:
        user_skills: Skills the user has, as free text.
        required_skills: Skills the job should require, as free text.
        vectorizer: Fitted vectorizer used to transform the query.
        tfidf_matrix: Matrix the query is compared against; row ``i`` must
            correspond to row position ``i`` of *job_data*.
        job_data: DataFrame with ``Title``, ``Required_Skills``,
            ``Job_Description`` and ``URL`` columns.
        top_n: Maximum number of jobs to return.

    Returns:
        A DataFrame with the ``Title``, ``Required_Skills``,
        ``Job_Description`` and ``URL`` columns of at most *top_n* jobs,
        best match first. An empty DataFrame is returned when either skills
        string is blank, when *top_n* is not positive, or (with the four
        columns but no rows) when no title has any similarity to the query.
    """
    if not user_skills.strip():
        print("Error: User skills input is empty.")
        return pd.DataFrame()
    if not required_skills.strip():
        print("Error: Required skills input is empty.")
        return pd.DataFrame()
    # A slice like [-0:] would select every row, so reject non-positive counts
    # before any ranking is attempted.
    if top_n <= 0:
        return pd.DataFrame()

    # Combine both user skills and required skills to form a query
    query = f"{user_skills} {required_skills}"
    query_vec = vectorizer.transform([query])

    # One similarity score per job title
    scores = cosine_similarity(query_vec, tfidf_matrix)[0]

    # Stable sort on the negated scores: best first, ties keep dataset order.
    ranked = (-scores).argsort(kind="stable")[:top_n]
    # A zero score means the title shares no term with the query; returning
    # such rows would present arbitrary jobs as recommendations.
    ranked = ranked[scores[ranked] > 0]

    recommended_jobs = job_data.iloc[ranked]
    return recommended_jobs[['Title', 'Required_Skills', 'Job_Description', 'URL']]

# Function to get user input
def get_user_input():
    """Prompt on the console for the two skill lists.

    Returns:
        A ``(user_skills, required_skills)`` tuple of the entered strings
        with surrounding whitespace removed.
    """
    print("### Job Recommendation System ###")
    print("Please enter your skills (separated by commas):")
    skills_entered = input("Your Skills: ")

    print("\nPlease enter the required skills for the job (separated by commas):")
    skills_wanted = input("Required Skills: ")

    return skills_entered.strip(), skills_wanted.strip()

# Main function to run the recommendation system
def main():
    """Run the interactive job recommendation session.

    Loads the dataset, checks that it has the columns the recommender needs,
    fits the TF-IDF vectorizer once, then repeatedly asks for skills and
    prints the title and URL of each recommended job until the user declines
    another search.

    Raises:
        SystemExit: With exit status 1 when the dataset lacks a required
            column.
    """
    # Specify the path to your dataset
    file_path = '/content/drive/MyDrive/Colab Notebooks/job_dataset'  # Replace with your actual file path

    # Load the job data
    job_data = load_data(file_path)

    # Check if essential columns exist
    required_columns = {'Title', 'Required_Skills', 'Job_Description', 'URL'}
    if not required_columns.issubset(job_data.columns):
        print(f"Error: The dataset must contain the following columns: {required_columns}")
        # Raise SystemExit directly instead of calling `exit()`: that helper
        # is installed by the `site` module for interactive use and is not
        # guaranteed to exist or to actually stop execution everywhere.
        raise SystemExit(1)

    # Prepare the vectorizer and TF-IDF matrix once; they are reused for
    # every search in the loop below.
    vectorizer, tfidf_matrix = prepare_vectorizer(job_data)

    while True:
        # Get user input
        user_skills, required_skills = get_user_input()

        # Get job recommendations
        recommended_jobs = recommend_jobs(user_skills, required_skills, vectorizer, tfidf_matrix, job_data)

        if not recommended_jobs.empty:
            print("\n### Recommended Jobs ###")
            for _, row in recommended_jobs.iterrows():
                print(f"\nTitle: {row['Title']}")
                print(f"URL: {row['URL']}")
        else:
            print("No recommendations found based on the provided inputs.")

        # Ask if the user wants to make another search
        print("\nDo you want to make another search? (yes/no)")
        choice = input("Your Choice: ").strip().lower()
        if choice not in ['yes', 'y']:
            print("Thank you for using the Job Recommendation System. Goodbye!")
            break

# Start the interactive session only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


### Job Recommendation System ###
Please enter your skills (separated by commas):
Your Skills: Mathematics

Please enter the required skills for the job (separated by commas):
Required Skills: Data Analysis

### Recommended Jobs ###

Title: Data Analysis with R Specialization
URL: https://www.coursera.org/specializations/statistics

Title: Mathematics for Machine Learning and Data Science Specialization
URL: https://www.coursera.org/specializations/mathematics-for-machine-learning-and-data-science

Title: Mathematics for Machine Learning Specialization
URL: https://www.coursera.org/specializations/mathematics-machine-learning

Title: Mathematics for Engineers Specialization
URL: https://www.coursera.org/specializations/mathematics-engineers

Title: Data Analysis with Python
URL: https://www.coursera.org/learn/data-analysis-with-python

Do you want to make another search? (yes/no)
Your Choice: no
Thank you for using the Job Recommendation System. Goodbye!
