In [None]:
import ast

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the applicant records; the code below reads the 'name' and 'skills' columns.
df = pd.read_csv("Math_Final_CSV_cleaned.csv")

# User input
user_skills_input = input("Please enter your skills (comma-separated): ")

# Processing user skills
# Each comma-separated entry is trimmed, stripped of surrounding quotes and
# upper-cased. Users often paste skills as 'PYTHON', 'SQL'; leaving the quotes
# in place makes every exact-match comparison against the dataset fail.
# Empty entries (e.g. from a trailing comma) are dropped.
cleaned_skills = (
    part.strip().strip("'\"").strip().upper()
    for part in user_skills_input.split(',')
)
processed_user_skills = [skill for skill in cleaned_skills if skill]
print(processed_user_skills)


# Calculate similarity Score using Cosine Similarity


def calculate_skill_similarity_cosine(user_skills, data_skills_str):
    """Return the cosine similarity between the user's skills and one record's skills.

    Both skill collections are flattened into space-separated, upper-cased
    text and vectorised with ``CountVectorizer``, so the score is computed
    over individual word counts rather than over whole skill names.

    Args:
        user_skills: Iterable of skill strings supplied by the user.
        data_skills_str: String representation of a Python list of skills,
            e.g. ``"['PYTHON', 'SQL']"``. Anything that cannot be parsed
            into a list is treated as "no skills".

    Returns:
        The similarity score as a float. ``0.0`` is returned when either
        side has no usable skills or no token survives vectorisation.
    """
    try:
        # ast.literal_eval replaces the original eval(): it only accepts
        # Python literals, so a crafted CSV cell cannot execute code.
        data_skills_list = ast.literal_eval(data_skills_str)
    except (ValueError, SyntaxError, TypeError):
        # Malformed text, non-literal expressions, or non-string cells (e.g. NaN).
        data_skills_list = None

    if isinstance(data_skills_list, list):
        # str() guards against non-string list elements such as numbers.
        data_skills_processed = ' '.join(
            str(skill).strip().upper() for skill in data_skills_list
        )
    else:
        data_skills_processed = ''

    user_skills_processed = ' '.join(user_skills)

    # An empty side can never be similar to anything; returning early also
    # avoids CountVectorizer's "empty vocabulary" error when both are empty.
    if not user_skills_processed.strip() or not data_skills_processed.strip():
        return 0.0

    try:
        # Convert both skill texts into word-count vectors over a shared vocabulary.
        count_matrix = CountVectorizer().fit_transform(
            [user_skills_processed, data_skills_processed]
        )
    except ValueError:
        # Raised when no token survives tokenisation (the default token
        # pattern ignores one-character tokens such as "R" or "C").
        return 0.0

    vectors = count_matrix.toarray()

    # cosine_similarity returns a 2x2 matrix; [0, 1] is user vs. record.
    cosine_sim = cosine_similarity(vectors)

    return float(cosine_sim[0, 1])


# Determining Acceptance Score using Cosine Similarity


# Minimum cosine similarity for a record to count as a match. The printed
# messages are formatted from this value so they cannot drift from the filter.
SIMILARITY_THRESHOLD = 0.25


def find_matched_skills(user_skills, data_skills_str):
    """Return the user's skills that appear verbatim in one record's skill list.

    Args:
        user_skills: Iterable of normalised (upper-cased) user skill strings.
        data_skills_str: String representation of a Python list of skills.

    Returns:
        A sorted list of skills present on both sides; an empty list when
        ``data_skills_str`` cannot be parsed into a list.
    """
    try:
        # literal_eval instead of eval: CSV content must never be executed.
        data_skills_list = ast.literal_eval(data_skills_str)
    except (ValueError, SyntaxError, TypeError):
        return []
    if not isinstance(data_skills_list, list):
        return []
    data_skills_processed = {str(skill).strip().upper() for skill in data_skills_list}
    # sorted() gives a deterministic order instead of arbitrary set order.
    return sorted(set(user_skills) & data_skills_processed)


# Score every record against the user's skills.
df['skill_similarity_score_cosine'] = df['skills'].apply(
    lambda x: calculate_skill_similarity_cosine(processed_user_skills, x)
)
display(df[['name', 'skills', 'skill_similarity_score_cosine']].head(7))

# Create a copy to avoid SettingWithCopyWarning when adding columns below
accepted_applications_df_cosine = df[
    df['skill_similarity_score_cosine'] >= SIMILARITY_THRESHOLD
].copy()

# Displaying Result
if not accepted_applications_df_cosine.empty:
    print(
        "Application Accepted: At least one match found with "
        f"{SIMILARITY_THRESHOLD:.0%} or higher skill similarity!!"
    )

    accepted_applications_df_cosine['matched_skills'] = (
        accepted_applications_df_cosine['skills'].apply(
            lambda x: find_matched_skills(processed_user_skills, x)
        )
    )

    # Best matches first.
    accepted_applications_df_cosine = accepted_applications_df_cosine.sort_values(
        by='skill_similarity_score_cosine', ascending=False
    )

    result_columns = ['name', 'skills', 'matched_skills', 'skill_similarity_score_cosine']
    display(accepted_applications_df_cosine[result_columns])

    print("Top 10 Matched..!!")
    display(accepted_applications_df_cosine[result_columns].head(10))
else:
    print(
        "\nApplication Rejected: No match found with "
        f"{SIMILARITY_THRESHOLD:.0%} or higher skill similarity!!"
    )

Please enter your skills (comma-separated): 'PYTHON', 'MACHINE LEARNING', 'DEEP LEARNING', 'R', 'SQL', 'EXCEL'
["'PYTHON'", "'MACHINE LEARNING'", "'DEEP LEARNING'", "'R'", "'SQL'", "'EXCEL'"]


Unnamed: 0,name,skills,skill_similarity_score_cosine
0,Rohan,"['BIG DATA', 'HADOOP', 'HIVE', 'PYTHON', 'MAPR...",0.225374
1,Aisha,"['DATA ANALYSIS', 'DATA ANALYTICS', 'BUSINESS ...",0.19245
2,Nisha,"['SOFTWARE DEVELOPMENT', 'MACHINE LEARNING', '...",0.521749
3,Anand,"['ACCOUNTS PAYABLES', 'ACCOUNTS RECEIVABLES', ...",0.042679
4,Vidit,"['ANALYTICAL REASONING', 'COMPLIANCE TESTING K...",0.0
5,Raj,"['MICROSOFT APPLICATIONS', 'NETWORK SECURITY',...",0.0
6,Riddhi,"['MACHINE LEARNING', 'LINEAR REGRESSION', 'RID...",0.235702


Application Accepted: At least one match found with 85% or higher skill similarity!!


Unnamed: 0,name,skills,matched_skills,skill_similarity_score_cosine
2040,Vinay,"['PYTHON', 'MYSQL', 'TENSORFLOW', 'KERAS', 'MA...",[],0.737865
1924,Rakesh,"['PYTHON', 'MYSQL', 'TENSORFLOW', 'KERAS', 'MA...",[],0.737865
9147,Rajat,"['PYTHON', 'MYSQL', 'TENSORFLOW', 'KERAS', 'MA...",[],0.737865
1936,Anaya,"['PYTHON', 'MYSQL', 'TENSORFLOW', 'KERAS', 'MA...",[],0.737865
9176,Amit,"['PYTHON', 'MYSQL', 'TENSORFLOW', 'KERAS', 'MA...",[],0.737865
...,...,...,...,...
7460,Sakshi,"['MACHINE LEARNING', 'DATA ANALYSIS', 'DATA VI...",[],0.251976
6830,Anirudh,"['MACHINE LEARNING', 'DATA ANALYSIS', 'DATA VI...",[],0.251976
1254,Ayaan,"['MACHINE LEARNING', 'DATA ANALYSIS', 'DATA VI...",[],0.251976
9432,Ayush,"['MACHINE LEARNING', 'DATA ANALYSIS', 'DATA VI...",[],0.251976


Top 10 Matched..!!


Unnamed: 0,name,skills,matched_skills,skill_similarity_score_cosine
2040,Vinay,"['PYTHON', 'MYSQL', 'TENSORFLOW', 'KERAS', 'MA...",[],0.737865
1924,Rakesh,"['PYTHON', 'MYSQL', 'TENSORFLOW', 'KERAS', 'MA...",[],0.737865
9147,Rajat,"['PYTHON', 'MYSQL', 'TENSORFLOW', 'KERAS', 'MA...",[],0.737865
1936,Anaya,"['PYTHON', 'MYSQL', 'TENSORFLOW', 'KERAS', 'MA...",[],0.737865
9176,Amit,"['PYTHON', 'MYSQL', 'TENSORFLOW', 'KERAS', 'MA...",[],0.737865
8620,Neil,"['PYTHON', 'MYSQL', 'TENSORFLOW', 'KERAS', 'MA...",[],0.737865
4771,Akshay,"['PYTHON', 'MYSQL', 'TENSORFLOW', 'KERAS', 'MA...",[],0.737865
8673,Ayushi,"['PYTHON', 'MYSQL', 'TENSORFLOW', 'KERAS', 'MA...",[],0.737865
3769,Pari,"['PYTHON', 'MYSQL', 'TENSORFLOW', 'KERAS', 'MA...",[],0.737865
8833,Sunny,"['PYTHON', 'MYSQL', 'TENSORFLOW', 'KERAS', 'MA...",[],0.737865


 Cosine similarity doesn't just count the number of matching skills. It measures the cosine of the angle between two vectors in a multi-dimensional space.

Here's why two records with the same number of matched skills can have different cosine similarity scores:

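Term Frequency: CountVectorizer splits each skill list into individual words and counts them, so cosine similarity considers how often each word appears in both the user's skill set and the applicant's skill set (for example, "LEARNING" is counted twice for "MACHINE LEARNING" and "DEEP LEARNING"). If a word appears more frequently in one set than the other, even with the same number of matches, the vectors will differ, leading to a different cosine similarity.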
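Vector Direction: The cosine similarity is determined by the angle between the vectors. Even if the number of matching skills is the same, the specific combination and frequency of those skills can result in different vector directions, thus affecting the cosine similarity. In particular, every additional non-matching skill an applicant lists adds a component the user's vector does not have, which widens the angle and lowers the score.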