In [None]:
import pandas as pd
import re

# Function to extract job title from LinkedIn job link
def extract_job_title(link):
    """Derive a human-readable job title from a LinkedIn job URL.

    The slug is whatever follows ``jobs/view/`` up to (but not including) the
    first ``?``. Percent-escapes in the slug are decoded, hyphens become
    spaces, surrounding whitespace is trimmed, and the result is title-cased.
    Everything else in the slug (for example a trailing numeric ID) is kept.

    Args:
        link: URL string to parse.

    Returns:
        The formatted slug text, or ``None`` when ``link`` contains no
        ``jobs/view/<slug>`` segment.
    """
    # Function-scope import so this definition does not depend on the
    # notebook's import cell being edited as well.
    from urllib.parse import unquote

    match = re.search(r"jobs/view/([^?]+)", link)
    if match is None:
        return None

    # Decode before title-casing: otherwise an escape such as "%c3%a9" is
    # emitted as the literal text "%C3%A9" rather than the character it encodes.
    slug = unquote(match.group(1))
    return slug.replace("-", " ").strip().title()

# Read at most the first 700,000 rows of the raw job/skills table
df = pd.read_csv("job_skills.csv", nrows=700000, encoding="utf-8")

# Overwrite each non-missing link with the title derived from it;
# missing entries are passed through untouched
df["job_link"] = df["job_link"].apply(
    lambda link: link if pd.isna(link) else extract_job_title(str(link))
)

# Write the rewritten table back out, omitting the index column
df.to_csv("updated_job_skills.csv", index=False)

print("✅ CSV updated and saved as 'updated_job_skills.csv'")

✅ CSV updated and saved as 'updated_job_skills.csv'


In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the CSV
df = pd.read_csv("updated_job_skills.csv")

# Clean data: remove rows with missing skills
df_cleaned = df.dropna(subset=['job_skills'])

# Vectorize job skills: fit the TF-IDF vocabulary on the job postings
vectorizer = TfidfVectorizer()
job_skill_vectors = vectorizer.fit_transform(df_cleaned['job_skills'])

# Simulate user's resume skills; transform (not fit) so the query is
# expressed in the same vocabulary as the job postings
user_skills = "Project management, Data analysis, Leadership, Communication"
user_vector = vectorizer.transform([user_skills])

# Compute cosine similarity between the query and every job row
similarities = cosine_similarity(user_vector, job_skill_vectors).flatten()

# Get top 5 job matches: argsort is ascending, so take the last five
# positions and reverse them to get best-first order
top_indices = similarities.argsort()[-5:][::-1]

# top_indices are positions into df_cleaned (whose labels may have gaps after
# dropna), hence iloc. The score column is attached with assign(), which
# returns a new frame, instead of assigning onto a slice of df_cleaned
# (the pattern that triggers SettingWithCopyWarning).
top_jobs = (
    df_cleaned.iloc[top_indices][['job_link', 'job_skills']]
    .assign(similarity=similarities[top_indices])
)

# Display results
print(top_jobs.reset_index(drop=True))


                                            job_link  \
0  Senior Finance Data Accountant At Jobs Via Efi...   
1               Project Accountant At Cph 3769566480   
2              Project Manager At Carrier 3664483936   
3      Project Manager At Ats Corporation 3759791884   
4  Project Manager Senior Project Manager At Atki...   

                                          job_skills  similarity  
0  Data Reconciliation, Accounting Software, Fina...    0.495194  
1  Project Accounting, Project Management, Projec...    0.492654  
2  Agile methodologies, Scrum, Continuous Integra...    0.480477  
3  Automation projects management, Project leader...    0.480073  
4  Project Management, Project Leadership, Projec...    0.479045  
