In [1]:
import numpy as np
import pandas as pd
from pdfminer.high_level import extract_text
import spacy
from spacy.matcher import PhraseMatcher
import re
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
import nltk
# Fetches the NLTK stop-word corpus (the log below reports when it is already up to date).
# NOTE(review): no cell shown here uses nltk's stopwords — TfidfVectorizer is given
# stop_words='english' instead — confirm this download is still needed.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\parshwa\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

Loading Resume as text from PDF

In [3]:
def text_from_pdf(file_path):
    """Return the text that pdfminer's ``extract_text`` reads from ``file_path``."""
    extracted=extract_text(file_path)
    return extracted

In [4]:
# Relative path, so it is resolved against the notebook's working directory.
resume_path='files/202201165_Parshwa_Modi.pdf'
resume_text=text_from_pdf(resume_path)

# Preview only the first 1000 characters instead of dumping the whole document.
preview=resume_text[:1000]
print(preview)

Parshwa Modi

B.Tech – Information and Communication Technology

# 202201165@dau.ac.in (cid:239) linkedin.com/in/parshwa § github.com/parshwa

Education

Dhirubhai Ambani University
CPI: 8.61

12th Parth School of Science & Competition (GHSEB)
Percentage: 89.08%

10th Tejas Vidyalaya (GSEB)
Percentage: 90.33%

Experience

Digikentro
.NET Intern

2022 - Present
Gandhinagar, Gujarat

2021 - 2022
Vadodara, Gujarat

2019 - 2020
Vadodara, Gujarat

March 2025 – July 2025
Vadodara, Gujarat

• Gained hands-on experience with the .NET architecture and contributed to improving existing APIs for a

production-level application.

• Designed and developed secure, token-based RESTful APIs from scratch to support new product features.
• Deployed backend services and successfully integrated them with the frontend using FlutterFlow, ensuring smooth

end-to-end functionality.

Projects

Software Development- EventSphere: | React.js, Node.js, tailwind CSS, Vite, MongoDB

§

• Collaborated in a team to de

Extracting Skills from the Resume

In [5]:
nlp=spacy.load("en_core_web_sm")

# Lower-casing the resume means every matched span is reported in lower case,
# so "Python" and "python" collapse into a single entry when de-duplicated.
text=resume_text.lower()
doc=nlp(text)

# Skills to look for; multi-word entries such as "machine learning" are matched as phrases.
skills_list=["python","java","machine learning","sql","excel","pandas","django"]

# attr="LOWER" compares tokens case-insensitively.
matcher=PhraseMatcher(nlp.vocab,attr="LOWER")
# Patterns only need tokenising, so make_doc is used instead of running the full pipeline.
patterns=[nlp.make_doc(skill) for skill in skills_list]
matcher.add("SKILLS",patterns)

matches=matcher(doc)
# Sorting the de-duplicated matches keeps the printed list identical across kernel
# restarts; the iteration order of a set of strings is not stable between runs.
found_skills=sorted({doc[start:end].text for _,start,end in matches})
print(found_skills)

['sql', 'python']


Scrape Jobs from Web

In [35]:
def scrape_jobs(output_path="scraped_jobs.csv",timeout=30):
    """Scrape the Remote OK developer-jobs listing and save it as a CSV file.

    Every ``<tr class="job">`` row on the page becomes one record with the
    columns ``title``, ``company``, ``tags``, ``description`` and ``link``.
    ``description`` is the title followed by the tags, separated by spaces.
    Rows that lack a title, a company or a ``data-href`` attribute are skipped.

    Parameters
    ----------
    output_path : str, default "scraped_jobs.csv"
        Destination of the CSV file (overwritten if it already exists).
    timeout : float, default 30
        Seconds to wait for the server before the request is abandoned.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails, times out, or returns an HTTP error status.
    """
    url="https://remoteok.com/remote-dev-jobs"
    site_root="https://remoteok.com"
    headers={"User-Agent": "Mozilla/5.0"}
    columns=["title","company","tags","description","link"]

    # Without a timeout a stalled connection would block the notebook indefinitely.
    response=requests.get(url,headers=headers,timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an empty CSV.
    response.raise_for_status()

    soup=BeautifulSoup(response.text,"html.parser")
    jobs=soup.find_all("tr",class_="job")

    job_list=[]
    for job in jobs:
        title_el=job.find("h2",{"itemprop": "title"})
        company_el=job.find("h3",{"itemprop":"name"})
        href=job.get("data-href")
        # Skip rows that are missing any required piece rather than
        # swallowing every possible exception with a bare except.
        if title_el is None or company_el is None or href is None:
            continue

        title=title_el.text.strip()
        company=company_el.text.strip()
        tag_headings=(t.find("h3") for t in job.find_all("div",class_="tag"))
        tags=[h.get_text(strip=True) for h in tag_headings if h is not None]

        job_list.append({
            "title":title,
            "company":company,
            "tags":", ".join(tags),
            "description":title+" "+" ".join(tags),
            "link": site_root + href
        })

    # Fixing the columns keeps the CSV header even when no job was found,
    # so a later pd.read_csv on the file still succeeds.
    df=pd.DataFrame(job_list,columns=columns)
    df.to_csv(output_path,index=False)
    print(f"Jobs scraped and saved to {output_path}")



In [36]:
# Requests the live listing page and rewrites scraped_jobs.csv, so the CSV reflects
# whatever the site returns at the time this cell is run.
scrape_jobs()

Jobs scraped and saved to scraped_jobs.csv


In [37]:
# Load the jobs back from the CSV written by scrape_jobs() (the function itself returns nothing).
jobs_df=pd.read_csv("scraped_jobs.csv")
jobs_df.head()

Unnamed: 0,title,company,tags,description,link
0,Lead Data Engineer,Open Architects,"Engineer, DevOps, Python, Data, DataOps",Lead Data Engineer Engineer DevOps Python Data...,https://remoteok.com/remote-jobs/remote-lead-d...
1,Javascript Fullstack Engineer Senior,Lumenalta,"Engineer, JavaScript",Javascript Fullstack Engineer Senior Engineer ...,https://remoteok.com/remote-jobs/remote-javasc...
2,Software Engineer,ControlShift,"Ruby, Vue",Software Engineer Ruby Vue,https://remoteok.com/remote-jobs/remote-softwa...
3,Application Engineer,LaunchBrightly,"Engineer, JavaScript, Front End, API, GraphQL,...",Application Engineer Engineer JavaScript Front...,https://remoteok.com/remote-jobs/remote-applic...
4,Svelte Developer,Green Stuff World,"JavaScript, Typescript, Svelte, Tailwind, Kotlin",Svelte Developer JavaScript Typescript Svelte ...,https://remoteok.com/remote-jobs/remote-svelte...


In [38]:
# (number of jobs, number of columns) in the loaded table.
jobs_df.shape

(19, 5)

Match the Resume with the Jobs

In [39]:
# Plain Python list with one description string per job row, in jobs_df order.
jobs_astext=jobs_df['description'].astype(str).tolist()

In [40]:
# The resume is placed first so it becomes row 0 of the TF-IDF matrix; the jobs follow in jobs_df order.
all_text=[resume_text]+jobs_astext

In [41]:
# English stop words are removed using scikit-learn's built-in 'english' list.
tfid_vectorizer=TfidfVectorizer(stop_words='english')
# The vocabulary is fitted on the resume and the job descriptions together;
# the result has one row per entry of all_text.
tfid_matrix=tfid_vectorizer.fit_transform(all_text)

In [42]:
# Inspection only: prints the sparse matrix summary followed by its stored (row, column) weights.
print(tfid_matrix)

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 357 stored elements and shape (20, 270)>
  Coords	Values
  (0, 185)	0.13991155010948428
  (0, 171)	0.04663718336982809
  (0, 244)	0.07398292386529791
  (0, 143)	0.04663718336982809
  (0, 66)	0.09327436673965618
  (0, 246)	0.04663718336982809
  (0, 10)	0.04663718336982809
  (0, 84)	0.04663718336982809
  (0, 22)	0.04663718336982809
  (0, 54)	0.04663718336982809
  (0, 13)	0.04663718336982809
  (0, 157)	0.04663718336982809
  (0, 64)	0.09327436673965618
  (0, 133)	0.09327436673965618
  (0, 102)	0.04663718336982809
  (0, 97)	0.04663718336982809
  (0, 31)	0.04663718336982809
  (0, 255)	0.04663718336982809
  (0, 77)	0.04663718336982809
  (0, 17)	0.04663718336982809
  (0, 4)	0.04663718336982809
  (0, 186)	0.04663718336982809
  (0, 213)	0.04663718336982809
  (0, 214)	0.04663718336982809
  (0, 68)	0.04663718336982809
  :	:
  (16, 80)	0.4625482041728122
  (16, 170)	0.4065869514872798
  (17, 203)	0.4966014242683375
  (17, 148)	0.20361013

In [43]:
# Row 0 holds the resume (it was first in all_text); every remaining row is a job.
resume_vector=tfid_matrix[0:1]
job_vectors=tfid_matrix[1:]

# One cosine score per job, flattened from a (1, n_jobs) array to a 1-D array.
similarities=cosine_similarity(resume_vector,job_vectors).flatten()

# Stored on jobs_df itself, so re-running this cell overwrites the "score" column.
jobs_df["score"]=similarities

In [44]:
# Number of best-matching jobs to keep.
top_n=5

# Highest cosine score first, then keep the leading top_n rows.
ranked_jobs=jobs_df.sort_values(by="score",ascending=False)
top_jobs=ranked_jobs.head(top_n)

In [49]:
# Show the top_n jobs ranked by similarity to the resume (original row index is kept).
top_jobs

Unnamed: 0,title,company,tags,description,link,score
10,Senior Fullstack Developer,Swipe Games,"Front End, Backend, Full Stack, JavaScript, No...",Senior Fullstack Developer Front End Backend F...,https://remoteok.com/remote-jobs/remote-senior...,0.137832
11,Senior Full Stack Developer,Zammad,"Full Stack, Full Time",Senior Full Stack Developer Full Stack Full Time,https://remoteok.com/remote-jobs/remote-senior...,0.088063
14,Senior AI Engineer Python & LLM Engineer,Lemon.io,"Python, AWS, GCP, Azure, Openai, Llm, JavaScri...",Senior AI Engineer Python & LLM Engineer Pytho...,https://remoteok.com/remote-jobs/remote-senior...,0.086453
17,Spanish Speaking Software Support Engineer,Payara,"Engineer, Java",Spanish Speaking Software Support Engineer Eng...,https://remoteok.com/remote-jobs/remote-spanis...,0.077709
6,Senior Software Engineer,ModelVault,"Full Stack, Infra",Senior Software Engineer Full Stack Infra,https://remoteok.com/remote-jobs/remote-senior...,0.075119
