In [None]:
!pip install pymupdf python-docx

In [5]:
import fitz  # PyMuPDF
import re
from docx import Document


def extract_text(file_path):
    if file_path.endswith('.pdf'):
        document = fitz.open(file_path)
        text = ""
        for page_num in range(document.page_count):
            page = document.load_page(page_num)
            text += page.get_text()
        return text
    elif file_path.endswith('.docx'):
        doc = Document(file_path)
        text = ""
        for para in doc.paragraphs:
            text += para.text + '\n'
        return text
    else:
        raise ValueError("Unsupported file format")

def extract_name(text):
    # Look for typical name patterns, including fully uppercase names
    match = re.search(r'^(Name:\s*)?([A-Z][a-z]+ [A-Z][a-z]+|[A-Z ]{2,})$', text, re.MULTILINE)
    return match.group(2).strip().title() if match else "Name not found"

def extract_email(text):
    email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    match = re.search(email_pattern, text)
    return match.group(0) if match else None


def extract_phone(text):
    phone_pattern = r'(\+?\d{1,3}[\s-]?)?(\(?\d{3}\)?[\s-]?\d{3}[\s-]?\d{4})'
    match = re.search(phone_pattern, text)
    return match.group(0) if match else None


def extract_work_experience(text):
    work_experience = re.search(r'WORK EXPERIENCE\s*(.*?)(?:EDUCATION|SKILLS|PROJECTS|CONTACT|$)', text,
                                re.DOTALL | re.IGNORECASE)
    if work_experience:
        return work_experience.group(1).strip()
    return ''

def extract_skills(text):
    # Remove sections like 'CAREER OBJECTIVE' and 'CERTIFICATIONS' entirely
    cleaned_text = re.sub(r'CAREER OBJECTIVE.*?(?=(WORK EXPERIENCE|EDUCATION|SKILLS|PROJECTS|CONTACT|$))', '', text,
                          flags=re.DOTALL | re.IGNORECASE)
    cleaned_text = re.sub(r'CERTIFICATIONS.*?(?=(WORK EXPERIENCE|EDUCATION|SKILLS|PROJECTS|CONTACT|$))', '',
                          cleaned_text, flags=re.DOTALL | re.IGNORECASE)

    # Remove any remaining lines that contain career objectives or irrelevant text
    cleaned_text = re.sub(r'(^.*Seeking.*$|^.*skills and outstanding aptitude for learning.*$|^.*Grumman.*$)', '',
                          cleaned_text, flags=re.MULTILINE | re.IGNORECASE)

    skills_section = re.search(r'SKILLS\s*(.*?)(?:WORK EXPERIENCE|EDUCATION|PROJECTS|CONTACT|$)', cleaned_text,
                               re.DOTALL | re.IGNORECASE)
    if skills_section:
        skills_text = skills_section.group(1).strip()
        # Split the skills by newlines or commas
        skills_list = re.split(r'\n|,', skills_text)
        # Filter out any empty strings and unnecessary entries
        filtered_skills = [skill.strip() for skill in skills_list if skill.strip()]
        # Remove duplicates while preserving order
        unique_skills = []
        for skill in filtered_skills:
            if skill not in unique_skills:
                unique_skills.append(skill)
        return unique_skills
    return []

def extract_projects(text):
    projects_section = re.search(r'PROJECTS\s*(.*?)(?:WORK EXPERIENCE|EDUCATION|SKILLS|CONTACT|$)', text,
                                 re.DOTALL | re.IGNORECASE)
    if projects_section:
        projects_text = projects_section.group(1)
        projects = re.split(r'\n', projects_text)
        return [project.strip() for project in projects if project.strip()]
    return []


def score_resume(work_experience, skills, projects, required_skills):
    score = 0
    score_details = {
        'experience': 0,
        'skills': 0,
        'projects': 0
    }

    # Assuming each year of experience counts as 1 point
    experience_years = re.findall(r'\b(\d+)\s+years?', work_experience, re.IGNORECASE)
    for exp in experience_years:
        score_details['experience'] += int(exp)
    score += score_details['experience']

    # Each matching skill adds 2 points
    for skill in skills:
        if skill.lower() in [req_skill.lower() for req_skill in required_skills]:
            score_details['skills'] += 2
    score += score_details['skills']

    # Each project adds 1 point
    score_details['projects'] = len(projects)
    score += score_details['projects']

    return score, score_details


def parse_job_description(job_description):
    skills_section = re.search(r'Skills Required:\s*(.*?)\s*Job Description:', job_description,
                               re.DOTALL | re.IGNORECASE)
    if skills_section:
        skills_text = skills_section.group(1)
        skills = re.split(r'\n|,', skills_text)
        return [skill.strip() for skill in skills if skill.strip()]
    return []


def process_resumes(resume_paths, job_description):
    required_skills = parse_job_description(job_description)
    print("Required Skills:", required_skills)

    resume_details = []
    for path in resume_paths:
        print(f"\nProcessing {path}...")
        text = extract_text(path)

        name = extract_name(text)
        email = extract_email(text)
        phone = extract_phone(text)
        work_experience = extract_work_experience(text)
        skills = extract_skills(text)
        projects = extract_projects(text)

        print(f"Extracted Name: {name}")
        print(f"Extracted Email: {email}")
        print(f"Extracted Phone: {phone}")
        print(f"Extracted Work Experience: {work_experience}")
        print(f"Extracted Skills: {skills}")
        print(f"Extracted Projects: {projects}")

        score, score_details = score_resume(work_experience, skills, projects, required_skills)

        resume_details.append({
            'name': name,
            'email': email,
            'phone': phone,
            'work_experience': work_experience,
            'skills': skills,
            'projects': projects,
            'score': score,
            'score_details': score_details
        })

    sorted_resumes = sorted(resume_details, key=lambda x: x['score'], reverse=True)

    return sorted_resumes


# Example usage
resume_paths = ['/content/resume3.pdf', '/content/resume1.pdf', '/content/resume2.docx']
job_description = """
Skills Required:
Python, Machine Learning, Data Analysis, NLP

Job Description:
We are looking for a skilled data scientist with experience in machine learning and data analysis.
"""

sorted_resumes = process_resumes(resume_paths, job_description)

for resume in sorted_resumes:
    print(f"\nName: {resume['name']}")
    print(f"\nEmail: {resume['email']}")
    print(f"\nPhone: {resume['phone']}")
    print(f"Work Experience: {resume['work_experience']}")
    print(f"Skills: {resume['skills']}")
    print(f"Projects: {resume['projects']}")
    print(f"Score: {resume['score']}")
    print(f"\nScoring Breakdown")
    print(f" Work Experience: {resume['score_details']['experience']} points")
    print(f" Skills: {resume['score_details']['skills']} points")
    print(f" Projects: {resume['score_details']['projects']} points")
    print(f" Total: {resume['score']} points")


Required Skills: ['Python', 'Machine Learning', 'Data Analysis', 'NLP']

Processing /content/resume3.pdf...
Extracted Name: Kandace Loudor
Extracted Email: kloudor@email.com
Extracted Phone: (123) 456-7890
Extracted Work Experience: Data Scientist
Grubhub
June 2018 - current / Princeton, NJ
Deployed a recommendation engine to production to
conditionally recommend other menu items based on past order
history, increasing average order size by 7%
Implemented various time series forecasting techniques to
predict surge in orders, lowering customer wait by 10 minutes
Designed a model in a pilot to increase incentives for drivers
during peak hours, increasing driver availability by 22%
Led a team of 3 data scientist to model the ordering process 5
unique ways, reported results, and made recommendations to
increase order output by 9%
Data Scientist
Spectrix Analytical Services
March 2016 - June 2018 / Princeton, NJ
Built a customer attrition random forest model that improved
monthly retention 

In [6]:
resume_paths = ['/content/resume2.pdf']
job_description = """
Skills Required:
Python, Machine Learning, Data Analysis, NLP

Job Description:
We are looking for a skilled data scientist with experience in machine learning and data analysis.
"""

sorted_resumes = process_resumes(resume_paths, job_description)

for resume in sorted_resumes:
    print(f"\nEmail: {resume['email']}")
    print(f"Work Experience: {resume['work_experience']}")
    print(f"Skills: {resume['skills']}")
    print(f"Projects: {resume['projects']}")
    print(f"Score: {resume['score']}")
    print(f"\nScoring Breakdown")
    print(f" Work Experience: {resume['score_details']['experience']} points")
    print(f" Skills: {resume['score_details']['skills']} points")
    print(f" Projects: {resume['score_details']['projects']} points")
    print(f" Total: {resume['score']} points")

Required Skills: ['Python', 'Machine Learning', 'Data Analysis', 'NLP']

Processing /content/resume2.pdf...
Extracted Name: Ambrose
Extracted Email: Ambro_@email.com
Extracted Phone: (123) 456-7890
Extracted Work Experience: Data Scientist Intern
County of Ventura
2020 - current
Ventura, CA
Designed and implemented over 40 machine-learning
models for different programs and
Extracted Skills: ['Machine and Deep Learning', 'Statistical Analysis', 'Processing Large Data Sets', 'Data Visualization', 'Mathematics', 'Programming', 'Data Wrangling', 'skills and outstanding aptitude for']
Extracted Projects: ['Verified results of algorithms to predict future occurrences', 'using real-world programs data with 82% precision', 'Extracted raw data from Twitter APIs and analyzed tweets to', 'generate analysis showing trends in public opinion', 'regarding policy changes', 'Developed a Java application that performed pattern', 'analysis of criminal incidents to help identify and visualize', 'hotspots 

In [7]:
resume_paths = ['/content/resume4.pdf']
job_description = """
Skills Required:
Python, Machine Learning, Data Analysis, NLP

Job Description:
We are looking for a skilled data scientist with experience in machine learning and data analysis.
"""

sorted_resumes = process_resumes(resume_paths, job_description)

for resume in sorted_resumes:
    print(f"\nEmail: {resume['email']}")
    print(f"Work Experience: {resume['work_experience']}")
    print(f"Skills: {resume['skills']}")
    print(f"Projects: {resume['projects']}")
    print(f"Score: {resume['score']}")
    print(f"\nScoring Breakdown")
    print(f" Work Experience: {resume['score_details']['experience']} points")
    print(f" Skills: {resume['score_details']['skills']} points")
    print(f" Projects: {resume['score_details']['projects']} points")
    print(f" Total: {resume['score']} points")

Required Skills: ['Python', 'Machine Learning', 'Data Analysis', 'NLP']

Processing /content/resume4.pdf...
Extracted Name: Trish Mathers
Extracted Email: tmathers@email.com
Extracted Phone: (123) 456-7890
Extracted Work Experience: Niantic
Data Scientist Intern
Seattle, WA | April 2022 - December 2022
Developed a program in SAS that automated refinement of linear
regression models for specific segments of a customer base that
saved 22 hours of labor per month.
Received, cleaned, and prepped data from client using SAS, SQL, and
Excel to help data scientists build marketing mix models that resulted
in a lift in ROI of 10 basis points.
Seattle University Tutor Center
Statistics and Mathematics Tutor
Seattle, WA | April 2020 - April 2022
Assessed students' learning to determine learning weaknesses and
needs, successfully helping students perform 13% better in algebra,
pre-calculus, calculus, and statistics undergraduate courses.
Met with 30+ students per week through online learning platf

In [9]:
resume_paths = ['/content/resume5.pdf']
job_description = """
Skills Required:
Python, Machine Learning, Data Analysis, NLP

Job Description:
We are looking for a skilled data scientist with experience in machine learning and data analysis.
"""

sorted_resumes = process_resumes(resume_paths, job_description)

for resume in sorted_resumes:
    print(f"\nEmail: {resume['email']}")
    print(f"Work Experience: {resume['work_experience']}")
    print(f"Skills: {resume['skills']}")
    print(f"Projects: {resume['projects']}")
    print(f"Score: {resume['score']}")
    print(f"\nScoring Breakdown")
    print(f" Work Experience: {resume['score_details']['experience']} points")
    print(f" Skills: {resume['score_details']['skills']} points")
    print(f" Projects: {resume['score_details']['projects']} points")
    print(f" Total: {resume['score']} points")

Required Skills: ['Python', 'Machine Learning', 'Data Analysis', 'NLP']

Processing /content/resume5.pdf...
Extracted Name: Work Experience
Extracted Email: tcoleman@email.com
Extracted Phone: (123) 456-7890
Extracted Work Experience: Best Buy - Senior Data Scientist
October 2018 - current
Remote
· Led data extraction and evaluation efforts to save Best Buy more than 11M over the course of tenure
· Partnered with product team to build a production recommendation engine in Python that improved the
average length on page for users and resulted in $450K in incremental annual revenue
· Created a customer attrition random forest model, improving monthly retention by 6 basis points for
customers likely to attrit by servicing relevant product features for them
· Communicated with PMs to lead 4 data scientists in project planning, development, and execution
· Coached data team throughout short and long-term
Extracted Skills: ['Python (NumPy', 'Pandas', 'Scikit-learn', 'Flask)', 'SAS; SQL - Red

In [10]:
resume_paths = ['/content/resume6.pdf']
job_description = """
Skills Required:
Python, Machine Learning, Data Analysis, NLP

Job Description:
We are looking for a skilled data scientist with experience in machine learning and data analysis.
"""

sorted_resumes = process_resumes(resume_paths, job_description)

for resume in sorted_resumes:
    print(f"\nEmail: {resume['email']}")
    print(f"Work Experience: {resume['work_experience']}")
    print(f"Skills: {resume['skills']}")
    print(f"Projects: {resume['projects']}")
    print(f"Score: {resume['score']}")
    print(f"\nScoring Breakdown")
    print(f" Work Experience: {resume['score_details']['experience']} points")
    print(f" Skills: {resume['score_details']['skills']} points")
    print(f" Projects: {resume['score_details']['projects']} points")
    print(f" Total: {resume['score']} points")

Required Skills: ['Python', 'Machine Learning', 'Data Analysis', 'NLP']

Processing /content/resume6.pdf...
Extracted Name: Yasmin Patel
Extracted Email: y.patel@email.com
Extracted Phone: (123) 456-7890
Extracted Work Experience: Retail associate
TJ Maxx
2022 - current
Cambridge, MA
Exceeded monthly sales targets by 22%, contributing to the
store's recognition as a top-performing location.
Updated store layouts to increase customer engagement with
featured products by 48%.
Recognized by management for providing exceptional service
after earning an average customer satisfaction rating of 93%.
Conducted regular stock checks using inventory management
systems, which minimized out-of-stock incidents by 29%.
Extracted Skills: ['NumPy', 'Scikit-learn', 'dplyr', 'MySQL', 'SQLite', 'Keras']
Extracted Projects: ['Library assistant', 'Harvard University', '2022', 'Recommended personalized book titles to library patrons that', 'led to 89% satisfaction ratings.', 'Collaborated with local nonprofi

In [11]:
resume_paths = ['/content/resume7.pdf']
job_description = """
Skills Required:
Python, Machine Learning, Data Analysis, NLP

Job Description:
We are looking for a skilled data scientist with experience in machine learning and data analysis.
"""

sorted_resumes = process_resumes(resume_paths, job_description)

for resume in sorted_resumes:
    print(f"\nEmail: {resume['email']}")
    print(f"Work Experience: {resume['work_experience']}")
    print(f"Skills: {resume['skills']}")
    print(f"Projects: {resume['projects']}")
    print(f"Score: {resume['score']}")
    print(f"\nScoring Breakdown")
    print(f" Work Experience: {resume['score_details']['experience']} points")
    print(f" Skills: {resume['score_details']['skills']} points")
    print(f" Projects: {resume['score_details']['projects']} points")
    print(f" Total: {resume['score']} points")

Required Skills: ['Python', 'Machine Learning', 'Data Analysis', 'NLP']

Processing /content/resume7.pdf...
Extracted Name: Marco
Extracted Email: m.rodriguez@email.com
Extracted Phone: (123) 456-7890
Extracted Work Experience: Data Visualization Specialist
Intuit
2022 - current
San Francisco, CA
Led a geospatial analysis in Tableau to uncover under-served
markets, creating a strategic expansion plan that saw a 16%
growth in new customer acquisition.
Devised a Microsoft Power BI tool for real-time monitoring of
cloud infrastructure costs at Intuit, shrinking unnecessary
expenditures by $19,544.
Leveraged Matplotlib with real-time data feeds to track e-
commerce transactions, identifying a 13% rise in mobile
payments.
Initiated a data visualization consistency project,
standardizing visuals across 246 reports in Illustrator,
slashing discrepancies and errors by 43%.
Data Analyst
Salesforce
2019 - 2022
San Francisco, CA
Automated Salesforce data extraction, reducing data
preparation time

In [18]:
resume_paths = ['/content/resume8.pdf']
job_description = """
Skills Required:
Python, Machine Learning, Data Analysis, NLP

Job Description:
We are looking for a skilled data scientist with experience in machine learning and data analysis.
"""

sorted_resumes = process_resumes(resume_paths, job_description)

for resume in sorted_resumes:
    print(f"\nEmail: {resume['email']}")
    print(f"Work Experience: {resume['work_experience']}")
    print(f"Skills: {resume['skills']}")
    print(f"Projects: {resume['projects']}")
    print(f"Score: {resume['score']}")
    print(f"\nScoring Breakdown")
    print(f" Work Experience: {resume['score_details']['experience']} points")
    print(f" Skills: {resume['score_details']['skills']} points")
    print(f" Projects: {resume['score_details']['projects']} points")
    print(f" Total: {resume['score']} points")

Required Skills: ['Python', 'Machine Learning', 'Data Analysis', 'NLP']

Processing /content/resume8.pdf...
Extracted Name: Aiden Tan
Extracted Email: a.tan@email.com
Extracted Phone: (123) 456-7890
Extracted Work Experience: Healthcare Data Scientist
Meritus Health
2021 - current
Hagerstown, MD
Developed a predictive readmission model in Python, which achieved an
accuracy rate of 86%, helping Meritus Health proactively identify at-risk
patients.
Leveraged Apache Hadoop to figure out the complexities in healthcare
data and discover hidden patterns and anomalies that led to an 18%
reduction in diagnostic errors.
Devised a Tableau remote monitoring dashboard for tracking vital signs
and patient conditions remotely, reducing the need for in-person visits
by 36%.
Integrated TensorFlow into Meritus Health's electronic health record
(EHR) system to allow predictive analytics for clinical decision support,
decreasing medical errors by 14%.
Healthcare Data Analyst
Maxim Healthcare Services
201

In [13]:
resume_paths = ['/content/resume9.pdf']
job_description = """
Skills Required:
Python, Machine Learning, Data Analysis, NLP

Job Description:
We are looking for a skilled data scientist with experience in machine learning and data analysis.
"""

sorted_resumes = process_resumes(resume_paths, job_description)

for resume in sorted_resumes:
    print(f"\nEmail: {resume['email']}")
    print(f"Work Experience: {resume['work_experience']}")
    print(f"Skills: {resume['skills']}")
    print(f"Projects: {resume['projects']}")
    print(f"Score: {resume['score']}")
    print(f"\nScoring Breakdown")
    print(f" Work Experience: {resume['score_details']['experience']} points")
    print(f" Skills: {resume['score_details']['skills']} points")
    print(f" Projects: {resume['score_details']['projects']} points")
    print(f" Total: {resume['score']} points")

Required Skills: ['Python', 'Machine Learning', 'Data Analysis', 'NLP']

Processing /content/resume9.pdf...
Extracted Name: Emma Davis
Extracted Email: e.davis@email.com
Extracted Phone: (123) 456-7890
Extracted Work Experience: Adobe - Data Scientist
2018 - current
San Jose, CA
Led data analysis initiatives that resulted in a 37% increase in customer retention rates.
Developed predictive models using TensorFlow, reducing forecasting errors by 21%.
Implemented Apache Hadoop to analyze large-scale datasets, improving data processing speed by
33%.
Utilized Pandas and Python for data manipulation, resulting in a 2-hour reduction in data cleaning
time.
Cisco Systems - Junior Data Engineer
2015 - 2018
San Jose, CA
Collaborated with a cross-functional team to develop ETL pipelines, improving data processing
efficiency by 26%.
Leveraged Amazon Redshift to optimize data warehouse performance, resulting in a 3-hour reduction
in query execution times.
Automated data ingestion processes using AWS

In [15]:
resume_paths = ['/content/resume10.pdf']
job_description = """
Skills Required:
Python, Machine Learning, Data Analysis, NLP

Job Description:
We are looking for a skilled data scientist with experience in machine learning and data analysis.
"""

sorted_resumes = process_resumes(resume_paths, job_description)

for resume in sorted_resumes:
    print(f"\nEmail: {resume['email']}")
    print(f"Work Experience: {resume['work_experience']}")
    print(f"Skills: {resume['skills']}")
    print(f"Projects: {resume['projects']}")
    print(f"Score: {resume['score']}")
    print(f"\nScoring Breakdown")
    print(f" Work Experience: {resume['score_details']['experience']} points")
    print(f" Skills: {resume['score_details']['skills']} points")
    print(f" Projects: {resume['score_details']['projects']} points")
    print(f" Total: {resume['score']} points")

Required Skills: ['Python', 'Machine Learning', 'Data Analysis', 'NLP']

Processing /content/resume10.pdf...
Extracted Name: David Robinson
Extracted Email: d.robinson@email.com
Extracted Phone: (123) 456-7890
Extracted Work Experience: Data Scientist - DataRobot
2017 - current
Boston, MA
Developed predictive models using PyTorch that improved accuracy by 11%.
Designed interactive dashboards in Tableau to visualize key performance indicators, leading to 23%
improvement in decision-making processes.
Collaborated with cross-functional teams to define data-driven strategies, resulting in a $253K
increase in revenue.
Conducted A/B testing using Python and statistical methods, optimizing conversion rate by 18%.
Junior Data Scientist - Wayfair
2014 - 2017
Boston, MA
Deployed machine learning models on AWS Lambda, improving response time by 2 hours.
Used spaCy for named entity recognition (NER) tasks, achieving an accuracy rate of 94%.
Extracted, transformed, and loaded (ETL) large datasets, 

In [16]:
resume_paths = ['/content/resume11.pdf']
job_description = """
Skills Required:
Python, Machine Learning, Data Analysis, NLP

Job Description:
We are looking for a skilled data scientist with experience in machine learning and data analysis.
"""

sorted_resumes = process_resumes(resume_paths, job_description)

for resume in sorted_resumes:
    print(f"\nEmail: {resume['email']}")
    print(f"Work Experience: {resume['work_experience']}")
    print(f"Skills: {resume['skills']}")
    print(f"Projects: {resume['projects']}")
    print(f"Score: {resume['score']}")
    print(f"\nScoring Breakdown")
    print(f" Work Experience: {resume['score_details']['experience']} points")
    print(f" Skills: {resume['score_details']['skills']} points")
    print(f" Projects: {resume['score_details']['projects']} points")
    print(f" Total: {resume['score']} points")

Required Skills: ['Python', 'Machine Learning', 'Data Analysis', 'NLP']

Processing /content/resume11.pdf...
Extracted Name: John Williams
Extracted Email: johnwilliams@gmail.com
Extracted Phone: 101-900-6543
Extracted Work Experience: 2014-2019
California
GPA: 8.7
Senior Data Scientist
Tasks
• Developed end-to-end machine learning prototypes and scaled them to run in production environments. Increased 
eficiency by 23%
• Derived actionable insights from massive data sets with minimal support. 
• Provided input into the collection of new data sources and the refinement of existing onces to improve analysis and 
model development. 
Achievements
• Applied data mining to analyze procurement processes resulting in savings of $420,000 a year. 
Machine Learning
Data Visualization
Data Mining
Python
Scala
NLP
Problem Solving
Fast Learner
Leadership
Risk Analyisis
Time Management
Extracted Skills: ['ET', 'Technical Skills', 'Sof Skills']
Extracted Projects: ['Technical Content Writing', 'Space

In [17]:
resume_paths = ['/content/resume12.pdf']
job_description = """
Skills Required:
Python, Machine Learning, Data Analysis, NLP

Job Description:
We are looking for a skilled data scientist with experience in machine learning and data analysis.
"""

sorted_resumes = process_resumes(resume_paths, job_description)

for resume in sorted_resumes:
    print(f"\nEmail: {resume['email']}")
    print(f"Work Experience: {resume['work_experience']}")
    print(f"Skills: {resume['skills']}")
    print(f"Projects: {resume['projects']}")
    print(f"Score: {resume['score']}")
    print(f"\nScoring Breakdown")
    print(f" Work Experience: {resume['score_details']['experience']} points")
    print(f" Skills: {resume['score_details']['skills']} points")
    print(f" Projects: {resume['score_details']['projects']} points")
    print(f" Total: {resume['score']} points")

Required Skills: ['Python', 'Machine Learning', 'Data Analysis', 'NLP']

Processing /content/resume12.pdf...
Extracted Name: Data Science
Extracted Email: juanjose.carin@gmail.com
Extracted Phone: 650-336-4590
Extracted Work Experience: 
Extracted Skills: ['Programming / Statistics', 'Big Data', 'Visualization', 'Others', 'Proficient:', 'R', 'Python', 'SQL', 'Hadoop', 'Hive', 'MrJob', 'Tableau', 'Git', 'AWS', 'Intermediate:', 'SPSS', 'SAS', 'Matlab', 'Spark', 'Storm', 'Bash', 'Basic:', 'EViews', 'Demetra+', 'D3.js', 'Gephi', 'Neo4j', 'QGIS', 'Experience', 'DATA SCIENCE', 'Jan. 2016 – Mar. 2016', 'Data Scientist', 'CONENTO', 'Madrid', 'Spain (working remotely)', '• Designed and implemented the ETL pipeline for a predictive model of traffic on the main roads in', 'eastern Spain (a project for the Spanish government).', '• Automated scripts in R to extract', 'transform', 'clean (incl. anomaly detection)', 'and load into MySQL', 'data from multiple data sources: road traffic sensors', 'acc