# Resume Analysis Project

## 1. Import Necessary Libraries

In [11]:

import os
import re
import spacy
import pdfminer
import PyPDF2
import sqlite3
import docx
import pandas as pd
from pdfminer.high_level import extract_text
from docx import Document


## 2. Set Up Environment and NLP Model
# Load the spaCy model for NLP

In [12]:

# Load the "en_core_web_sm" spaCy pipeline once at module level; this same
# object is what the main program passes as the `nlp` argument to
# analyze_and_store_resume().
nlp = spacy.load("en_core_web_sm")


## 3. Set Up Database for Storing Parsed Data
# Create a SQLite database to store resume data

In [13]:


# Schema of the single table used by this project: one row per parsed resume.
# IF NOT EXISTS keeps the statement safe to run every time the script starts.
_RESUMES_TABLE_DDL = '''
    CREATE TABLE IF NOT EXISTS resumes (
        id INTEGER PRIMARY KEY,
        name TEXT,
        email TEXT,
        phone TEXT,
        skills TEXT,
        experience TEXT,
        education TEXT
    )
'''

# Open the database file 'resumes.db' and keep a module-level cursor that the
# storage function further down reuses for its INSERT statements.
conn = sqlite3.connect('resumes.db')
cursor = conn.cursor()

# Ensure the table is present before anything tries to write to it.
cursor.execute(_RESUMES_TABLE_DDL)
conn.commit()



## 4. Resume File Handling

In [14]:

def read_pdf(file_path):
    """Return the text that pdfminer's ``extract_text`` produces for the PDF at *file_path*."""
    return extract_text(file_path)

def read_docx(file_path):
    """Return the text of a .docx file, one paragraph per line.

    Only the entries of ``Document(file_path).paragraphs`` are read; their
    ``text`` values are joined with newline characters.
    """
    document = Document(file_path)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)

def extract_text_from_resume(file_path):
    """Return the plain text of the resume stored at *file_path*.

    The reader is chosen from the file extension, compared case-insensitively:
    ``.pdf`` goes to ``read_pdf`` and ``.docx`` goes to ``read_docx``.

    Raises:
        ValueError: if the extension is anything else (including no extension).
    """
    _, suffix = os.path.splitext(file_path)
    suffix = suffix.lower()

    if suffix == '.docx':
        return read_docx(file_path)
    if suffix == '.pdf':
        return read_pdf(file_path)

    raise ValueError("Unsupported file format")


## 5. Text Extraction and Data Parsing

In [15]:


# An address is a local part, "@", and a dotted domain ending in an alphabetic
# label. Restricting the character classes keeps surrounding punctuation
# ("<", ">", ",", a sentence-final ".") and prefixes like "Email:" out of the match.
_EMAIL_PATTERN = re.compile(r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}')

# Ten-digit number in 3-3-4 grouping, with optional parentheses around the
# first group and optional "-", "." or whitespace between groups.
_PHONE_PATTERN = re.compile(r'\(?\d{3}\)?[-.]?\s*[-.]?\d{3}[-.]?\s*[-.]?\d{4}')


def extract_contact_info(text):
    """Return the first email address and first phone number found in *text*.

    Args:
        text: Raw resume text. An empty or ``None`` value yields ``(None, None)``.

    Returns:
        A ``(email, phone)`` tuple of strings. Either element is ``None`` when
        nothing matching was found. The phone number is returned exactly as it
        appears in the text (separators and parentheses are kept).
    """
    if not text:
        return None, None

    email_match = _EMAIL_PATTERN.search(text)
    phone_match = _PHONE_PATTERN.search(text)

    email = email_match.group(0) if email_match else None
    phone = phone_match.group(0) if phone_match else None

    return email, phone

def extract_name(text, nlp):
    """Return the text of the first entity labelled ``PERSON`` in *text*.

    Args:
        text: Resume text to analyse.
        nlp: Callable that turns *text* into a document exposing ``ents``,
            each entity having ``label_`` and ``text`` attributes.

    Returns:
        The first PERSON entity's text, or ``None`` if there is none.
    """
    person_names = (
        entity.text for entity in nlp(text).ents if entity.label_ == 'PERSON'
    )
    return next(person_names, None)

# Fallback vocabulary for pipelines that do not tag tokens with a "SKILL"
# entity type. Extend it, or pass `skill_keywords`, to suit the resumes at hand.
_DEFAULT_SKILL_KEYWORDS = (
    "Python", "Java", "JavaScript", "TypeScript", "C++", "C#", "SQL",
    "HTML", "CSS", "React", "Angular", "Node.js", "Django", "Flask",
    "Git", "Docker", "Kubernetes", "AWS", "Azure", "Linux", "Excel",
    "Tableau", "Machine Learning", "Deep Learning", "Data Analysis",
    "TensorFlow", "PyTorch", "Pandas", "NumPy",
)


def extract_skills(text, nlp, skill_keywords=None):
    """Return the skills mentioned in *text* as a comma-separated string.

    Tokens whose ``ent_type_`` is ``'SKILL'`` are used when the pipeline
    produces any. A pipeline without such a label would otherwise always give
    an empty result, so in that case the text is scanned for known skill
    keywords instead.

    Args:
        text: Resume text to analyse.
        nlp: Callable that turns *text* into an iterable of tokens, each with
            ``ent_type_`` and ``text`` attributes.
        skill_keywords: Optional iterable of keywords for the fallback scan;
            defaults to ``_DEFAULT_SKILL_KEYWORDS``.

    Returns:
        Skills joined by ``', '``. For the keyword fallback they are ordered by
        first appearance in *text* and spelled as in the keyword list. An empty
        string means nothing was found.
    """
    doc = nlp(text)
    tagged = [token.text for token in doc if token.ent_type_ == 'SKILL']
    if tagged:
        return ', '.join(tagged)

    keywords = _DEFAULT_SKILL_KEYWORDS if skill_keywords is None else skill_keywords
    hits = []
    for keyword in keywords:
        # Alphanumeric look-arounds instead of \b so that keywords ending in
        # punctuation ("C++", "C#") still match, while "Java" does not match
        # inside "JavaScript".
        pattern = r'(?<![A-Za-z0-9])' + re.escape(keyword) + r'(?![A-Za-z0-9])'
        found = re.search(pattern, text, re.IGNORECASE)
        if found:
            hits.append((found.start(), keyword))

    hits.sort(key=lambda hit: hit[0])
    return ', '.join(keyword for _, keyword in hits)

def extract_experience(text, nlp):
    """Return the text following the first experience keyword in *text*.

    The capture starts right after the first case-insensitive occurrence of
    "experience", "work history" or "employment history" and stops at the next
    "education" or "skills", or at the end of the text. Surrounding whitespace
    is stripped. *nlp* is accepted for signature parity but is not used.

    Returns:
        The captured section, or ``""`` when no keyword is present.
    """
    section_pattern = r'(experience|work history|employment history)(.*?)(education|skills|$)'
    found = re.search(section_pattern, text, re.IGNORECASE | re.DOTALL)
    if found is None:
        return ""
    return found.group(2).strip()

def extract_education(text, nlp):
    """Return the text following the first education keyword in *text*.

    The capture starts right after the first case-insensitive occurrence of
    "education" or "academic" and stops at the next "experience" or "skills",
    or at the end of the text. Surrounding whitespace is stripped. *nlp* is
    accepted for signature parity but is not used.

    Returns:
        The captured section, or ``""`` when no keyword is present.
    """
    section_pattern = r'(education|academic)(.*?)(experience|skills|$)'
    found = re.search(section_pattern, text, re.IGNORECASE | re.DOTALL)
    if found is None:
        return ""
    return found.group(2).strip()


## 6. NLP Processing and Data Storage

In [16]:


def analyze_and_store_resume(file_path, nlp):
    """Parse the resume at *file_path* and insert one row into the ``resumes`` table.

    The file is converted to text, the individual fields are extracted from
    that text, and the result is written through the module-level ``cursor``
    and committed on ``conn``. A confirmation line is printed afterwards.

    Args:
        file_path: Path of the resume file to read.
        nlp: NLP pipeline forwarded to the extract_* helpers.
    """
    resume_text = extract_text_from_resume(file_path)

    # Pull out each field; the contact details come back as a pair.
    candidate_name = extract_name(resume_text, nlp)
    email, phone = extract_contact_info(resume_text)
    row = (
        candidate_name,
        email,
        phone,
        extract_skills(resume_text, nlp),
        extract_experience(resume_text, nlp),
        extract_education(resume_text, nlp),
    )

    # Placeholders keep the extracted text out of the SQL statement itself.
    cursor.execute('''
        INSERT INTO resumes (name, email, phone, skills, experience, education)
        VALUES (?, ?, ?, ?, ?, ?)
    ''', row)
    conn.commit()

    print(f"Resume '{file_path}' analyzed and stored successfully!")


## 7. Output Analysis

In [17]:


def view_resumes():
    """Print every row of the ``resumes`` table as a pandas DataFrame."""
    # read_sql_query runs the SELECT on the module-level connection.
    all_resumes = pd.read_sql_query("SELECT * FROM resumes", conn)
    print(all_resumes)

## Main Program
if __name__ == "__main__":
    # Analyze a sample resume file (replace with your file path)
    resume_file = 'C://Users//D.SURESH KUMAR//Desktop//SampleResume.pdf'

    try:
        analyze_and_store_resume(resume_file, nlp)

        # Display all stored resumes
        view_resumes()
    finally:
        # Close the database connection even when reading, parsing or storing
        # the resume raises, so the connection is never left open.
        conn.close()


Resume 'C://Users//D.SURESH KUMAR//Desktop//SampleResume.pdf' analyzed and stored successfully!
   id              name                         email       phone skills  \
0   1             Gmail  210701118@rajalakshmi.edu.in  7305822559          
1   2             Gmail  210701118@rajalakshmi.edu.in  7305822559          
2   3  Neuberger Berman                          None        None          

                                          experience  \
0                                                      
1                                                      
2  Neuberger Berman \nPortfolio Management Intern...   

                                           education  
0                                                     
1                                                     
2  Tufts University \nBachelor of Arts in Interna...  
