In [1]:
import boto3
import json
import nltk

Textract = boto3.client('textract')
bedrock = boto3.client('bedrock-runtime')

response = Textract.start_document_analysis(
    DocumentLocation={
        'S3Object': {
            'Bucket': 'raw-files-vvsks',
            'Name': 'resume-sample.pdf'
        }
    },
    FeatureTypes=['TABLES','LAYOUT']
)

In [2]:
content = Textract.get_document_analysis(JobId=response['JobId'])

In [3]:
def Get_document_analysis (jobId):
    blocks = []marketing executive
    response = Textract.get_document_analysis(JobId=jobId)
    blocks += response['Blocks']
    while 'NextToken' in response:
        response = Textract.get_document_analysis(JobId=jobId, NextToken = response['NextToken'])
        blocks += response['Blocks']
    return blocks

In [4]:

def get_text(result, blocks_map):
    text = ''
    if 'Relationships' in result:
        for relationship in result['Relationships']:
            if relationship['Type'] == 'CHILD':
                for child_id in relationship['Ids']:
                    try:
                        word = blocks_map[child_id]
                        if word['BlockType'] == 'WORD':
                            text += word['Text'] + ' '
                        if word['BlockType'] == 'SELECTION_ELEMENT':
                            if word['SelectionStatus'] == 'SELECTED':
                                text += 'X '
                    except KeyError:
                        print("Error extracting Table data - {}:".format(KeyError))
    return text


def get_rows_columns_map(table_result, blocks_map):
    rows = {}
    for relationship in table_result['Relationships']:
        if relationship['Type'] == 'CHILD':
            for child_id in relationship['Ids']:
                try:
                    cell = blocks_map[child_id]
                    if cell['BlockType'] == 'CELL':
                        row_index = cell['RowIndex']
                        col_index = cell['ColumnIndex']
                        if row_index not in rows:
                            # create new row
                            rows[row_index] = {}

                        # get the text value
                        rows[row_index][col_index] = get_text(cell, blocks_map)
                except KeyError:
                    print("Error extracting Table data - {}:".format(KeyError))
                    pass
    return rows

def generate_table_csv(table_result, blocks_map, table_index):
    rows = get_rows_columns_map(table_result, blocks_map)

    table_id = str(table_index)

    csv = '\n\nTable: {0}\n\n'.format(table_id)

    for row_index, cols in rows.items():

        for col_index, text in cols.items():
            csv += '{}'.format(text) + ","
        csv += '\n'

    return csv



def get_table_csv_results(blocks):

    blocks_map = {}
    table_blocks = []
    for block in blocks:
        blocks_map[block['Id']] = block
        if block['BlockType'] == "TABLE":
            table_blocks.append(block)

    if len(table_blocks) <= 0:
        return " NO Table in this page "

    csv = ''
    for index, table in enumerate(table_blocks):
        csv += generate_table_csv(table, blocks_map, index + 1)

    return csv

def detect_cells (blocks):
    ids = []
    for block in blocks:
        if block['BlockType'] == 'CELL':
            if 'Relationships' in block:
                for relationship in block['Relationships']: 
                    ids+= (relationship['Ids'])
            else:
                ids+= (block['Id'])
    return ids


def get_line_text(page):
    text = ''
    Ids = detect_cells(page)
    for block in page:
        if block['BlockType'] == 'LINE':
            is_in_cell = False  # Flag to check if the LINE block is within a CELL block
            for relationship in block['Relationships']:
                for Id in relationship['Ids']:
                    if Id in Ids:
                        is_in_cell = True  # This LINE block is within a CELL block
                        break
            if not is_in_cell:
                text += ' ' + block['Text']+'\n'
    return text


In [5]:
blocks = Get_document_analysis(response['JobId'])

In [6]:
resume_text = get_line_text(blocks)
resume_tables = get_table_csv_results(blocks)

In [8]:
print(resume_text)

 Name: Samarth Sandeep Joshi
 Email ID: samarthjoshi411@gmail.com
 Mob no.: 8149340630
 Computer: 1- - MS-CIT
 2- Digital Marketing
 Job Skills:
 Email marketing, SEO on
 page/off page/ google
 ads/ Facebook ads.
 EXPERIENCE:
 COMPANY NAME: REPORTS AND DATA
 POSITION: Email marketing executive (19 April 2021 - 6 August 2021)
 COMPANY NAME: ADVERT DIGITAL MANTRA
 POSITION: Digital marketing executive (Internship) (6 August 2021 - 31 January 2022)
 Company name : IT PRENEUR
 Position: digital marketing executive ( 13 November - 2022 - 1 August 2023
 EDUCATION
 STRENGTHS
 Ability to work under any Pressure
 HOBBIES
 Like to play Football, Bike Riding, Listening Songs
 PERSONAL DETAILS
 DECLARATION
 I hereby declare that all the details furnished here are true to the best of my knowledge and belief.
 Samarth Sandeep Joshi



In [7]:
print(resume_tables)



Table: 1

Degree/Course ,Institute/College ,University/Board ,Percentage/CGPA ,Year of passing ,
SSC ,PS English Medium Technical High School, Solapur ,Maharashtra State Board ,41% ,2016 ,
HSC ,MIT Vishwashanti Gurukul, Solapur ,Maharashtra State Board ,65% ,2017 ,
BBA ,Mangalvedhekar Institute of Management, Solapur ,Solapur University ,77.77% ,2021 ,
MBA ,SVERI's College of Engineering, Pandharpur ,Solapur University ,64% ,2023 ,


Table: 2

Address. No. 02, ,Dange Nagar, Barshi road, Bale, North Solapur, Solapur, Maharashtra, 413002 ,
Date of Birth: ,11-11-2000 ,
Gender: ,Male ,
Nationality: ,India. ,
Marital Status: ,Single ,
Languages ,Known: Marathi, Hindi, English ,



In [10]:

applicantDetails = "TEXT IN RESUME :\n"+resume_text+"\n\n\nTABLES IN RESUME :\n"+resume_tables

roleAndDescription = 'marketing executive'

output_format = """
"Candidate Name": "<<CANDIDATE NAME>>",
"Positives": "<<YOUR ASSESMENT>>",
"Negitives": "<<YOUR ASSESMENT>>",
"Score out of 10" : "<<YOUR SCORE>>",
"Is candidate suitable for the role" : "<<YES / NO>>",
"Remarks" : "<<YOUR REMARKS>>",
"Suggestions to applicant" : "<<YOUR SUGGESTIONS>>",
"Remarks" : "<<YOUR REMARKS>>"
"""

In [11]:

prompt =f"""
Human:
<Resume>
{applicantDetails}
</Resume>
Based on the provided resume, please assess whether the applicant's qualifications and experience align with the requirements and expectations of the position
nRole and Guidelines:
{roleAndDescription}

Answer in the the following JSON format output
{output_format}

Assistant:"""



In [13]:
body = json.dumps({"prompt": prompt,"max_tokens_to_sample": 300,"temperature": 0.001,"top_p": 0.9,})
modelId = 'anthropic.claude-v2'
accept = 'application/json'
contentType = 'application/json'

In [14]:
bedrock_response = bedrock.invoke_model_with_response_stream(body=body, modelId=modelId, accept=accept, contentType=contentType)
stream = bedrock_response.get('body')
final_text = ""

if stream:
    for event in stream:
        chunk = event.get('chunk')
        if chunk:
            completion = json.loads(chunk.get("bytes")).get('completion')
            print (json.loads(chunk.get ("bytes")).get('completion'), end="")
            if completion:
                final_text += completion

 Here is my assessment of the candidate's resume:

{
  "Candidate Name": "Samarth Sandeep Joshi",
  "Positives": [
    "Has experience as an email marketing executive", 
    "Has internship experience in digital marketing",
    "Has skills in SEO, Google Ads, Facebook Ads",
    "Has completed relevant certifications like Digital Marketing"
  ],
  "Negatives": [
    "Limited full-time work experience of only 1.5 years",
    "No major achievements or results highlighted from past roles" 
  ],
  "Score out of 10": 6,
  "Is candidate suitable for the role": "No",
  "Remarks": "The candidate has some relevant skills and experience but lacks sufficient full-time work experience for the marketing executive role",
  "Suggestions to applicant": [
    "Gain more full-time experience in email, social media and search marketing",
    "Highlight specific achievements, campaigns, results from past roles", 
    "Showcase proficiency in marketing tools like CRM, analytics etc."
  ]
}