In [1]:
import os
import pandas as pd
from langchain_community.document_loaders import DirectoryLoader
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_google_genai import ChatGoogleGenerativeAI

pdf_dir = "/Users/bhupathiraju_renisha/Desktop/trail/"
loader = DirectoryLoader(pdf_dir, glob="*.pdf", use_multithreading=True)
docs = loader.load()

if not docs:
    print("No documents loaded.")
    exit()

llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    api_key="AIzaSyDLsCYXLZPswtcr6dfJ4PgNLev-EWLBAcs",
)

class Resume(BaseModel):
    name: str = Field(default="", description="name from resume")
    phone: str = Field(default="", description="phone number from resume")
    email: str = Field(default="", description="email from resume")
    education: str = Field(default="", description="education from resume")
    projects: str = Field(default="", description="projects from resume")
    technical_skills: str = Field(default="", description="technical skills from resume")
    certifications: str = Field(default="", description="certifications from resume")
    languages: str = Field(default="", description="languages from resume")
    year_of_passing_university: str = Field(default="", description="year of passing university")
    GPA: str = Field(default="", description="GPA from resume")
    strength: str = Field(default="", description="strength from resume")
    weakness: str = Field(default="", description="weakness from resume")
    website: str = Field(default="", description="website from resume")
    reference: str = Field(default="", description="reference from resume")
    work_experience: str = Field(default="", description="work experience from resume")
    country: str = Field(default="", description="country from resume")
    occupation: str = Field(default="", description="occupation from resume")
    awards: str = Field(default="", description="awards from resume")
    activities: str = Field(default="", description="activities from resume")

parser = JsonOutputParser(pydantic_object=Resume)
prompt = PromptTemplate(
    template="Extract relevant details from the resume:\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | llm | parser
answers = []

for i, doc in enumerate(docs):
    print(f"Processing Document {i+1}/{len(docs)}...")

    details = chain.invoke({"query": doc.page_content})
    cleaned_details = {key: ("" if value is None else value) for key, value in details.items()}
    
    print(f"Extracted Data: {cleaned_details}")
    answers.append(cleaned_details)

df = pd.DataFrame(answers).replace({None: ""})

if df.empty:
    print("No valid data extracted.")
else:
    print("DataFrame Preview:\n", df.head())

csv_file_path = "/Users/bhupathiraju_renisha/Documents/first/parquetfile/answers.csv"
if not df.empty:
    df.to_csv(csv_file_path, index=False, na_rep="")
    print(f"CSV saved successfully at: {csv_file_path}")
else:
    print("No data to save.")

if os.path.exists(csv_file_path):
    final_data = pd.read_csv(csv_file_path, dtype=str).fillna("")
    print("Final CSV Preview:\n", final_data.head())
    print("Columns:", final_data.columns)




For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


Processing Document 1/10...
Extracted Data: {'name': 'BHUPATHIRAJU RENISHA', 'phone': '+91 8142678899', 'email': 'renishabhupathiraju@gmail.com', 'education': 'The State University of New York Artificial Intelligence - Diploma 2024\nSt Francis College For Women Post Graduate Diploma Business Analytics and Data Science 2024\nSt Francis College For Women Bachelor of Commerce International Business 2023\nPage Junior College Senior Secondary Education 2019\nBharatiya Vidya Bhavan’s Public School Secondary Education 2017', 'projects': 'Optimization of Machine Downtime -2024 Client-Leading Manufacturer of Fuel pumps\nA Study on Standard Charted Bank -2022', 'technical_skills': 'Python Power BI SQL Matplotlib Seaborn Tableau Tally Microsoft Word Microsoft Power Point Microsoft Excel', 'certifications': 'Business Analytics and Digital Media University of London 2020\nManagement Skills for International Business Indian School of Business 2020', 'year_of_passing_university': '2024', 'GPA': '', '

In [2]:
file_path = "/Users/bhupathiraju_renisha/Documents/first/parquetfile/answers.csv"
data = pd.read_csv(file_path)
data = data.fillna('')

In [3]:
data

Unnamed: 0,name,phone,email,education,projects,technical_skills,certifications,year_of_passing_university,GPA,strength,weakness,website,reference,work_experience,country,occupation,awards,activities,languages
0,BHUPATHIRAJU RENISHA,+91 8142678899,renishabhupathiraju@gmail.com,The State University of New York Artificial In...,Optimization of Machine Downtime -2024 Client-...,Python Power BI SQL Matplotlib Seaborn Tableau...,Business Analytics and Digital Media Universit...,2024.0,,,,,,,India,,,,
1,DANIEL GALLEGO,,hello@reallygreatsite.com,UX Industrial Basics and General Application U...,,Prototyping Tools User Research Information Ar...,"Professional Design Engineer (PDE) License, Pr...",2019.0,,,,www.reallygreatsite.com,,"Instant Chartz App, Morcelle Program\nJan 2023...",,UX DESIGNER,"Most Innovative Employer of the Year (2021), O...",,"English, French, Mandarin"
2,ISABEL MERCADO,+918877889988,hello@reallygreatsite.com,Ginyard International Co. University Bachelor'...,,,Executive Secretary and Business Administratio...,2020.0,,Strong organizational and time- management ski...,,,,Executive Secretary Ingoude Company | 2018 - P...,,Executive Secretary,,,
3,RICHARD SANCHEZ,+91 9988778899,hello@reallygreatsite.com,2029 - 2030 WARDIERE UNIVERSITY\nMaster of Bus...,,,,2030.0,3.8 / 4.0,,,www.reallygreatsite.com,Estelle Darcy\nHarper Richard\nWardiere Inc. /...,Borcelle Studio Marketing Manager & Specialist...,,Marketing Manager,,,English (Fluent) French (Fluent) German (Basic...
4,LAURICE MORETTI,+91 9966557788,hello@reallygreatsite.com,North State University | 2025-2027\nMaster of ...,,,Project Management | 2027 The Project Manageme...,2027.0,"3.5, 3.8",,,www.reallygreatsite.com,,Senior Systems Designer | 2030-2035 The IT Com...,,S Y S T E MS D E S I GN E R,Best Mentor Awardee,"Editor-in-Chief, The SCC Tribune; President, T...",
5,JACQUELINE THOMPSON,+91 8899778899,hello@reallygreatsite.com,Master of Science in Mechanical Engineering Un...,"Implemented cost-effective solutions, resultin...","Project Management, Structural Analysis, Robot...","Professional Engineer (PE) License, Project Ma...",2020.0,,,,www.reallygreatsite.com,,"Engineering Executive , Borcelle Technologies\...",,Engineering Executive,"Received the ""Engineering Excellence"" Award fo...",,"English, Malay, German"
6,HARPER RUSSO,+919944667788,hello@reallygreatsite.com,Warner & Spencer Graduate Diploma in Business ...,,,,,,P&L Management\nFinancial Reporting\nTeam Lead...,,@reallygreatsite,References are available on request.,Ginyard International Co. Operations Manager O...,,BUSINESS OPERATIONS MANAGER,,,
7,SHAWN GARCIA,+91 9988774499,hello@reallygreatsite.com,2014-2023\nBORCELLE UNIVERSITY\nSydney - Austr...,,Design Process\nProject Management\nData Visua...,,2023.0,,Management Skills\nCreativity\nDigital Marketi...,,,,2020 - 2022\nSTUDIO SHOWDE\nCanberra - Austral...,Australia,,,,
8,BENJAMIN SHAH,+123-456-7890,hello@reallygreatsite.com,Ginyard University\nBachelor of Business Manag...,,Digital Marketing,Digital Marketing Manager\nFauget Business Aca...,2016.0,3.8/4,Management Skills\nDigital Marketing\nNegotiat...,,,,Business Consultant\nTimmerman Industries\n201...,,Business Consultant,,,
9,OLIVIA WILSON,+91 9944332211,hello@reallygreatsite.com,2029 - 2030 BORCELLE UNIVERSITY\nMaster of Bus...,,,,2030.0,3.8 / 4.0,,,www.reallygreatsite.com,,2030 - PRESENT Borcelle Studio Marketing Manag...,,MARKETING MANAGER,,,English: Fluent French: Fluent German: Basics ...
