In [11]:
#importing required libraries
import os
from dotenv import load_dotenv
import google.generativeai as genai

# Load API key from .env
load_dotenv()
api_key = os.getenv("GEMINI_API_KEY")

# Fail fast with a clear message when the key is missing or empty, instead of
# letting the first Gemini request fail with a less obvious authentication error.
if not api_key:
    raise ValueError("GEMINI_API_KEY not found. Please check your .env file.")

# Configure Gemini client
genai.configure(api_key=api_key)

# Function to extract structured JSON from text using Gemini
def extract_json_with_gemini(text):
    """Ask Gemini to pull name, date, issue and urgency out of free-form text.

    Args:
        text: The unstructured note to analyse. It is interpolated verbatim
            into the prompt between triple double quotes.

    Returns:
        The ``.text`` of the Gemini response as a plain string. It is not
        parsed here, and the model may wrap the JSON in Markdown code fences.
    """
    gemini = genai.GenerativeModel(model_name="gemini-1.5-flash")

    # The prompt body is deliberately flush-left so no indentation is sent
    # to the model as part of the instructions.
    instructions = f"""
Extract the following fields from the given text and return them in JSON format.
Apply the following preprocessing rules to each field:

- **name**: Extract only alphabetic characters (ignore numbers, punctuation, and special characters). Remove titles like Mr., Ms., Dr., etc.
- **date**: Convert any mentioned date into the format **DD/MM/YYYY**.
- **issue**: Provide a detailed, coherent paragraph summarizing the issue with as much relevant context as possible.
- **urgency**: Return `true` if the text indicates urgency (e.g., words like "urgent", "as soon as possible", "immediate"), otherwise return `false`.

Text:
\"\"\"{text}\"\"\"

Return only the JSON object.
"""

    return gemini.generate_content(instructions).text

# Smoke test: run one sample note through the extractor and show the raw reply
if __name__ == "__main__":
    # Adjacent literals are joined at compile time into the single sample sentence.
    test_text = (
        "Met with Ravi Kumar on 5th June. "
        "He complained about a login issue. "
        "Needs it fixed ASAP."
    )
    output = extract_json_with_gemini(test_text)
    print(" Gemini Output:\n", output)


 Gemini Output:
 ```json
{
  "name": "RaviKumar",
  "date": "05/06/2024",
  "issue": "Ravi Kumar reported a login issue requiring immediate attention.  The meeting occurred on June 5th, and the user needs the problem resolved as soon as possible.",
  "urgency": true
}
```



In [12]:
# Import required libraries
import os
import json
import pandas as pd
from dotenv import load_dotenv
import google.generativeai as genai

# Step 1: Read the Gemini API key (the local .env file is loaded first)
load_dotenv()
api_key = os.environ.get("GEMINI_API_KEY")

# Stop right away when the key is absent or empty
if api_key is None or api_key == "":
    raise ValueError("GEMINI_API_KEY not found. Please check your .env file.")

# Step 2: Hand the key to the Gemini SDK
genai.configure(api_key=api_key)

# Step 3: Create the model handle used by the code further down
model = genai.GenerativeModel(model_name="gemini-1.5-flash")

# Step 4: Sample free-form notes to be turned into structured records
texts = [
    "Met with Ravi Kumar on 5th June. He complained about a login issue. Needs it fixed ASAP.",
    "On 12th May, Priya reported a payment gateway bug. She said it's urgent.",
    "Ajay Sharma, called on 7th June regarding password reset. Low priority.",
]

# Step 5: Define function to extract structured JSON
def extract_json(text):
    """Extract name, date, issue and urgency from ``text`` as parsed JSON.

    Sends a prompt to the module-level ``model`` and parses the reply with
    ``json.loads``. A Markdown code fence around the reply is removed first,
    whether or not it carries a language tag and regardless of the tag's case.

    Args:
        text: The unstructured note to analyse; interpolated into the prompt.

    Returns:
        The decoded JSON value, or ``None`` when the reply is not valid JSON
        (the offending content is printed in that case).
    """
    prompt = f"""
    Extract the following fields and return only valid JSON (no backticks, no extra text):
    Fields: name, date, issue, urgency

    Text: "{text}"
    """
    response = model.generate_content(prompt)
    content = response.text.strip()

    # Remove only the enclosing fence. Backticks inside the JSON payload are
    # left alone so string values that contain them are not corrupted.
    if content.startswith("```"):
        content = content[3:]
        if content.endswith("```"):
            content = content[:-3]
        content = content.strip()
        # Drop the optional language tag that follows the opening fence.
        if content[:4].lower() == "json":
            content = content[4:].lstrip()

    try:
        return json.loads(content)
    except json.JSONDecodeError:
        print("Failed to parse JSON:")
        print(content)
        return None

# Step 6: Run every sample through the extractor, keeping only usable records
results = []
for txt in texts:
    data = extract_json(txt)
    if not data:
        # Parsing failed (None) or the model returned an empty value: skip it.
        continue
    results.append(data)

# Step 7: Write the records to a CSV in the current working directory
if not results:
    print(" No valid data extracted to save.")
else:
    df = pd.DataFrame(results)
    output_path = os.path.join(os.getcwd(), "structured_output.csv")
    df.to_csv(output_path, index=False)
    print(f"CSV file created: {output_path}")


CSV file created: d:\OneDrive - Hitachi Solutions\prompt\structured_output.csv


In [13]:
# Read the bill image from disk as raw bytes
with open("C:/Users/kirithikakumar/Downloads/voice.png", "rb") as f:
    image_bytes = f.read()

# Instruction text sent with the image; it spells out the JSON shape to fill in.
# The indentation inside the literal is part of the prompt and is kept as-is.
invoice_prompt = """
        Extract invoice details from the sale bill and return JSON only:

        {
          "invoice_number": "",
          "date": "",
          "company_name": "",
          "company_address": "",
          "gstin": "",
          "patient_name": "",
          "doctor_name": "",
          "items": [
            {
              "product": "",
              "quantity": "",
              "batch": "",
              "expiry": "",
              "mrp": 0.0,
              "discount": 0.0,
              "sgst": 0.0,
              "cgst": 0.0,
              "amount": 0.0
            }
          ],
          "sub_total": 0.0,
          "grand_total": 0.0,
          "total_in_words": ""
        }

        If a value is missing, leave it empty.
        """

# Multimodal request: the image part comes first, followed by the text prompt
image_part = {"mime_type": "image/png", "data": image_bytes}
response = model.generate_content([image_part, invoice_prompt])

print("Extracted JSON:\n", response.text)


Extracted JSON:
 ```json
{
  "invoice_number": "0000054",
  "date": "22-01-2022",
  "company_name": "MARG ERP LTD",
  "company_address": "Plot No. 7, Wazirpur\nNew Delhi-110035\n110085",
  "gstin": "07AGIPD5859J1Z2",
  "patient_name": "Rahul",
  "doctor_name": "Jayant Gupta",
  "items": [
    {
      "product": "4 ALL CAP",
      "quantity": "0:1",
      "batch": "AB001",
      "expiry": "01-2023",
      "mrp": 140.0,
      "discount": 10.0,
      "sgst": 6.0,
      "cgst": 6.0,
      "amount": 140.0
    },
    {
      "product": "4 ALL CAP",
      "quantity": "0:1",
      "batch": "12",
      "expiry": "01-2023",
      "mrp": 140.0,
      "discount": 10.0,
      "sgst": 6.0,
      "cgst": 6.0,
      "amount": 140.0
    }
  ],
  "sub_total": 280.0,
  "grand_total": 282.0,
  "total_in_words": "Two Hundrend Eighty Two Only"
}
```


In [14]:
#importing required libraries
import fitz 
#Extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file; passed straight to ``fitz.open``.

    Returns:
        The text of all pages joined without a separator, with leading and
        trailing whitespace stripped. A document with no pages yields ``""``.
    """
    # The context manager closes the document when the block exits, even if
    # text extraction raises part-way, so the file handle is not leaked.
    with fitz.open(pdf_path) as doc:
        # join() builds the result in one pass instead of repeated string +=.
        return "".join(page.get_text() for page in doc).strip()

#Prompt Gemini to extract structured resume fields
def extract_json_with_gemini(text):
    """Ask Gemini to turn raw resume text into structured resume fields.

    NOTE: an earlier definition with this same name exists in this file;
    once this one runs, it replaces that earlier definition.

    Args:
        text: The resume text. It is interpolated verbatim into the prompt
            between triple double quotes.

    Returns:
        The ``.text`` of the Gemini response as a plain string. It is not
        parsed here, and the model may wrap the JSON in Markdown code fences.
    """
    gemini = genai.GenerativeModel(model_name="gemini-1.5-flash")

    # The prompt body is deliberately flush-left so no indentation is sent
    # to the model as part of the instructions.
    resume_prompt = f"""
Extract the following resume fields from the given text and return them in clean JSON format.

Required fields and instructions:

- **name**: Full name of the person. Only alphabetic characters, no titles like Mr., Ms., Dr.
- **email**: Extract a valid email address.
- **phone**: Extract the mobile number in international format if available.
- **objective**: Extract the career objective or summary statement.
- **education**: List all degrees with institution name, course, year, and grade (if available).
- **skills**: Return a list of all technical and soft skills mentioned.
- **work_experience**: List of previous roles with company name, job title, duration, and key responsibilities.
- **projects**: List project titles and a short summary of what each project involved.

Text:
\"\"\"{text}\"\"\"

Return only the structured JSON object.
"""
    return gemini.generate_content(resume_prompt).text

# Entry point: read the resume PDF, send its text to Gemini, print the reply
if __name__ == "__main__":
    # Raw literals keep the Windows backslashes as-is; adjacent literals are
    # joined at compile time into one path string.
    pdf_path = (
        r"C:\Users\kirithikakumar\Downloads"
        r"\ravi_kumar_resume.pdf"
    )
    text = extract_text_from_pdf(pdf_path)
    output_json = extract_json_with_gemini(text)
    print("Gemini Resume Output:\n", output_json)


Gemini Resume Output:
 ```json
{
  "name": "Ravi Kumar",
  "email": "ravi.kumar@email.com",
  "phone": "+91-9876543210",
  "objective": "To obtain a challenging position as a Software Engineer where I can apply my skills and grow professionally.",
  "education": [
    {
      "degree": "B.Tech in Computer Science",
      "institution": "Indian Institute of Technology (IIT), Delhi",
      "years": "2018 - 2022",
      "grade": "8.7/10"
    }
  ],
  "skills": [
    "Python",
    "Java",
    "JavaScript",
    "SQL",
    "REST APIs",
    "Git",
    "Docker"
  ],
  "work_experience": [
    {
      "company": "Infosys",
      "title": "Software Engineer",
      "duration": "July 2022 - Present",
      "responsibilities": [
        "Developed scalable APIs for internal tools.",
        "Collaborated with cross-functional teams to deliver projects on time.",
        "Improved application performance by 20% through code optimization."
      ]
    }
  ],
  "projects": [
    {
      "title": "Sma