In [None]:
# Converting Visiting Cards into Excel Data

In [None]:
import pytesseract
from PIL import Image
import pandas as pd
import re
import os
import cv2

# Point pytesseract at the Tesseract executable. Prefer whichever binary is on
# PATH so the notebook also runs outside an Apple-silicon Homebrew install;
# fall back to the Homebrew location used originally.
import shutil
pytesseract.pytesseract.tesseract_cmd = shutil.which("tesseract") or "/opt/homebrew/bin/tesseract"

In [None]:
def extract_text(image_path):
    """Run OCR on a visiting-card image and return the recognised text.

    The image is converted to grayscale, smoothed with a bilateral filter and
    binarised with adaptive Gaussian thresholding before being passed to
    Tesseract (``lang='eng'``, ``--psm 6``).

    Args:
        image_path: Path of the card image to read.

    Returns:
        The raw text produced by Tesseract.

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing file.
        ValueError: If OpenCV cannot decode the file as an image.
    """
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")

    image_cv = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an obscure error inside cvtColor.
    if image_cv is None:
        raise ValueError(f"Could not decode image: {image_path}")

    gray = cv2.cvtColor(image_cv, cv2.COLOR_BGR2GRAY)
    gray = cv2.bilateralFilter(gray, 11, 17, 17)
    thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY, 11, 2
    )

    # Pass the pre-processed pixels to Tesseract in memory. Writing them to a
    # fixed file first ties the function to one machine's directory layout and
    # can silently OCR a stale file when the write fails.
    processed = Image.fromarray(thresh)
    return pytesseract.image_to_string(processed, lang='eng', config='--psm 6')

In [None]:
def clean_text(raw_text):
    """Reduce a string to ASCII letters, periods and single spaces.

    Every character other than ``A-Z``, ``a-z``, ``.`` and whitespace is
    dropped, each run of whitespace becomes one space, and leading/trailing
    whitespace is removed.
    """
    letters_only = re.sub(r'[^A-Za-z.\s]', '', raw_text)
    return re.sub(r'\s+', ' ', letters_only).strip()

def parse_details(text):
    """Extract name, designation, e-mail and phone number from OCR text.

    Heuristics:
      * E-mail and phone are the first regex match found on any line.
      * The name is the first line with at least two whitespace-separated
        tokens that still contains letters once any e-mail address / phone
        number on it has been removed and the rest passed through
        ``clean_text``.
      * The designation is the line right after the name, accepted only if
        its cleaned form starts with an upper-case letter and is longer than
        five characters.

    Args:
        text: Raw multi-line OCR output.

    Returns:
        Dict with keys ``"Name"``, ``"Designation"``, ``"Email"`` and
        ``"Phone"``; fields that could not be determined hold ``"N/A"``.
    """
    lines = [line.strip() for line in text.split('\n') if line.strip()]

    email_pattern = re.compile(r'\b[\w\.-]+@[\w\.-]+\.\w+\b')
    phone_pattern = re.compile(r'(\+?\d[\d\s\-]{8,}\d)')

    def first_match(pattern):
        # First occurrence of `pattern` on any line, or "N/A".
        for candidate_line in lines:
            found = pattern.search(candidate_line)
            if found:
                return found.group()
        return "N/A"

    def cleaned_without_contacts(line):
        # Remove e-mail / phone substrings before cleaning so contact details
        # cannot leak into the name or designation.
        stripped = phone_pattern.sub(' ', email_pattern.sub(' ', line))
        return clean_text(stripped)

    email = first_match(email_pattern)
    phone = first_match(phone_pattern)

    name = "N/A"
    designation = "N/A"
    for i, line in enumerate(lines):
        if len(line.split()) < 2:
            continue
        candidate = cleaned_without_contacts(line)
        if not candidate:
            # A line made only of digits/symbols/contact details (e.g. a phone
            # number) is not a name; keep scanning instead of giving up.
            continue
        name = candidate
        # Look for designation on the following line - avoid junk/garbage
        if i + 1 < len(lines):
            desig_candidate = cleaned_without_contacts(lines[i + 1])
            # Remove leading "a " or stray characters
            desig_candidate = re.sub(r'^[aA]\s+', '', desig_candidate)
            # Use as designation only if it starts upper-case and is long enough
            if desig_candidate and desig_candidate[0].isupper() and len(desig_candidate) > 5:
                designation = desig_candidate
        break

    return {
        "Name": name,
        "Designation": designation,
        "Email": email,
        "Phone": phone,
    }

In [None]:
def update_excel(data_dict, excel_path):
    """Append one contact record to the Excel workbook at ``excel_path``.

    Values that are not non-blank strings are stored as ``"N/A"``. If the
    workbook already exists its rows are read, the new record is appended and
    the whole sheet is written back; otherwise a new workbook is created.

    Args:
        data_dict: Mapping of column name to value for the new row. It is not
            modified.
        excel_path: Path of the workbook to create or extend.
    """
    # Sanitise a copy so the caller's dict is left untouched.
    record = {
        key: value if isinstance(value, str) and value.strip() else "N/A"
        for key, value in data_dict.items()
    }
    new_entry = pd.DataFrame([record])
    if os.path.exists(excel_path):
        existing = pd.read_excel(excel_path)
        updated = pd.concat([existing, new_entry], ignore_index=True)
    else:
        updated = new_entry
    # Force "N/A" for any missing or blank cell (e.g. columns absent from older
    # rows) in the DataFrame before saving.
    updated = updated.fillna("N/A").replace(r"^\s*$", "N/A", regex=True)
    updated.to_excel(excel_path, index=False)
    print(f"✅ Excel updated at: {excel_path}")

In [None]:
def process_visiting_card(image_path, excel_path="/Users/rohithkumar/Desktop/visiting_card_contacts.xlsx"):
    """OCR one visiting card, parse its fields and append them to a workbook.

    Args:
        image_path: Path of the card image.
        excel_path: Workbook that receives the new row. Defaults to the
            location this notebook has always written to.
    """
    print(f"\n📄 Processing: {os.path.basename(image_path)}")
    text = extract_text(image_path)
    details = parse_details(text)
    print("Extracted:", details)
    update_excel(details, excel_path)

# Card images to run through the pipeline, in processing order.
# NOTE(review): these are absolute paths on one specific machine — adjust
# them before running the notebook anywhere else.
cards = [
    "/Users/rohithkumar/Desktop/Pic.png",
    "/Users/rohithkumar/Desktop/Pics.png",
    "/Users/rohithkumar/Desktop/picss.png"
]

# Process the cards one at a time; each call prints the extracted fields and
# updates the Excel workbook.
for card_path in cards:
    process_visiting_card(card_path)

In [None]:
# Included Audio Feature

In [2]:
import pytesseract
from PIL import Image
import pandas as pd
import re
import os
import cv2
import whisper  # OpenAI's Whisper ASR

# Point pytesseract at the Tesseract executable. Prefer whichever binary is on
# PATH so the notebook also runs outside an Apple-silicon Homebrew install;
# fall back to the Homebrew location used originally.
import shutil
pytesseract.pytesseract.tesseract_cmd = shutil.which("tesseract") or "/opt/homebrew/bin/tesseract"

# Card OCR extraction
def extract_text(image_path):
    """Run OCR on a visiting-card image and return the recognised text.

    Pre-processing: grayscale conversion, bilateral filtering, then adaptive
    Gaussian thresholding. The binarised image is passed to Tesseract with
    ``lang='eng'`` and ``--psm 6``.

    Args:
        image_path: Path of the card image to read.

    Returns:
        The raw text produced by Tesseract.

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing file.
        ValueError: If OpenCV cannot decode the file as an image.
    """
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")

    image_cv = cv2.imread(image_path)
    # cv2.imread returns None on failure instead of raising; fail here with a
    # clear message rather than deep inside cvtColor.
    if image_cv is None:
        raise ValueError(f"Could not decode image: {image_path}")

    gray = cv2.cvtColor(image_cv, cv2.COLOR_BGR2GRAY)
    gray = cv2.bilateralFilter(gray, 11, 17, 17)
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)

    # Keep the processed image in memory: a fixed temp file on one user's
    # Desktop breaks on other machines and, if the write fails silently, lets
    # Tesseract read a stale image from an earlier run.
    processed = Image.fromarray(thresh)
    return pytesseract.image_to_string(processed, lang='eng', config='--psm 6')

def clean_text(raw_text):
    """Keep only ASCII letters, periods and single spaces from ``raw_text``.

    Substitutions are applied in order: first every character other than
    ``A-Z``, ``a-z``, ``.`` or whitespace is removed, then each whitespace run
    is collapsed to one space. The result is stripped at both ends.
    """
    substitutions = (
        (r'[^A-Za-z.\s]', ''),
        (r'\s+', ' '),
    )
    result = raw_text
    for pattern, replacement in substitutions:
        result = re.sub(pattern, replacement, result)
    return result.strip()

def parse_details(text):
    """Extract name, designation, e-mail and phone number from OCR text.

    Heuristics:
      * E-mail and phone are the first regex match found on any line.
      * The name is the first line with at least two whitespace-separated
        tokens that still contains letters once any e-mail address / phone
        number on it has been removed and the rest passed through
        ``clean_text``.
      * The designation is the line right after the name, accepted only if
        its cleaned form starts with an upper-case letter and is longer than
        five characters.

    Args:
        text: Raw multi-line OCR output.

    Returns:
        Dict with keys ``"Name"``, ``"Designation"``, ``"Email"`` and
        ``"Phone"``; fields that could not be determined hold ``"N/A"``.
    """
    lines = [line.strip() for line in text.split('\n') if line.strip()]
    email_pattern = re.compile(r'\b[\w\.-]+@[\w\.-]+\.\w+\b')
    phone_pattern = re.compile(r'(\+?\d[\d\s\-]{8,}\d)')

    def first_match(pattern):
        # First occurrence of `pattern` on any line, or "N/A".
        for candidate_line in lines:
            found = pattern.search(candidate_line)
            if found:
                return found.group()
        return "N/A"

    def cleaned_without_contacts(line):
        # Remove e-mail / phone substrings before cleaning so contact details
        # cannot leak into the name or designation.
        stripped = phone_pattern.sub(' ', email_pattern.sub(' ', line))
        return clean_text(stripped)

    email = first_match(email_pattern)
    phone = first_match(phone_pattern)

    name = "N/A"
    designation = "N/A"
    for i, line in enumerate(lines):
        if len(line.split()) < 2:
            continue
        candidate = cleaned_without_contacts(line)
        if not candidate:
            # A line made only of digits/symbols/contact details (e.g. a phone
            # number) is not a name; keep scanning instead of giving up.
            continue
        name = candidate
        # Look for designation on the following line - avoid junk/garbage
        if i + 1 < len(lines):
            desig_candidate = cleaned_without_contacts(lines[i + 1])
            desig_candidate = re.sub(r'^[aA]\s+', '', desig_candidate)
            if desig_candidate and desig_candidate[0].isupper() and len(desig_candidate) > 5:
                designation = desig_candidate
        break
    return {
        "Name": name,
        "Designation": designation,
        "Email": email,
        "Phone": phone,
    }

# Whisper models already loaded in this kernel, keyed by model name, so that
# processing several recordings does not reload the weights for each one.
_WHISPER_MODELS = {}


def transcribe_voice(audio_path, model_name="small"):
    """Transcribe an audio file to text with OpenAI Whisper.

    Args:
        audio_path: Path of the recording to transcribe.
        model_name: Whisper model to use (e.g. 'base', 'small', 'medium',
            'large'; larger models trade speed for accuracy). Defaults to
            "small".

    Returns:
        The stripped transcript, or ``"N/A"`` if the transcript is empty.
    """
    model = _WHISPER_MODELS.get(model_name)
    if model is None:
        model = whisper.load_model(model_name)
        _WHISPER_MODELS[model_name] = model
    result = model.transcribe(audio_path)
    text = result["text"].strip()
    return text if text else "N/A"

def update_excel(data_dict, excel_path):
    """Append one contact record to the Excel workbook at ``excel_path``.

    Values that are not non-blank strings are stored as ``"N/A"``. If the
    workbook already exists its rows are read, the new record is appended and
    the whole sheet is written back; otherwise a new workbook is created.

    Args:
        data_dict: Mapping of column name to value for the new row. It is not
            modified.
        excel_path: Path of the workbook to create or extend.
    """
    # Sanitise a copy so the caller's dict is left untouched.
    record = {
        key: value if isinstance(value, str) and value.strip() else "N/A"
        for key, value in data_dict.items()
    }
    new_entry = pd.DataFrame([record])
    if os.path.exists(excel_path):
        existing = pd.read_excel(excel_path)
        updated = pd.concat([existing, new_entry], ignore_index=True)
    else:
        updated = new_entry
    # Missing or blank cells (e.g. a column absent from older rows) become "N/A".
    updated = updated.fillna("N/A").replace(r"^\s*$", "N/A", regex=True)
    updated.to_excel(excel_path, index=False)
    print(f"✅ Excel updated at: {excel_path}")

def process_visiting_card(image_path, audio_path=None,
                          excel_path="/Users/rohithkumar/Desktop/visiting_card_contacts.xlsx"):
    """OCR one visiting card, attach an optional voice comment and save it.

    Args:
        image_path: Path of the card image.
        audio_path: Optional path of an audio file; when it is given and the
            file exists, its transcript is stored under ``"Comments"``.
            Otherwise ``"Comments"`` is ``"N/A"``.
        excel_path: Workbook that receives the new row. Defaults to the
            location this notebook has always written to.
    """
    print(f"\n📄 Processing: {os.path.basename(image_path)}")
    text = extract_text(image_path)
    details = parse_details(text)
    # Transcribe comment if audio provided
    if audio_path and os.path.exists(audio_path):
        comment = transcribe_voice(audio_path)
    else:
        comment = "N/A"
    details["Comments"] = comment
    print("Extracted:", details)
    update_excel(details, excel_path)

# Example usage: each tuple is (card image path, audio file path); the audio
# transcript, when available, is stored as that card's "Comments" value.
# NOTE(review): these are absolute paths on one specific machine — adjust
# them before running the notebook anywhere else.
cards_and_audio = [
    ("/Users/rohithkumar/Desktop/Pic.png", "/Users/rohithkumar/Desktop/Pic.mp3"),
    ("/Users/rohithkumar/Desktop/Pics.png", "/Users/rohithkumar/Desktop/Pics.mp3"),
    ("/Users/rohithkumar/Desktop/picss.png", "/Users/rohithkumar/Desktop/picss.mp3")
]

# Process the pairs in order; each call prints the extracted fields and
# updates the Excel workbook.
for img_path, aud_path in cards_and_audio:
    process_visiting_card(img_path, aud_path)


📄 Processing: Pic.png


100%|███████████████████████████████████████| 461M/461M [00:10<00:00, 47.6MiB/s]
  checkpoint = torch.load(fp, map_location=device)


Extracted: {'Name': 'Saptarshi Mukhopadhyay', 'Designation': 'Seer Pad Manager Age', 'Email': 'N/A', 'Phone': 'N/A', 'Comments': 'This is the command for the pick 3 which is the last pick. I need it to be updated properly in the excel sheet and it should be good.'}
✅ Excel updated at: /Users/rohithkumar/Desktop/visiting_card_contacts.xlsx

📄 Processing: Pics.png


  checkpoint = torch.load(fp, map_location=device)


Extracted: {'Name': 'KimberlyiNguyen', 'Designation': 'GenerallDoctor', 'Email': 'hello@reallygreatsite.com', 'Phone': '123-456-7890', 'Comments': 'This is the comment for the picture 2 which is PIX, I think they should be updated in Excel sheet.'}
✅ Excel updated at: /Users/rohithkumar/Desktop/visiting_card_contacts.xlsx

📄 Processing: picss.png


  checkpoint = torch.load(fp, map_location=device)


Extracted: {'Name': 'Dr. Dani Martinez', 'Designation': 'Marketing Manager oS', 'Email': 'hello@reallygreatsite.com', 'Phone': '123-456-7890', 'Comments': 'I think this is the comment for the pic one and this is the first picture.'}
✅ Excel updated at: /Users/rohithkumar/Desktop/visiting_card_contacts.xlsx
