In [4]:
import os
import pandas as pd
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from tqdm import tqdm

# Initialize the OCR predictor
model = ocr_predictor(pretrained=True)

# Define the image folder and output file
image_folder = '../images'
output_file = 'output.csv'

# List to store the results
results = []

# Process each image in the folder
for index, file in enumerate(tqdm(os.listdir(image_folder))):
    image_path = os.path.join(image_folder, file)
    try:
        # Load the image
        doc = DocumentFile.from_images(image_path)
        
        # Perform OCR
        result = model(doc)
        
        # Extract text
        extracted_text = result.pages[0].blocks[0].lines[0].words[0].value if result.pages else ""
        
        # Append the result
        results.append([index, extracted_text])
    except Exception as e:
        print(f"Error processing {file}: {str(e)}")
        results.append([index, ""])

# Save the results to a CSV file
df = pd.DataFrame(results, columns=['index', 'prediction'])
df.to_csv(output_file, index=False)

print(f"CSV file '{output_file}' has been created with the predictions.")

100%|██████████| 54/54 [04:33<00:00,  5.07s/it]

CSV file 'output.csv' has been created with the predictions.





In [11]:
import os
import pandas as pd
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer

# Initialize the OCR predictor
model = ocr_predictor(pretrained=True)

# Define the image folder and output file
image_folder = '../images'
output_file = 'output_llama.csv'

# List to store the results
results = []

# Load the LLaMA model and tokenizer
llama_model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct"  # Replace with the actual model path or name
tokenizer = AutoTokenizer.from_pretrained(llama_model_name)
llama_model = AutoModelForCausalLM.from_pretrained(llama_model_name)

# Function to retrieve entity values using LLaMA
def retrieve_entity_values(text):
    inputs = tokenizer(text, return_tensors="pt")
    outputs = llama_model.generate(**inputs)
    entity_values = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return entity_values

# Process each image in the folder
for index, file in enumerate(tqdm(os.listdir(image_folder))):
    image_path = os.path.join(image_folder, file)
    try:
        # Load the image
        doc = DocumentFile.from_images(image_path)
        
        # Perform OCR
        result = model(doc)
        
        # Extract text
        extracted_text = result.pages[0].blocks[0].lines[0].words[0].value if result.pages else ""
        
        # Retrieve entity values using LLaMA
        entity_values = retrieve_entity_values(extracted_text)
        
        # Append the result
        results.append([index, entity_values])
    except Exception as e:
        print(f"Error processing {file}: {str(e)}")
        results.append([index, ""])

# Save the results to a CSV file
df = pd.DataFrame(results, columns=['index', 'prediction'])
df.to_csv(output_file, index=False)

print(f"CSV file '{output_file}' has been created with the predictions.")

Downloading shards:   7%|▋         | 2/30 [16:29<3:50:46, 494.50s/it]


KeyboardInterrupt: 

In [9]:
from huggingface_hub import login
login(token="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") 
add_to_git_credential=True


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to C:\Users\Dev\.cache\huggingface\token
Login successful


In [2]:
import os
import pandas as pd
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
# from huggingface_hub import login

# Login to Hugging Face
# login(token="your_huggingface_token")  # Replace with your Hugging Face token

# Initialize the OCR predictor
model = ocr_predictor(pretrained=True)

# Define the image folder and output file
image_folder = '../images2'
output_file = 'test_out3.csv'

# List to store the results
results = []

# Define the storage path for the LLaMA model
storage_path = 'F:\models'  # Replace with your desired storage path

# Ensure the storage path exists
os.makedirs(storage_path, exist_ok=True)

# Load the LLaMA model and tokenizer
llama_model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct"  # Replace with the actual model path or name
tokenizer = AutoTokenizer.from_pretrained(llama_model_name, cache_dir=storage_path)
llama_model = AutoModelForCausalLM.from_pretrained(llama_model_name, cache_dir=storage_path)

# Function to retrieve entity values using LLaMA
def retrieve_entity_values(text):
    inputs = tokenizer(text, return_tensors="pt")
    outputs = llama_model.generate(**inputs)
    entity_values = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return entity_values

# Process each image in the folder
for index, file in enumerate(tqdm(os.listdir(image_folder))):
    image_path = os.path.join(image_folder, file)
    try:
        # Load the image
        doc = DocumentFile.from_images(image_path)
        
        # Perform OCR
        result = model(doc)
        
        # Extract text
        extracted_text = result.pages[0].blocks[0].lines[0].words[0].value if result.pages else ""
        
        # Retrieve entity values using LLaMA
        entity_values = retrieve_entity_values(extracted_text)
        
        # Append the result
        results.append([index, entity_values])
    except Exception as e:
        print(f"Error processing {file}: {str(e)}")
        results.append([index, ""])

# Save the results to a CSV file
df = pd.DataFrame(results, columns=['index', 'prediction'])
df.to_csv(output_file, index=False)

print(f"CSV file '{output_file}' has been created with the predictions.")

  storage_path = 'F:\models'  # Replace with your desired storage path


: 

In [2]:
import os
import pandas as pd
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from tqdm import tqdm

def perform_ocr(image_folder, ocr_output_file):
    # Initialize the OCR predictor
    model = ocr_predictor(pretrained=True)

    # List to store the results
    results = []

    # Process each image in the folder
    for index, file in enumerate(tqdm(os.listdir(image_folder))):
        image_path = os.path.join(image_folder, file)
        try:
            # Load the image
            doc = DocumentFile.from_images(image_path)
            
            # Perform OCR
            result = model(doc)
            
            # Extract text
            extracted_text = result.pages[0].blocks[0].lines[0].words[0].value if result.pages else ""
            
            # Append the result
            results.append([index, extracted_text])
        except Exception as e:
            print(f"Error processing {file}: {str(e)}")
            results.append([index, ""])

    # Save the results to a CSV file
    df = pd.DataFrame(results, columns=['index', 'ocr_text'])
    df.to_csv(ocr_output_file, index=False)

    print(f"CSV file '{ocr_output_file}' has been created with the OCR results.")

In [3]:
import os
import pandas as pd
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer

# model_path = "F:\models\models--meta-llama--Meta-Llama-3.1-70B-Instruct"
def load_llama_model(model_path):
    # Load the LLaMA model and tokenizer from local storage
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path)
    return tokenizer, model

def retrieve_entity_values(text, tokenizer, model):
    inputs = tokenizer(text, return_tensors="pt")
    outputs = model.generate(**inputs)
    entity_values = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return entity_values

def process_ocr_results(ocr_file, output_file, model_path):
    # Load the LLaMA model and tokenizer
    tokenizer, model = load_llama_model(model_path)

    # Load the OCR results
    df = pd.read_csv(ocr_file)

    # List to store the results
    results = []

    # Process each OCR result
    for index, row in tqdm(df.iterrows(), total=df.shape[0]):
        ocr_text = row['ocr_text']
        try:
            # Retrieve entity values using the LLaMA model
            entity_values = retrieve_entity_values(ocr_text, tokenizer, model)
            
            # Append the result
            results.append([row['index'], entity_values])
        except Exception as e:
            print(f"Error processing index {row['index']}: {str(e)}")
            results.append([row['index'], ""])

    # Save the results to a CSV file
    result_df = pd.DataFrame(results, columns=['index', 'prediction'])
    result_df.to_csv(output_file, index=False)

    print(f"CSV file '{output_file}' has been created with the predictions.")

  model_path = "F:\models\models--meta-llama--Meta-Llama-3.1-70B-Instruct"


In [11]:


# Define paths
image_folder = '../images'
ocr_output_file = 'ocr_results.csv'
final_output_file = 'final_predictions.csv'
llama_model_path = 'F:\models\models--meta-llama--Meta-Llama-3.1-70B-Instruct\config.json'  # Replace with your local LLaMA model path

# Step 1: Perform OCR
# perform_ocr(image_folder, ocr_output_file)

# Step 2: Retrieve Entity Values using LLaMA Model from Local Storage
process_ocr_results(ocr_output_file, final_output_file, llama_model_path)

  llama_model_path = 'F:\models\models--meta-llama--Meta-Llama-3.1-70B-Instruct\config.json'  # Replace with your local LLaMA model path
  llama_model_path = 'F:\models\models--meta-llama--Meta-Llama-3.1-70B-Instruct\config.json'  # Replace with your local LLaMA model path


OSError: Incorrect path_or_model_id: 'F:\models\models--meta-llama--Meta-Llama-3.1-70B-Instruct\config.json'. Please provide either the path to a local folder or the repo_id of a model on the Hub.

In [1]:
import os
import pandas as pd
import requests
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from tqdm import tqdm

# Initialize the OCR predictor
model = ocr_predictor(pretrained=True)

# Define the image folder and output file
image_folder = '../images2'
output_file = 'test_out3.csv'

# List to store the results
results = []

# Hugging Face API details
api_url = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3.1-70B-Instruct"  # Replace with the actual model path or name
api_token = "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"  # Replace with your Hugging Face API token

headers = {
    "Authorization": f"Bearer {api_token}"
}

# Function to retrieve entity values using Hugging Face Inference API
def retrieve_entity_values(text):
    payload = {
        "inputs": text,
    }
    response = requests.post(api_url, headers=headers, json=payload)
    if response.status_code == 200:
        entity_values = response.json()[0]['generated_text']
        return entity_values
    else:
        print(f"Error: {response.status_code}, {response.text}")
        return ""

# Process each image in the folder
for index, file in enumerate(tqdm(os.listdir(image_folder))):
    image_path = os.path.join(image_folder, file)
    try:
        # Load the image
        doc = DocumentFile.from_images(image_path)
        
        # Perform OCR
        result = model(doc)
        
        # Extract text
        extracted_text = result.pages[0].blocks[0].lines[0].words[0].value if result.pages else ""
        
        # Retrieve entity values using Hugging Face Inference API
        entity_values = retrieve_entity_values(extracted_text)
        
        # Append the result
        results.append([index, entity_values])
    except Exception as e:
        print(f"Error processing {file}: {str(e)}")
        results.append([index, ""])

# Save the results to a CSV file
df = pd.DataFrame(results, columns=['index', 'prediction'])
df.to_csv(output_file, index=False)

print(f"CSV file '{output_file}' has been created with the predictions.")

  from .autonotebook import tqdm as notebook_tqdm
  state_dict = torch.load(archive_path, map_location="cpu")
  0%|          | 1/22917 [00:02<16:17:01,  2.56s/it]

Error: 400, {"error":"Model requires a Pro subscription; check out hf.co/pricing to learn more. Make sure to include your HF token in your query."}


  0%|          | 2/22917 [00:04<14:46:35,  2.32s/it]

Error: 400, {"error":"Model requires a Pro subscription; check out hf.co/pricing to learn more. Make sure to include your HF token in your query."}


  0%|          | 3/22917 [00:06<13:50:20,  2.17s/it]

Error: 400, {"error":"Model requires a Pro subscription; check out hf.co/pricing to learn more. Make sure to include your HF token in your query."}


  0%|          | 4/22917 [00:08<13:38:47,  2.14s/it]

Error: 400, {"error":"Model requires a Pro subscription; check out hf.co/pricing to learn more. Make sure to include your HF token in your query."}


  0%|          | 4/22917 [00:09<15:51:05,  2.49s/it]


KeyboardInterrupt: 