<a href="https://colab.research.google.com/github/Ghanshyambabariya/GrindAI/blob/main/Ra_Roughness_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Extraction of the surface roughness values Ra, Rq, and Rz from the PDF reports generated by a profilometer.

In [None]:
# Libraries and dependencies
import os
import re
import csv
import pdfplumber
from tqdm import tqdm

After running the cell below, enter the input path (the base directory containing the folders of PDFs to read) and the output path (the CSV file to write) when prompted.

In [None]:
def extract_roughness_parameters(pdf_path):
    """Extract Ra, Rq, and Rz values from a PDF file.

    Args:
        pdf_path: Path of the PDF report to read.

    Returns:
        A tuple ``(ra, rq, rz)`` of strings that use ``.`` as the decimal
        separator, or ``(None, None, None)`` when any of the three values is
        not found or the file cannot be processed (the error is printed).
    """
    try:
        with pdfplumber.open(pdf_path) as pdf:
            # Pages that yield no text (None or "") are skipped.
            page_texts = [
                text
                for text in (page.extract_text() for page in pdf.pages)
                if text
            ]
    except Exception as e:
        # Best effort: one unreadable file must not abort a whole batch.
        print(f"Error processing {pdf_path}: {e}")
        return None, None, None

    # Join pages with a newline so the "same line" restriction in the
    # patterns cannot pair a label on one page with a number on the next.
    return _parse_roughness_text("\n".join(page_texts))


# One pattern per parameter, in (Ra, Rq, Rz) order.  The \b anchors keep the
# label from matching inside longer words such as "Rauheit" or "Rzmax"; the
# captured value is the first decimal number on the same line as the label.
_ROUGHNESS_PATTERNS = tuple(
    re.compile(rf'\b{label}\b[^\n]*?(\d+[.,]\d+)')
    for label in ('Ra', 'Rq', 'Rz')
)


def _parse_roughness_text(text):
    """Return ``(ra, rq, rz)`` found in *text*, or three ``None`` values."""
    values = []
    for pattern in _ROUGHNESS_PATTERNS:
        match = pattern.search(text)
        if match is None:
            # All three parameters are required; a partial result is dropped.
            return None, None, None
        # Normalise a decimal comma ("0,52") to a decimal point ("0.52").
        values.append(match.group(1).replace(',', '.'))
    return tuple(values)

def _natural_key(text):
    """Split *text* into lowercase-text / integer chunks for natural sorting."""
    # re.split with a capturing group alternates text, digits, text, ...; only
    # the digit chunks are decimal, so int() is safe.  isdecimal() (unlike
    # isdigit()) rejects characters such as "²" that int() cannot parse.
    return [int(chunk) if chunk.isdecimal() else chunk.lower()
            for chunk in re.split(r'(\d+)', text)]


def process_folder(folder_path, output_csv, global_counter):
    """Process all PDF files in a folder and write results to CSV.

    Args:
        folder_path: Directory whose ``*.pdf`` files are read.
        output_csv: CSV file to append to; it is created with a header row
            if it does not exist yet.
        global_counter: Single-element list holding the next sample number.
            It is incremented in place once per PDF so numbering continues
            across successive calls.

    Returns:
        The number of PDFs from which roughness values were extracted.
    """
    # normpath drops a trailing separator, which would otherwise make
    # basename() return an empty folder name.
    folder_name = os.path.basename(os.path.normpath(folder_path))

    # Natural order by the part of the name before the first '-'.  The full
    # name is used as a tie-breaker so files sharing a prefix are ordered
    # deterministically instead of in arbitrary os.listdir() order.
    pdf_files = sorted(
        (f for f in os.listdir(folder_path) if f.lower().endswith('.pdf')),
        key=lambda name: (_natural_key(name.split('-')[0]),
                          _natural_key(name),
                          name),
    )

    data = []
    successful_count = 0

    for pdf_file in tqdm(pdf_files, desc=f"Processing {folder_name}"):
        pdf_path = os.path.join(folder_path, pdf_file)
        ra, rq, rz = extract_roughness_parameters(pdf_path)

        if any(value is not None for value in (ra, rq, rz)):
            successful_count += 1

        # A row is written for every PDF, even when extraction failed, so
        # missing measurements stay visible in the output.
        data.append({
            'Sample': global_counter[0],
            'Ra': ra,
            'Rq': rq,
            'Rz': rz,
            'File_name': pdf_file,
            'Folder_Name': folder_name
        })
        global_counter[0] += 1  # Increment the global counter

    # Append to an existing CSV; otherwise create it and emit the header.
    file_exists = os.path.isfile(output_csv)
    with open(output_csv, 'a' if file_exists else 'w', newline='') as csvfile:
        fieldnames = ['Sample', 'Ra', 'Rq', 'Rz', 'File_name', 'Folder_Name']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        if not file_exists:
            writer.writeheader()

        writer.writerows(data)

    print(f"Extracted parameters from {successful_count}/{len(pdf_files)} files in {folder_name}")
    return successful_count

def _natural_folder_key(name):
    """Sort key ordering names naturally, e.g. "GVD2" before "GVD10"."""
    chunks = [int(chunk) if chunk.isdecimal() else chunk.lower()
              for chunk in re.split(r'(\d+)', name)]
    # The raw name breaks ties between names that differ only in case.
    return chunks, name


def main():
    """Prompt for paths, process every ``GVD*`` folder, and print a summary.

    Reads the base directory and the output CSV path from standard input,
    runs ``process_folder`` on each sub-directory of the base directory whose
    name starts with ``GVD``, and prints the totals.
    """
    base_dir = input("Enter the base directory containing measurement folders: ").strip()
    output_csv = input("Enter output CSV path: ").strip()

    # A directory (existing, or written with a trailing separator) means
    # "use the default file name inside it".
    if output_csv.endswith(('/', os.sep)) or os.path.isdir(output_csv):
        output_csv = os.path.join(output_csv, "roughness_data.csv")

    # Make sure the target directory exists so the CSV can be opened.
    output_dir = os.path.dirname(output_csv)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Get all measurement folders in natural order.  os.listdir() returns
    # entries in arbitrary order, so sort explicitly to keep the sample
    # numbering reproducible.
    folder_names = [
        entry for entry in os.listdir(base_dir)
        if entry.startswith("GVD") and os.path.isdir(os.path.join(base_dir, entry))
    ]
    folder_names.sort(key=_natural_folder_key)
    all_folders = [os.path.join(base_dir, name) for name in folder_names]

    # Initialize CSV file
    # NOTE(review): an existing CSV is left untouched and rows are appended,
    # while sample numbering restarts at 1 -- confirm this is intended when
    # re-running against the same output path.
    if not os.path.exists(output_csv):
        with open(output_csv, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=['Sample', 'Ra', 'Rq', 'Rz', 'File_name', 'Folder_Name'])
            writer.writeheader()

    # Process folders in order
    total_files = 0
    total_successful = 0
    global_counter = [1]  # One-element list so process_folder can update it in place

    for folder in all_folders:
        total_successful += process_folder(folder, output_csv, global_counter)
        # process_folder only returns the success count, so count PDFs here.
        total_files += sum(
            1 for f in os.listdir(folder) if f.lower().endswith('.pdf')
        )

    print("\nSummary:")
    print(f"Processed {len(all_folders)} folders containing {total_files} PDF files")
    print(f"Successfully extracted values from {total_successful} files")
    print(f"Results saved to: {output_csv}")

# Start the interactive extraction only when this code is executed directly
# (script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()
