In [1]:
from fpdf import FPDF
import pandas as pd

def generate_pdfs_from_rows(df, n, output_folder="pdfs"):
    """Write one PDF per row for the first ``n`` rows of a dataframe.

    Every PDF holds a single text block with one ``column: value`` line per
    dataframe column and is saved as
    ``<output_folder>/Medical_invoice_<k>.pdf``, where ``k`` counts from 1.
    When the dataframe has fewer than ``n`` rows, only ``len(df)`` files are
    written; a non-positive ``n`` writes nothing.

    Args:
        df (pd.DataFrame): The input dataframe.
        n (int): Maximum number of PDFs to generate (from the first n rows).
        output_folder (str): Folder to save the PDFs. Created if it does not
            exist yet.
    """
    import os
    os.makedirs(output_folder, exist_ok=True)

    for i in range(min(n, len(df))):
        pdf = FPDF()
        pdf.set_auto_page_break(auto=True, margin=15)
        pdf.add_page()
        pdf.set_font("Arial", size=12)

        row = df.iloc[i]
        # Iterate label/value pairs directly: looking values up with row[col]
        # returns a nested Series when column labels are duplicated, which
        # would garble the rendered line.
        content = "\n".join(f"{col}: {value}" for col, value in row.items())
        # The built-in core font only supports Latin-1 text. Replace anything
        # outside that range with "?" so a single unusual character cannot
        # abort the whole export with an encoding error.
        content = content.encode("latin-1", "replace").decode("latin-1")

        pdf.multi_cell(0, 10, content)

        pdf_path = f"{output_folder}/Medical_invoice_{i+1}.pdf"
        pdf.output(pdf_path)
        print(f"Saved: {pdf_path}")

# Example usage: read the preprocessed dataset CSV, discard the
# "PotentialFraud" column, and render the first ten rows as PDFs.
df = pd.read_csv(
    r"F:\Medical_Fraud\Dataset\final_dataset_preprocessed.csv"
).drop(columns=["PotentialFraud"])

generate_pdfs_from_rows(df, n=10)


Saved: pdfs/Medical_invoice_1.pdf
Saved: pdfs/Medical_invoice_2.pdf
Saved: pdfs/Medical_invoice_3.pdf
Saved: pdfs/Medical_invoice_4.pdf
Saved: pdfs/Medical_invoice_5.pdf
Saved: pdfs/Medical_invoice_6.pdf
Saved: pdfs/Medical_invoice_7.pdf
Saved: pdfs/Medical_invoice_8.pdf
Saved: pdfs/Medical_invoice_9.pdf
Saved: pdfs/Medical_invoice_10.pdf


In [None]:
def generate_fraudpdfs_from_rows(df, n, output_folder="pdfs"):
    """Write one "fraud" PDF per row for the first ``n`` rows of a dataframe.

    Every PDF holds a single text block with one ``column: value`` line per
    dataframe column and is saved as
    ``<output_folder>/Fraud_Medical_invoice_<k>.pdf``, where ``k`` counts
    from 1. When the dataframe has fewer than ``n`` rows, only ``len(df)``
    files are written; a non-positive ``n`` writes nothing.

    NOTE(review): apart from the ``Fraud_`` file-name prefix this mirrors
    ``generate_pdfs_from_rows``; consider sharing a single implementation.

    Args:
        df (pd.DataFrame): The input dataframe.
        n (int): Maximum number of PDFs to generate (from the first n rows).
        output_folder (str): Folder to save the PDFs. Created if it does not
            exist yet.
    """
    import os
    os.makedirs(output_folder, exist_ok=True)

    for i in range(min(n, len(df))):
        pdf = FPDF()
        pdf.set_auto_page_break(auto=True, margin=15)
        pdf.add_page()
        pdf.set_font("Arial", size=12)

        row = df.iloc[i]
        # Iterate label/value pairs directly: looking values up with row[col]
        # returns a nested Series when column labels are duplicated, which
        # would garble the rendered line.
        content = "\n".join(f"{col}: {value}" for col, value in row.items())
        # The built-in core font only supports Latin-1 text. Replace anything
        # outside that range with "?" so a single unusual character cannot
        # abort the whole export with an encoding error.
        content = content.encode("latin-1", "replace").decode("latin-1")

        pdf.multi_cell(0, 10, content)

        pdf_path = f"{output_folder}/Fraud_Medical_invoice_{i+1}.pdf"
        pdf.output(pdf_path)
        print(f"Saved: {pdf_path}")


In [6]:
# Hand-written single-row record used to produce one "fraud" invoice PDF.
# Each key becomes a dataframe column and each one-element list its only value.
# NOTE(review): every field, including IDs and dates, holds a tiny float --
# presumably placeholder values; confirm this is intended.
data = {
    "BeneID": [0.000002],
    "ClaimID": [0.00000003],
    "ClaimStartDt": [0.000001],
    "ClaimEndDt": [0.000004],
    "Provider": [0.0000005],
    "InscClaimAmtReimbursed": [0.00000007],
    "AttendingPhysician": [0.000003],
    "OperatingPhysician": [0.0000025],
    "OtherPhysician": [0.0000015],
    "AdmissionDt": [0.0000042],
    "ClmAdmitDiagnosisCode": [0.0000009],
    "DeductibleAmtPaid": [0.0000023],
    "DischargeDt": [0.0000017],
    "DiagnosisGroupCode": [0.0000035],
    "ClmDiagnosisCode_1": [0.0000006],
    "ClmDiagnosisCode_2": [0.0000011],
    "ClmDiagnosisCode_3": [0.0000022],
    "ClmDiagnosisCode_4": [0.0000018],
    "ClmDiagnosisCode_5": [0.0000027],
    "ClmDiagnosisCode_6": [0.0000031],
    "ClmDiagnosisCode_7": [0.0000008],
    "ClmDiagnosisCode_8": [0.0000024],
    "ClmDiagnosisCode_9": [0.0000016],
    "ClmDiagnosisCode_10": [0.0000032],
    "ClmProcedureCode_1": [0.0000004],
    "ClmProcedureCode_2": [0.0000029],
    "ClmProcedureCode_3": [0.0000012],
    "ClmProcedureCode_4": [0.0000033],
    "ClmProcedureCode_5": [0.0000026],
    "DOB": [0.0000005],
    "DOD": [0.0000014],
    "Gender": [0.0000039],
    "Race": [0.0000013],
    "RenalDiseaseIndicator": [0.0000028],
    "State": [0.0000007],
    "County": [0.0000036],
    "NoOfMonths_PartACov": [0.0000019],
    "NoOfMonths_PartBCov": [0.0000037],
    "ChronicCond_Alzheimer": [0.0000021],
    "ChronicCond_Heartfailure": [0.0000003],
    "ChronicCond_KidneyDisease": [0.0000038],
    "ChronicCond_Cancer": [0.000001],
    "ChronicCond_ObstrPulmonary": [0.0000025],
    "ChronicCond_Depression": [0.0000009],
    "ChronicCond_Diabetes": [0.0000034],
    "ChronicCond_IschemicHeart": [0.0000006],
    "ChronicCond_Osteoporasis": [0.0000022],
    "ChronicCond_rheumatoidarthritis": [0.0000017],
    "ChronicCond_stroke": [0.000003],
    "IPAnnualReimbursementAmt": [0.0000002],
    "IPAnnualDeductibleAmt": [0.0000015],
    "OPAnnualReimbursementAmt": [0.0000023],
    "OPAnnualDeductibleAmt": [0.0000032]
}

# Build the one-row dataframe and write it out as a single PDF.
frauddf = pd.DataFrame(data)
generate_fraudpdfs_from_rows(frauddf, n=1)

Saved: pdfs/Fraud_Medical_invoice_1.pdf
