In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from fpdf import FPDF
import sys
import os

def generate_pdf_report(file_path):
    # Load dataset
    df = pd.read_csv(r"C:\Users\91778\Desktop\Tasks\Automated Report Generation\IEA-EV-dataEV salesHistoricalCars.csv")
    base_name = os.path.splitext(os.path.basename(file_path))[0]

    # Create preview table
    preview = df.head().to_string(index=False)

    # Basic info
    num_rows, num_cols = df.shape
    column_summary = df.describe(include='all', datetime_is_numeric=True).transpose()

    # Save correlation heatmap if numeric columns exist
    numeric_df = df.select_dtypes(include="number")
    if not numeric_df.empty:
        plt.figure(figsize=(8, 6))
        sns.heatmap(numeric_df.corr(), annot=True, cmap="coolwarm", fmt=".2f")
        plt.title("Correlation Heatmap")
        heatmap_path = f"{base_name}_correlation.png"
        plt.tight_layout()
        plt.savefig(heatmap_path)
        plt.close()
    else:
        heatmap_path = None

    # Create PDF
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()

    # Title
    pdf.set_font("Arial", "B", 16)
    pdf.cell(0, 10, f"Dataset Report: {base_name}", ln=True, align="C")
    pdf.set_font("Arial", "", 12)
    pdf.ln(10)

    # Overview
    pdf.multi_cell(0, 10, f"This report provides a basic overview of the dataset '{base_name}'.")
    pdf.ln(2)
    pdf.multi_cell(0, 10, f"Number of rows: {num_rows}\nNumber of columns: {num_cols}")
    pdf.ln(5)

    # Column summary
    pdf.set_font("Arial", "B", 12)
    pdf.cell(0, 10, "Column Summary", ln=True)
    pdf.set_font("Arial", "", 10)
    for col in df.columns:
        dtype = df[col].dtype
        missing = df[col].isnull().sum()
        pdf.multi_cell(0, 8, f"- {col} (type: {dtype}, missing: {missing})")
    pdf.ln(5)

    # Data preview
    pdf.set_font("Arial", "B", 12)
    pdf.cell(0, 10, "Data Preview", ln=True)
    pdf.set_font("Courier", "", 8)
    for line in preview.split('\n'):
        pdf.cell(0, 5, line, ln=True)
    pdf.ln(5)

    # Insert heatmap
    if heatmap_path:
        pdf.set_font("Arial", "B", 12)
        pdf.cell(0, 10, "Correlation Heatmap", ln=True)
        pdf.image(heatmap_path, x=10, w=180)

    # Save PDF
    pdf_output = f"{base_name}_report.pdf"
    pdf.output(pdf_output)
    print(f"PDF report generated: {pdf_output}")

# To use in command line: python generate_dataset_report.py yourfile.csv
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python generate_dataset_report.py <your_dataset.csv>")
    else:
        generate_pdf_report(sys.argv[1])


PDF report generated: -f_report.pdf
