In [None]:
# Import necessary libraries
import os
import tarfile
import tempfile

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from fpdf import FPDF
from PIL import Image

# Directory that is scanned for .tar.gz archives. The value below is a
# placeholder and must be replaced with a real path before running.
DIR = '/path/to/your/files'
# Sub-directory of DIR that the archives are extracted into and that is later
# scanned for image files.
EXTRACT_DIR = os.path.join(DIR, 'extracted_images')

# Helper: extraction fallback for interpreters without tarfile filters
def _extract_members_safely(tar_ref, dest_path):
    """Extract ``tar_ref`` into ``dest_path`` after vetting every member.

    Used only when the running Python has no ``tarfile.data_filter``. It is
    deliberately strict: any member that would land outside ``dest_path`` or
    that is not a regular file or directory (links, devices, FIFOs) aborts
    the extraction before anything is written.

    Raises:
        tarfile.ExtractError: If the archive contains an unsafe member.
    """
    root = os.path.realpath(dest_path)
    # os.path.join(root, '') appends exactly one trailing separator, so the
    # prefix test cannot be fooled by a sibling such as "<root>_evil".
    root_prefix = os.path.join(root, '')
    for member in tar_ref.getmembers():
        target = os.path.realpath(os.path.join(root, member.name))
        if target != root and not target.startswith(root_prefix):
            raise tarfile.ExtractError(
                f'{member.name!r} would be extracted outside {dest_path!r}'
            )
        if not (member.isfile() or member.isdir()):
            raise tarfile.ExtractError(
                f'{member.name!r} is a link or special file and is not extracted'
            )
    tar_ref.extractall(dest_path)


# Function to extract tar.gz files
def extract_tar_files(file_list, source_path, dest_path):
    """Extract gzip-compressed tar archives into ``dest_path``.

    Args:
        file_list: Archive file names, relative to ``source_path``.
        source_path: Directory that contains the archives.
        dest_path: Directory to extract into; created (including parents)
            when it does not exist yet.

    Raises:
        tarfile.TarError: If an archive is unreadable or contains a member
            that would be written outside ``dest_path``.
        OSError: If an archive cannot be opened or a file cannot be written.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(dest_path, exist_ok=True)
    for file in file_list:
        file_path = os.path.join(source_path, file)
        with tarfile.open(file_path, 'r:gz') as tar_ref:
            # An archive can carry "../" names, absolute paths or links that
            # escape dest_path. Use the built-in 'data' filter where the
            # interpreter provides it (feature-detected as the tarfile docs
            # recommend), otherwise fall back to a manual check.
            if hasattr(tarfile, 'data_filter'):
                tar_ref.extractall(dest_path, filter='data')
            else:
                _extract_members_safely(tar_ref, dest_path)
        print(f'Extracted {file} to {dest_path}')

# List all tar.gz files. os.listdir() returns names in arbitrary order, and the
# extraction order decides which file survives when two archives contain the
# same member name, so sort to make every run reproducible.
tar_files = sorted(f for f in os.listdir(DIR) if f.endswith('.tar.gz'))

# Extract all tar.gz files
extract_tar_files(tar_files, DIR, EXTRACT_DIR)

# Initialize PDF; analyze_image() appends one page per image to this document
pdf = FPDF()

# Function to perform EDA on an image
def analyze_image(image_path, pdf):
    """Append a one-page grayscale analysis of ``image_path`` to ``pdf``.

    The page contains a title, a rendering of the image converted to
    grayscale, a 50-bin histogram of its pixel intensities, and the mean,
    standard deviation, minimum and maximum intensity.

    Args:
        image_path: Path of the image file to analyse.
        pdf: ``FPDF`` document to extend; it is modified in place.

    Errors raised by Pillow for a missing or unreadable image propagate
    unchanged.
    """
    name = os.path.basename(image_path)
    # The core "Arial" font can only encode Latin-1 text. Replace any other
    # character so one unusual file name cannot abort the whole report.
    pdf_name = name.encode('latin-1', 'replace').decode('latin-1')

    # Load inside a context manager so the file handle is closed before the
    # caller deletes the image.
    with Image.open(image_path) as img:
        img_array = np.array(img.convert('L'))  # Convert to grayscale

    # Render the plots into a private temporary directory: nothing is written
    # to the working directory, two images can never collide on a plot file
    # name, and the files are removed even if a later step raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        image_plot_path = os.path.join(tmp_dir, 'plot.png')
        histogram_path = os.path.join(tmp_dir, 'histogram.png')

        # Explicit figures instead of the implicit pyplot "current figure", so
        # a figure left open elsewhere (e.g. in a notebook) is never drawn on.
        fig, ax = plt.subplots()
        try:
            ax.imshow(img_array, cmap='gray')
            ax.axis('off')
            ax.set_title(f"Image: {name}")
            fig.savefig(image_plot_path)
        finally:
            plt.close(fig)

        # Histogram of pixel intensities
        fig, ax = plt.subplots()
        try:
            ax.hist(img_array.ravel(), bins=50, color='c', alpha=0.75)
            ax.set_xlabel('Pixel Intensity')
            ax.set_ylabel('Frequency')
            ax.set_title(f"Histogram: {name}")
            fig.savefig(histogram_path)
        finally:
            plt.close(fig)

        # Adding image plot to PDF. The plot files must still exist here
        # because pdf.image() reads them from disk.
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        pdf.cell(200, 10, txt=f"Analysis of {pdf_name}", ln=True, align='C')
        # A default FPDF page is 210 mm wide: two 90 mm images at x=10 and
        # x=110 fit side by side, whereas x=120 with w=100 ran off the page.
        pdf.image(image_plot_path, x=10, y=30, w=90)
        pdf.image(histogram_path, x=110, y=30, w=90)

    # Adding basic statistics to PDF (ln(120) moves the cursor below the plots)
    pdf.ln(120)
    pdf.cell(200, 10, txt=f"Statistics for {pdf_name}:", ln=True)
    pdf.cell(200, 10, txt=f"Mean: {np.mean(img_array):.2f}", ln=True)
    pdf.cell(200, 10, txt=f"Standard Deviation: {np.std(img_array):.2f}", ln=True)
    pdf.cell(200, 10, txt=f"Min: {np.min(img_array)}", ln=True)
    pdf.cell(200, 10, txt=f"Max: {np.max(img_array)}", ln=True)

# List all image files below the extraction directory. Archives often unpack
# into sub-folders, so walk the whole tree rather than only the top level;
# compare extensions case-insensitively so names such as "SCAN.JPG" are found;
# and sort so the report pages come out in the same order on every run.
image_files = sorted(
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk(EXTRACT_DIR)
    for name in names
    if name.lower().endswith(('.png', '.jpg', '.jpeg'))
)

if not image_files:
    print(f"No image files found in {EXTRACT_DIR}")

# Process each image file
for image_file in image_files:
    analyze_image(image_file, pdf)
    os.remove(image_file)  # Optionally, remove the image file after processing to save space

# Save PDF to the current directory
pdf_output_path = os.path.join(os.getcwd(), "image_analysis_report.pdf")
pdf.output(pdf_output_path)
print(f"PDF report saved to {pdf_output_path}")
