# <b>Image Metadata ETL</b>

In [17]:
import os
from PIL import Image
import sqlite3
import time

## <b>Extract Data</b>

In [18]:
def extract_image_files(directory):
    """Return the paths of image files located directly inside ``directory``.

    A directory entry is treated as an image when its name ends (case-insensitively)
    with one of the extensions listed below AND it is a regular file. The file
    content is not inspected here.

    Args:
        directory: Path of the folder to scan. Sub-folders are not searched.

    Returns:
        list[str]: ``os.path.join(directory, name)`` for every matching entry,
        sorted by entry name. The paths are absolute only if ``directory`` is.

    Raises:
        OSError: Propagated from ``os.listdir`` (for example when ``directory``
            does not exist or is not a directory).
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff')

    image_files = []
    # os.listdir returns entries in arbitrary order; sorting makes runs reproducible.
    for entry in sorted(os.listdir(directory)):
        if not entry.lower().endswith(image_extensions):
            continue
        path = os.path.join(directory, entry)
        # Skip anything that is not a regular file (e.g. a folder named "thumbs.png"),
        # which would otherwise fail later when it is opened as an image.
        if os.path.isfile(path):
            image_files.append(path)

    return image_files

## <b>Transform Data</b>

In [19]:
def transform_image_metadata(image_files):
    """Build one metadata record per image path.

    Each image is opened with Pillow to read its properties; file size and
    modification time are read from the filesystem.

    Args:
        image_files: Iterable of paths to image files.

    Returns:
        list[dict]: One dictionary per input path, in input order, with the keys
        ``file_name``, ``resolution`` (the image's ``size``, i.e. width and height),
        ``file_size`` (bytes), ``format``, ``mode`` and ``last_modified``
        (a ``time.ctime`` string).
    """
    records = []

    for path in image_files:
        # The context manager closes the image file once its properties are read.
        with Image.open(path) as picture:
            records.append({
                'file_name': os.path.basename(path),  # name without the folder part
                'resolution': picture.size,  # (width, height)
                'file_size': os.path.getsize(path),  # size on disk, in bytes
                'format': picture.format,  # e.g. JPEG, PNG
                'mode': picture.mode,  # e.g. RGB, RGBA
                'last_modified': time.ctime(os.path.getmtime(path)),
            })

    return records

## <b>Load Data</b>

In [20]:
def load_metadata_to_db(metadata, db_file='image_metadata.db'):
    """Replace the ``image_metadata`` table in a SQLite database with ``metadata``.

    The table is dropped and recreated on every call, so after a successful call
    it contains exactly the given records.

    Args:
        metadata: Iterable of dictionaries with the keys ``file_name``,
            ``resolution`` (indexable; items 0 and 1 are stored as
            ``"<item0>x<item1>"``), ``file_size``, ``format``, ``mode`` and
            ``last_modified``.
        db_file: Path of the SQLite database file; passed to ``sqlite3.connect``.

    Raises:
        KeyError, IndexError, TypeError: If a record is missing a key or has an
            unusable ``resolution``. Raised before the database is touched, so
            an existing table is left intact.
        sqlite3.Error: Propagated from SQLite.
    """
    # Build every row up front: a malformed record must fail *before* the
    # existing table is dropped, not halfway through the load.
    rows = [
        (
            data['file_name'],
            f"{data['resolution'][0]}x{data['resolution'][1]}",
            data['file_size'],
            data['format'],
            data['mode'],
            data['last_modified'],
        )
        for data in metadata
    ]

    conn = sqlite3.connect(db_file)
    try:
        # `with conn` commits on success and rolls back the open transaction on error.
        with conn:
            # Dropping the existing table if it exists
            conn.execute('DROP TABLE IF EXISTS image_metadata')

            # Creating a new table with the updated schema
            conn.execute('''
                CREATE TABLE IF NOT EXISTS image_metadata (
                    file_name TEXT,
                    resolution TEXT,
                    file_size INTEGER,
                    format TEXT,
                    mode TEXT,
                    last_modified TEXT
                )
            ''')

            conn.executemany('''
                INSERT INTO image_metadata (file_name, resolution, file_size, format, mode, last_modified)
                VALUES (?, ?, ?, ?, ?, ?)
            ''', rows)
    finally:
        # Always release the connection, even when a statement above raised.
        conn.close()

## <b>Main Code Run</b>

In [21]:
# Run the full ETL pipeline: extract image paths, transform them into metadata
# records, and load those records into SQLite. The guard keeps the pipeline from
# running when this code is imported as a module.
if __name__ == "__main__":
    directory = 'Image_Data'  # Relative path, so it is resolved against the current working directory
    image_files = extract_image_files(directory)  # Extract: paths of the image files found in the directory
    metadata = transform_image_metadata(image_files)  # Transform: one metadata dictionary per image
    load_metadata_to_db(metadata)  # Load: writes to the default database file 'image_metadata.db'