In [5]:
import json
import logging
import os
import posixpath
import zipfile

import boto3
from botocore.exceptions import ClientError

# Configure the root logger so INFO-level (and more severe) messages are emitted.
logging.basicConfig(level=logging.INFO)

# Name of the S3 bucket that upload_file_to_s3() uploads into.
# The value below is a placeholder and must be replaced with a real bucket name.
BUCKET = 'your-bucket-name'  # placeholder: set this before uploading

def unzip_data(input_data_path, extract_dir='.'):
    """Extract every member of a ZIP archive into ``extract_dir``.

    Args:
        input_data_path: Path to the ZIP archive to extract.
        extract_dir: Directory the archive members are extracted into.
            Defaults to ``'.'`` (the current working directory).

    Raises:
        FileNotFoundError: If ``input_data_path`` does not exist.
        ValueError: If ``input_data_path`` exists but is not a valid ZIP file.
        Exception: Any other error raised while reading or extracting the
            archive is logged and then re-raised unchanged.
    """
    try:
        # Fail early with a clear message instead of a low-level open() error.
        if not os.path.exists(input_data_path):
            raise FileNotFoundError(f"The file {input_data_path} does not exist")

        # is_zipfile() inspects the file contents, not the file extension.
        if not zipfile.is_zipfile(input_data_path):
            raise ValueError(f"The file {input_data_path} is not a valid ZIP file")

        with zipfile.ZipFile(input_data_path, 'r') as input_data_zip:
            # Log the member list to help debug unexpected archive layouts.
            logging.info(f"Zip file contents: {input_data_zip.namelist()}")
            input_data_zip.extractall(extract_dir)
            logging.info(
                f"Successfully extracted {input_data_path} to {os.path.abspath(extract_dir)}"
            )

    except Exception as e:
        # Log for visibility, then let the caller decide how to handle it.
        logging.error(f"Error unzipping {input_data_path}: {str(e)}")
        raise

def upload_file_to_s3(file_name, s3_prefix, s3_client=None, bucket=None):
    """Upload a local file to S3 under ``s3_prefix``.

    The object key is ``s3_prefix`` joined to ``file_name`` with forward
    slashes, regardless of the local operating system's path separator.

    Args:
        file_name: Local path of the file to upload. It is also used as the
            trailing part of the object key.
        s3_prefix: Key prefix placed in front of ``file_name``.
        s3_client: Optional object exposing
            ``upload_file(filename, bucket, key)``. When omitted, a new boto3
            S3 client is created for this call; pass one in to reuse a single
            client across many uploads.
        bucket: Destination bucket name. Defaults to the module-level
            ``BUCKET`` constant.

    Returns:
        True if the upload call completed without raising, False otherwise.
        Upload errors are logged rather than propagated.
    """
    # S3 keys use '/' as the delimiter; os.path.join would insert '\\' on
    # Windows, so normalise the local separator and join with posixpath.
    key_suffix = file_name.replace(os.sep, '/')
    object_name = posixpath.join(s3_prefix, key_suffix)

    if bucket is None:
        bucket = BUCKET
    if s3_client is None:
        s3_client = boto3.client('s3')

    try:
        s3_client.upload_file(file_name, bucket, object_name)
        logging.info(f"Successfully uploaded {file_name} to s3://{bucket}/{object_name}")
        return True
    except (ClientError, boto3.exceptions.S3UploadFailedError) as e:
        # boto3's managed transfer wraps ClientError in S3UploadFailedError,
        # so both are treated as ordinary upload failures.
        logging.error(f"Error uploading {file_name} to S3: {e}")
        return False
    except Exception as e:
        # Deliberate best-effort: report the failure and signal it via the
        # return value instead of aborting a batch of uploads.
        logging.error(f"Unexpected error: {e}")
        return False

if __name__ == "__main__":
    # Script entry point: locate dogImages.zip and extract it.
    zip_file = 'dogImages.zip'

    # If the archive is not in the current directory, try a few other places.
    if not os.path.exists(zip_file):
        possible_paths = [
            os.path.join('data', zip_file),
            os.path.join('../', zip_file),
        ]

        # __file__ is not defined when this code runs in a notebook cell or
        # an interactive session, so only add the script-relative candidate
        # when it is available instead of failing with NameError.
        script_path = globals().get('__file__')
        if script_path is not None:
            possible_paths.append(os.path.join(os.path.dirname(script_path), zip_file))

        # Take the first candidate that exists, in the order listed above.
        located = next((p for p in possible_paths if os.path.exists(p)), None)
        if located is None:
            raise FileNotFoundError(f"Could not find {zip_file} in any of these locations: {possible_paths}")
        zip_file = located

    logging.info(f"Using zip file at: {os.path.abspath(zip_file)}")

    # Unzip the file
    unzip_data(zip_file)

    # Example of how you might upload files after unzipping
    # for root, dirs, files in os.walk('dogImages'):
    #     for file in files:
    #         upload_file_to_s3(os.path.join(root, file), 'dog-images')

INFO:root:Using zip file at: /home/sagemaker-user/udemy-aws-machine_learning/udacity_mle/CD0387-deep-learning-topics-within-computer-vision-nlp-project-starter/dogImages.zip
ERROR:root:Error unzipping dogImages.zip: The file dogImages.zip is not a valid ZIP file


ValueError: The file dogImages.zip is not a valid ZIP file