In [1]:
import boto3 , os

def list_s3_files(bucket_name, prefix=''):
    # Initialize the S3 client
    s3 = boto3.client(
        's3',
        aws_access_key_id=os.getenv("ACCESS_KEY"),
        aws_secret_access_key=os.getenv("SECRET_KEY")
    )
    
    # List to store file names
    file_names = []
    
    # Use paginator for handling large number of files
    paginator = s3.get_paginator('list_objects_v2')
    pages = paginator.paginate(Bucket=bucket_name, Prefix=prefix)
    
    for page in pages:
        if 'Contents' in page:
            for obj in page['Contents']:
                file_names.append(obj['Key'])
    
    return file_names

# Example usage
bucket_name = 'pythonteam'  # Replace with your S3 bucket name
prefix = ''  # Optionally, specify a folder path to narrow the results

file_names = list_s3_files(bucket_name, prefix)
print("Files in S3 Bucket:")
for file_name in file_names:
    print(file_name)


Files in S3 Bucket:
new folder/
new folder/10-28-24.png
new folder/10-29-24.png
new folder/10-30-24.png
new folder/10-31-24.png
new folder/11-01-24.png
new folder/11-02-24.png
new folder/11-03-24.png


In [8]:
import boto3
import os 
import stat
import tempfile
# # AWS credentials configuration
# aws_access_key_id = 'your-access-key-id'
# aws_secret_access_key = 'your-secret-access-key'
# region_name = 'your-region'

# S3 bucket and local directory setup
bucket_name = 'pythonteam'
local_dir = './downloaded_files'  # Local directory where files will be saved

# Create the local directory if it doesn't exist
download_path = tempfile.mkdtemp()
print("Temporary download path:", download_path)

# Initialize the S3 client
s3 = boto3.client(
        's3',
        aws_access_key_id=os.getenv("ACCESS_KEY"),
        aws_secret_access_key=os.getenv("SECRET_KEY")
    )

# List and download all files from the S3 bucket
def download_files_from_s3():
    # List objects in the specified S3 bucket
    response = s3.list_objects_v2(Bucket=bucket_name)

    # Check if there are contents in the bucket
    if 'Contents' in response:
        for obj in response['Contents']:
            file_key = obj['Key']
            local_file_path = os.path.join(download_path, os.path.basename(file_key))
            print(local_file_path)
            # Download each file
            print(f"Downloading {file_key} to {local_file_path}")
            s3.download_file(bucket_name, file_key, local_file_path)
        print("All files downloaded successfully.")
    else:
        print("No files found in the bucket.")

# Run the download function
download_files_from_s3()


Temporary download path: C:\Users\user\AppData\Local\Temp\tmp5wk8zlh0
C:\Users\user\AppData\Local\Temp\tmp5wk8zlh0\
Downloading new folder/ to C:\Users\user\AppData\Local\Temp\tmp5wk8zlh0\


PermissionError: [WinError 5] Access is denied: 'C:\\Users\\user\\AppData\\Local\\Temp\\tmp5wk8zlh0\\'

In [10]:
import boto3
import os
import cv2
import numpy as np
from botocore.exceptions import NoCredentialsError
import streamlit as st

# Access AWS credentials from environment variables
s3_access_key = os.getenv("ACCESS_KEY")
s3_secret_key = os.getenv("SECRET_KEY")
bucket_name = "pythonteam"
folder_name = "new folder/"

# Initialize S3 client
s3_client = boto3.client(
    "s3",
    aws_access_key_id=s3_access_key,
    aws_secret_access_key=s3_secret_key,
)

def list_and_show_images(bucket, folder):
    try:
        # List all files in the specified S3 bucket folder
        response = s3_client.list_objects_v2(Bucket=bucket, Prefix=folder)
        if "Contents" not in response:
            print("No files found in the specified folder.")
            return

        for item in response["Contents"]:
            file_key = item["Key"]
            # Check if the file is an image by extension
            if file_key.endswith((".png", ".jpg", ".jpeg", ".gif")):
                # Get the image file from S3
                file_obj = s3_client.get_object(Bucket=bucket, Key=file_key)
                # Read the file content into a bytes array
                file_content = file_obj["Body"].read()
                # Convert bytes data to a NumPy array for OpenCV
                np_arr = np.frombuffer(file_content, np.uint8)
                # Decode the image using OpenCV
                img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
                st.image(img)
                # Display the image
                cv2.imshow(f"Image: {file_key}", img)
                cv2.waitKey(0)  # Press any key to close each image window
        cv2.destroyAllWindows()  # Close all windows after displaying images

    except NoCredentialsError:
        print("Credentials not available.")
    except Exception as e:
        print(f"Error fetching images: {e}")

# Call the function to list and show images
list_and_show_images(bucket_name, folder_name)


In [9]:
import boto3
import os
import streamlit as st

# Access AWS credentials from environment variables
s3_access_key = os.getenv("ACCESS_KEY")
s3_secret_key = os.getenv("SECRET_KEY")
bucket_name = "pythonteam"
folder_name = "new folder/"

# Initialize S3 client
s3_client = boto3.client(
    "s3",
    aws_access_key_id=s3_access_key,
    aws_secret_access_key=s3_secret_key,
)

# Function to list and generate pre-signed URLs for images
def list_image_files_with_presigned_urls(bucket, folder):
    image_urls = []
    try:
        response = s3_client.list_objects_v2(Bucket=bucket, Prefix=folder)
        if "Contents" in response:
            for item in response["Contents"]:
                file_key = item["Key"]
                # Generate pre-signed URL for each image file
                url = s3_client.generate_presigned_url(
                    "get_object",
                    Params={"Bucket": bucket, "Key": file_key},
                    ExpiresIn=3600  # URL expires in 1 hour
                )
                image_urls.append(url)
        else:
            st.write("No image files found in the specified folder.")
    except Exception as e:
        st.error(f"Error listing or accessing files: {e}")
    return image_urls

# Get the list of image URLs
image_urls = list_image_files_with_presigned_urls(bucket_name, folder_name)

# # Display the images in Streamlit
# st.write("### Images in S3 Bucket")
# for url in image_urls:
#     st.image(url, use_column_width=True)
print(image_urls)


['https://pythonteam.s3.ap-south-1.amazonaws.com/new%20folder/?AWSAccessKeyId=AKIAYSWTHYGCCPVZ2JJ4&Signature=QV84yF4dVhXgTNj%2BF9utzg3JM5s%3D&Expires=1730129986', 'https://pythonteam.s3.ap-south-1.amazonaws.com/new%20folder/10-28-24.png?AWSAccessKeyId=AKIAYSWTHYGCCPVZ2JJ4&Signature=lXXszPstYtbNmnnKkZPWofv4SWc%3D&Expires=1730129986', 'https://pythonteam.s3.ap-south-1.amazonaws.com/new%20folder/10-29-24.png?AWSAccessKeyId=AKIAYSWTHYGCCPVZ2JJ4&Signature=CsQMF9O%2FJ65a%2B%2FQTUqbCBWPPMLg%3D&Expires=1730129986', 'https://pythonteam.s3.ap-south-1.amazonaws.com/new%20folder/10-30-24.png?AWSAccessKeyId=AKIAYSWTHYGCCPVZ2JJ4&Signature=VpiQAeywHmAZpMtNeEMD%2FYvlQzQ%3D&Expires=1730129986', 'https://pythonteam.s3.ap-south-1.amazonaws.com/new%20folder/10-31-24.png?AWSAccessKeyId=AKIAYSWTHYGCCPVZ2JJ4&Signature=0%2BZn0QCmVxvojE%2Bv3mwtEFRPWYo%3D&Expires=1730129986', 'https://pythonteam.s3.ap-south-1.amazonaws.com/new%20folder/11-01-24.png?AWSAccessKeyId=AKIAYSWTHYGCCPVZ2JJ4&Signature=yZbOXsAUEEJ69S6gr

In [16]:
def list_images_in_s3(bucket, folder):
    """
    Lists image files in the specified S3 bucket and folder.

    Args:
        bucket (str): S3 bucket name.
        folder (str): Folder path within the bucket.

    Returns:
        list: List of image file keys.
    """
    image_keys = []
    try:
        # List all files in the specified S3 bucket folder
        response = s3_client.list_objects_v2(Bucket=bucket, Prefix=folder)
        if "Contents" not in response:
            print("No files found in the specified folder.")
            return image_keys

        for item in response["Contents"]:
            file_key = item["Key"]
            # Check if the file is an image by extension
            if file_key.endswith((".png", ".jpg", ".jpeg", ".gif")):
                image_keys.append(file_key)

    except NoCredentialsError:
        print("Credentials not available.")
    except Exception as e:
        print(f"Error listing images: {e}")

    return image_keys

In [17]:
s3_access_key = os.getenv("ACCESS_KEY")
s3_secret_key = os.getenv("SECRET_KEY")
bucket_name = "pythonteam"
folder_name = "new folder/"
list_images_in_s3(bucket_name, folder_name)

['new folder/10-28-24.png',
 'new folder/10-29-24.png',
 'new folder/10-30-24.png',
 'new folder/10-31-24.png',
 'new folder/11-01-24.png',
 'new folder/11-02-24.png',
 'new folder/11-03-24.png']

In [22]:
x='new folder/10-28-24.png'

In [23]:
x = x.split("/")[1]
x

'10-28-24.png'

In [24]:
from pymongo import MongoClient

def delete_records_without_date():
    # Replace with your MongoDB connection string
    mongo_uri = "mongodb+srv://moreyeahsaimldatascience:WMelEMakMwCiPygO@aimlmoreyeahs.8vjae.mongodb.net/?retryWrites=true&w=majority&appName=aimlmoreyeahs"
    
    # Create a MongoDB client
    client = MongoClient(mongo_uri)
    
    # Specify the database and collection
    db = client["Marketing_data"]  # Replace with your database name
    collection = db["content_collection"]  # Replace with your collection name

    # Delete records where the "Date" field does not exist
    result = collection.delete_many({"Date": {"$exists": False}})
    
    print(f"Deleted {result.deleted_count} records without a 'Date' key.")

    # Close the MongoDB client
    client.close()

# Call the function to delete records
delete_records_without_date()


Deleted 65 records without a 'Date' key.
