In [13]:
import os
import requests
from datetime import datetime
import json
from google.colab import drive

# Make Google Drive reachable under /content/drive (Colab runtime)
drive.mount('/content/drive')

# Read the user records stored in users.json into memory
with open('/content/drive/MyDrive/test/users.json') as users_file:
    users_data = json.loads(users_file.read())

# Download one image and store it in a (Drive-mounted) folder
def upload_image_to_drive(image_url, image_name, folder_path, timeout=30):
    """Download ``image_url`` and save it as ``image_name`` inside ``folder_path``.

    Args:
        image_url: URL the image is fetched from.
        image_name: File name (including extension) to save the image under.
        folder_path: Existing directory that receives the file.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On a network failure, a timeout, or an
            HTTP error status (4xx/5xx).
        OSError: If the destination file cannot be written.
    """
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(image_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of saving the server's error page as an image.
    response.raise_for_status()

    # Save the image to the specified folder
    with open(os.path.join(folder_path, image_name), 'wb') as img_file:
        img_file.write(response.content)
    print(f"{folder_path}: image '{image_name}' downloaded and saved in Google Drive.")

# Name of the Drive folder that groups the whole dataset
parent_folder_name = 'PFA-Dataset'

# Name of the sub-folder (inside the parent folder) that receives the images
images_folder_name = 'images'

# Drive ID of the parent folder.
# NOTE(review): nothing below reads this value - the folder is reached through
# the mounted filesystem path instead; confirm whether it is still needed.
parent_folder_id = '1j79Cy8JuvUZ4ab6N0USHVY45qM1IsRhz'  # Update with the correct parent folder ID

# Create the 'images' folder under the parent folder if it doesn't exist
images_folder = os.path.join('/content/drive/My Drive', parent_folder_name, images_folder_name)
os.makedirs(images_folder, exist_ok=True)

# Maximum number of users handled per run of this cell. Users that need no work
# (every post already carries its photo_id and has no URL left) do not count,
# so re-running the cell moves on to the next batch of users.
users_to_upload = 200  # Change this value as needed

# Number of users handled so far in this run
users_processed = 0

for user_data in users_data:
    # Stop once the configured batch size has been reached.
    if users_processed >= users_to_upload:
        break

    username = user_data.get('user')
    if not username:
        print("Warning: User has no username.")
        continue

    user_folder = os.path.join(images_folder, username)
    os.makedirs(user_folder, exist_ok=True)  # Create user folder if it doesn't exist

    # Becomes True once this run changed a post of this user or attempted a download.
    user_touched = False

    # Tolerate records without a 'posts' entry instead of raising KeyError.
    for post in user_data.get('posts') or []:
        # Prefer the full media URL and fall back to the thumbnail.
        image_url = post.get('media_url') or post.get('thumbnail_url')

        # "%z" parses the "+0000" suffix as a UTC offset, so the epoch value does
        # not depend on the timezone of the machine running the notebook.
        try:
            timestamp = datetime.strptime(post['timestamp'], "%Y-%m-%dT%H:%M:%S%z")
        except (KeyError, TypeError, ValueError) as exc:
            print(f"Warning: skipping a post of '{username}' with a missing or malformed timestamp ({exc!r}).")
            continue

        # photo_id is the post's timestamp in epoch seconds; it is also the file name.
        photo_id = f"{int(timestamp.timestamp())}"

        newly_tagged = post.get('photo_id') != photo_id
        if newly_tagged:
            post['photo_id'] = photo_id
            user_touched = True

        if not image_url:
            # Warn only the first time the post is seen; on later runs a post
            # without URLs is simply one that was already handled.
            if newly_tagged:
                print("Warning: 'media_url' not found for a post.")
            continue

        user_touched = True
        try:
            # Upload the image to the user's folder
            upload_image_to_drive(image_url, f"{photo_id}.jpg", user_folder)
        except (requests.RequestException, OSError) as exc:
            # Keep the URL keys so that a later run can retry this post.
            print(f"Warning: could not download '{image_url}' for '{username}': {exc}")
            continue

        # Drop the URLs only after the image has actually been saved.
        post.pop('media_url', None)
        post.pop('thumbnail_url', None)

    if not user_touched:
        continue

    users_processed += 1

    # Serialise before opening the file: a serialisation error then cannot leave
    # users.json truncated, and the write itself is a single call.
    serialized_users = json.dumps(users_data)
    with open('/content/drive/MyDrive/test/users.json', 'w') as f:
        f.write(serialized_users)

print(f"Processed {users_processed} user(s) in this run (limit: {users_to_upload}).")
print("photo_id written to users.json and media_url removed for each photo downloaded.")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
'/content/drive/My Drive/PFA-Dataset/images/emilyhenrywrites : 'Image '1714057008.jpg' downloaded and saved in Google Drive.
'/content/drive/My Drive/PFA-Dataset/images/emilyhenrywrites : 'Image '1713552075.jpg' downloaded and saved in Google Drive.
'/content/drive/My Drive/PFA-Dataset/images/emilyhenrywrites : 'Image '1712693127.jpg' downloaded and saved in Google Drive.
'/content/drive/My Drive/PFA-Dataset/images/emilyhenrywrites : 'Image '1712013006.jpg' downloaded and saved in Google Drive.
'/content/drive/My Drive/PFA-Dataset/images/emilyhenrywrites : 'Image '1711215555.jpg' downloaded and saved in Google Drive.
'/content/drive/My Drive/PFA-Dataset/images/emilyhenrywrites : 'Image '1698948069.jpg' downloaded and saved in Google Drive.
'/content/drive/My Drive/PFA-Dataset/images/emilyhenrywrites : 'Image '1698360854.jpg' downloaded and saved in Google Dri

KeyboardInterrupt: 

In [None]:
import os
from google.colab import drive

# Make Google Drive available at /content/drive
drive.mount('/content/drive')

# Root directory that holds the per-user folders
pfa2_directory = '/content/drive/My Drive/pfa2/images'

# Maps each username folder to the number of files counted inside it
folder_image_counts = dict()

# Count the files below a folder, descending into every subfolder
def count_images(folder_path, extensions=None):
    """Count the files stored under ``folder_path``, including all subfolders.

    Args:
        folder_path: Directory to scan.
        extensions: Optional file extension, or iterable of extensions
            (e.g. ``('.jpg', '.png')``), matched case-insensitively against
            the end of each file name. When given, only matching files are
            counted. The default ``None`` counts every file.

    Returns:
        The number of counted files as an ``int``.

    Raises:
        OSError: If ``folder_path`` or one of its subfolders cannot be listed.
    """
    suffixes = None
    if extensions is not None:
        # A bare string would otherwise be iterated character by character.
        if isinstance(extensions, str):
            extensions = (extensions,)
        suffixes = tuple(ext.lower() for ext in extensions)

    total_images = 0
    # Explicit stack instead of recursion, so deep trees cannot hit the
    # interpreter's recursion limit.
    pending_folders = [folder_path]
    while pending_folders:
        current_folder = pending_folders.pop()
        # scandir reports the entry type together with the name, which avoids
        # a separate lookup per entry where the filesystem provides it.
        with os.scandir(current_folder) as entries:
            for entry in entries:
                if entry.is_dir():
                    pending_folders.append(entry.path)
                elif entry.is_file():
                    if suffixes is None or entry.name.lower().endswith(suffixes):
                        total_images += 1
    return total_images

# Folders holding fewer files than this are listed in the report below
image_count_threshold = 100

# Number of user folders found directly under pfa2/images
folder_count = 0

# Sorted so that the report order is the same on every run
for username_folder in sorted(os.listdir(pfa2_directory)):
    # Construct the full path to the username folder
    username_folder_path = os.path.join(pfa2_directory, username_folder)

    # Plain files lying next to the user folders are neither counted as
    # folders nor scanned.
    if not os.path.isdir(username_folder_path):
        continue

    folder_count += 1

    # Store the number of files found in the folder and its subfolders
    folder_image_counts[username_folder] = count_images(username_folder_path)

# Print the number of folders under pfa2/images
print(f"Number of folders under 'pfa2/images': {folder_count}")

# Print the username folders whose count is below the threshold
print(f"Folders with image count less than {image_count_threshold}:")
for username, count in folder_image_counts.items():
    if count < image_count_threshold:
        print(f"{username}: {count} images")
