In [8]:
import os
from google.oauth2 import service_account
from googleapiclient.discovery import build

# Authentication
credentials = service_account.Credentials.from_service_account_file('service_account.json')
drive_service = build('drive', 'v3', credentials=credentials)

def list_subfolders(parent_folder_id):
    subfolders = []
    page_token = None
    
    while True:
        response = drive_service.files().list(q=f"'{parent_folder_id}' in parents and mimeType = 'application/vnd.google-apps.folder'",
                                              fields="nextPageToken, files(id, name)",
                                              pageToken=page_token).execute()
        subfolders.extend(response.get('files', []))
        page_token = response.get('nextPageToken', None)
        if not page_token:
            break
    
    return subfolders

def list_files_in_folder(folder_id):
    """
    Lists all files in a folder.
    """
    files = []
    page_token = None
    while True:
        response = drive_service.files().list(q=f"'{folder_id}' in parents",
                                               fields="nextPageToken, files(id, name)",
                                               pageSize=100,
                                               pageToken=page_token).execute()
        files.extend(response.get('files', []))
        page_token = response.get('nextPageToken', None)
        if not page_token:
            break
    return files

def count_files_in_folder(folder_id):
    files = list_files_in_folder(folder_id)
    return len(files)

def count_files_in_subfolders(parent_folder_id):
    subfolders = list_subfolders(parent_folder_id)
    total_files = 0
    
    for subfolder in subfolders:
        subfolder_id = subfolder['id']
        subfolder_name = subfolder['name']
        num_files = count_files_in_folder(subfolder_id)
        total_files += num_files
        print(f"'{subfolder_name}': {num_files} files")
        
        # Recursively count files in sub-subfolders if any
        total_files += count_files_in_subfolders(subfolder_id)
    
    return total_files

# Define the main folder ID
main_folder_id = '1M2Q0pfPkUCcdchYHvTk7Np-KhJ8rn25a'

# Count files in subfolders recursively
total_files = count_files_in_subfolders(main_folder_id)
print(f"\nTotal number of files in all subfolders: {total_files}")


'train': 2 files
'fake': 1399 files
'real': 1399 files
'test': 2 files
'fake': 200 files
'real': 200 files
'validation': 2 files
'real': 401 files
'fake': 401 files

Total number of files in all subfolders: 4006


In [14]:
import os
from google.oauth2 import service_account
from googleapiclient.discovery import build

# Authentication
credentials = service_account.Credentials.from_service_account_file('service_account.json')
drive_service = build('drive', 'v3', credentials=credentials)

def list_files_in_folder(folder_id):
    """
    Lists all files in a folder.
    """
    files = []
    page_token = None
    while True:
        response = drive_service.files().list(q=f"'{folder_id}' in parents",
                                               fields="nextPageToken, files(id, name)",
                                               pageSize=100,
                                               pageToken=page_token).execute()
        files.extend(response.get('files', []))
        page_token = response.get('nextPageToken', None)
        if not page_token:
            break
    return files

def extract_numbers_from_filenames(files):
    """
    Extracts numbers from filenames of the format xxxxx.png.
    """
    numbers = []
    for file in files:
        filename = file['name']
        if filename.endswith('.png'):
            number = filename.split('.')[0]  # Split filename by '.' and take the part before '.png'
            number = int(number) if number.isdigit() else None  # Convert to integer if possible
            if number is not None:
                numbers.append(number)
    return numbers

def get_real_numbers(parent_folder_id, train_subfolder_name, real_subfolder_name):
    train_subfolder_id = get_subfolder_id(parent_folder_id, train_subfolder_name)
    if train_subfolder_id:
        real_subfolder_id = get_subfolder_id(train_subfolder_id, real_subfolder_name)
        if real_subfolder_id:
            files = list_files_in_folder(real_subfolder_id)
            numbers = extract_numbers_from_filenames(files)
            return numbers
        else:
            print(f"Subfolder '{real_subfolder_name}' not found or unable to access.")
            return []
    else:
        print(f"Subfolder '{train_subfolder_name}' not found or unable to access.")
        return []

def get_subfolder_id(parent_folder_id, subfolder_name):
    try:
        response = drive_service.files().list(q=f"'{parent_folder_id}' in parents and name = '{subfolder_name}' and mimeType = 'application/vnd.google-apps.folder'",
                                              fields="files(id)").execute()
        subfolders = response.get('files', [])
        if subfolders:
            return subfolders[0]['id']
        else:
            return None
    except Exception as e:
        print(f"An error occurred while fetching subfolder ID for '{subfolder_name}': {e}")
        return None

# Define the main folder ID
main_folder_id = '1M2Q0pfPkUCcdchYHvTk7Np-KhJ8rn25a'
train_subfolder_name = 'train'
real_subfolder_name = 'real'

# Extract numbers from filenames in the "real" subfolder within the "train" subfolder
real_numbers = get_real_numbers(main_folder_id, train_subfolder_name, real_subfolder_name)

# Print the first 10 numbers for verification
print("Real Numbers:", real_numbers[:10])


Real Numbers: [62424, 62611, 62014, 61091, 61876, 61967, 62751, 62065, 62093, 62662]


In [15]:
len(real_numbers)

1399

In [16]:
min(real_numbers)

61000

In [17]:
max(real_numbers)

62999

In [18]:
max(real_numbers) - min(real_numbers)

1999

In [20]:
train_subfolder_name = 'test'
real_subfolder_name = 'real'

# Extract numbers from filenames in the "real" subfolder within the "train" subfolder
test_numbers = get_real_numbers(main_folder_id, train_subfolder_name, real_subfolder_name)
print(len(test_numbers))
print(min(test_numbers))
print(max(test_numbers))
print(max(test_numbers) - min(test_numbers))


200
61029
62990
1961


In [21]:
train_subfolder_name = 'validation'
real_subfolder_name = 'real'

# Extract numbers from filenames in the "real" subfolder within the "train" subfolder
validation_numbers = get_real_numbers(main_folder_id, train_subfolder_name, real_subfolder_name)
print(len(validation_numbers))
print(min(validation_numbers))
print(max(validation_numbers))
print(max(validation_numbers) - min(validation_numbers))


401
61004
62997
1993


In [22]:
real_pngs = real_numbers+test_numbers+validation_numbers

print(len(real_pngs))
print(min(real_pngs))
print(max(real_pngs))
print(max(real_pngs) - min(real_pngs))

2000
61000
62999
1999


In [25]:
import os
from google.oauth2 import service_account
from googleapiclient.discovery import build

# Authentication
credentials = service_account.Credentials.from_service_account_file('service_account.json')
drive_service = build('drive', 'v3', credentials=credentials)

def list_files_in_folder(folder_id):
    """
    Lists all files in a folder.
    """
    files = []
    page_token = None
    while True:
        response = drive_service.files().list(q=f"'{folder_id}' in parents",
                                               fields="nextPageToken, files(id, name)",
                                               pageSize=100,
                                               pageToken=page_token).execute()
        files.extend(response.get('files', []))
        page_token = response.get('nextPageToken', None)
        if not page_token:
            break
    return files

def extract_number_from_filename(filename):
    """
    Extracts the number from the filename.
    """
    name, ext = os.path.splitext(filename)
    try:
        return int(name.split('_')[-1])  # Extracts the number part from the filename
    except ValueError:
        return None

# Define the folder ID
folder_id = '19StEBq_VOH-65N18FZu5BjzAquOklzL7'

# List all files in the folder
files = list_files_in_folder(folder_id)

# Extract numbers from filenames
numbers = [extract_number_from_filename(file['name']) for file in files]
numbers = [num for num in numbers if num is not None]  # Remove None values

# Find the minimum and maximum numbers
min_number = min(numbers)
max_number = max(numbers)

# Find the file titles with the minimum and maximum numbers
min_file_title = next((file['name'] for file in files if extract_number_from_filename(file['name']) == min_number), None)
max_file_title = next((file['name'] for file in files if extract_number_from_filename(file['name']) == max_number), None)

# Print the total number of files
print("Total number of files:", len(files))

# Print the titles of the file with the minimum and maximum numbers
print("Minimum file title:", min_file_title)
print("Maximum file title:", max_file_title)


Total number of files: 4008
Minimum file title: photo_1.jpg
Maximum file title: photo_4000.jpg


In [32]:
import os
from google.oauth2 import service_account
from googleapiclient.discovery import build

# Authentication
credentials = service_account.Credentials.from_service_account_file('service_account.json')
drive_service = build('drive', 'v3', credentials=credentials)

def list_files_in_folder(folder_id):
    """
    Lists all files in a folder.
    """
    files = []
    page_token = None
    while True:
        response = drive_service.files().list(q=f"'{folder_id}' in parents",
                                               fields="nextPageToken, files(id, name)",
                                               pageSize=100,
                                               pageToken=page_token).execute()
        files.extend(response.get('files', []))
        page_token = response.get('nextPageToken', None)
        if not page_token:
            break
    return files

def extract_numbers_from_filenames(files):
    """
    Extracts numbers from filenames of the format photo_num.jpg.
    """
    numbers = []
    for file in files:
        filename = file['name']
        if filename.startswith('photo_') and filename.endswith('.jpg'):
            number = filename.split('_')[1]  # Split filename by '_' and take the part after 'photo_'
            number = os.path.splitext(number)[0]  # Remove the file extension '.jpg'
            number = int(number) if number.isdigit() else None  # Convert to integer if possible
            if number is not None:
                numbers.append(number)
    return numbers

def get_fake_numbers(parent_folder_id, train_subfolder_name, real_subfolder_name):
    train_subfolder_id = get_subfolder_id(parent_folder_id, train_subfolder_name)
    if train_subfolder_id:
        real_subfolder_id = get_subfolder_id(train_subfolder_id, real_subfolder_name)
        if real_subfolder_id:
            files = list_files_in_folder(real_subfolder_id)
            numbers = extract_numbers_from_filenames(files)
            return numbers
        else:
            print(f"Subfolder '{real_subfolder_name}' not found or unable to access.")
            return []
    else:
        print(f"Subfolder '{train_subfolder_name}' not found or unable to access.")
        return []

def get_subfolder_id(parent_folder_id, subfolder_name):
    try:
        response = drive_service.files().list(q=f"'{parent_folder_id}' in parents and name = '{subfolder_name}' and mimeType = 'application/vnd.google-apps.folder'",
                                              fields="files(id)").execute()
        subfolders = response.get('files', [])
        if subfolders:
            return subfolders[0]['id']
        else:
            return None
    except Exception as e:
        print(f"An error occurred while fetching subfolder ID for '{subfolder_name}': {e}")
        return None

# Define the main folder ID
main_folder_id = '1M2Q0pfPkUCcdchYHvTk7Np-KhJ8rn25a'
train_subfolder_name = 'train'
real_subfolder_name = 'fake'

# Extract numbers from filenames in the "real" subfolder within the "train" subfolder
fake_numbers = get_fake_numbers(main_folder_id, train_subfolder_name, real_subfolder_name)

# Print the first 10 numbers for verification
print("Fake Numbers:", fake_numbers[:10])
print(len(fake_numbers))
print(min(fake_numbers))
print(max(fake_numbers))

Fake Numbers: [642, 477, 1912, 1494, 738, 1984, 1074, 1947, 1870, 775]
1394
1
2003


In [33]:
# Define the main folder ID
main_folder_id = '1M2Q0pfPkUCcdchYHvTk7Np-KhJ8rn25a'
train_subfolder_name = 'test'
real_subfolder_name = 'fake'

# Extract numbers from filenames in the "real" subfolder within the "train" subfolder
test_fake_numbers = get_fake_numbers(main_folder_id, train_subfolder_name, real_subfolder_name)
print(len(test_fake_numbers))
print(min(test_fake_numbers))
print(max(test_fake_numbers))

200
50
2005


In [34]:
# Define the main folder ID
main_folder_id = '1M2Q0pfPkUCcdchYHvTk7Np-KhJ8rn25a'
train_subfolder_name = 'validation'
real_subfolder_name = 'fake'

# Extract numbers from filenames in the "real" subfolder within the "train" subfolder
validation_fake_numbers = get_fake_numbers(main_folder_id, train_subfolder_name, real_subfolder_name)
print(len(validation_fake_numbers))
print(min(validation_fake_numbers))
print(max(validation_fake_numbers))

401
2
2004


In [35]:
fake_jpgs = fake_numbers+test_fake_numbers+validation_fake_numbers
print(len(fake_jpgs))
print(min(fake_jpgs))
print(max(fake_jpgs))

1995
1
2005
