In [None]:
# Mount Google Drive at /content/drive (requires the Colab runtime).
# NOTE(review): none of the cells shown below read from or write to /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Create the download root (the same path as BASE_PATH); -p tolerates an existing folder.
!mkdir -p "/content/project"

In [None]:
import requests
import os
import time
from urllib.request import urlretrieve
import zipfile
import json

# Constants
# The Unsplash access key is read from the environment so that it is not stored in
# the notebook. Set it before running, e.g. `%env UNSPLASH_ACCESS_KEY=<your key>`.
# NOTE: a key that was previously committed in this cell should be rotated.
CLIENT_ID = os.environ.get("UNSPLASH_ACCESS_KEY", "")
if not CLIENT_ID:
    print("WARNING: UNSPLASH_ACCESS_KEY is not set; Unsplash API requests will be rejected.")
SEASON = "spring"  # Search keyword and top-level folder name
IMAGE_COUNT = {"train": 6000, "validation": 1500, "test": 1500}  # Target images per split
PER_PAGE = 30  # Results requested per API page
BASE_PATH = "/content/project"
SECONDS_BETWEEN_REQUESTS = 72  # Throttle requests to stay under rate limit (72 s apart = 50 per hour)
PROGRESS_FILE = "/content/download_progress.json"  # Stores the next page to fetch per season/part

In [None]:
# Function to fetch images
def get_unsplash_photos(keyword, client_id, page, per_page=PER_PAGE):
    """Return the "regular"-size image URLs from one page of Unsplash search results.

    Args:
        keyword: Search query sent to the API.
        client_id: Unsplash access key, sent as the ``client_id`` query parameter.
        page: Page number of the search results to request.
        per_page: Number of results requested per page.

    Returns:
        A list of URL strings, or an empty list when the request fails
        (network error, timeout, or a non-200 status code).
    """
    url = "https://api.unsplash.com/search/photos"
    params = {
        "query": keyword,
        "client_id": client_id,
        "per_page": per_page,
        "page": page
    }
    try:
        # Without a timeout a stalled connection would block the notebook indefinitely.
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException as error:
        print(f"Failed to fetch photos for {keyword}, page {page}: {error}")
        return []
    if response.status_code != 200:
        print(f"Failed to fetch photos for {keyword}, page {page}: {response.status_code}")
        return []
    return [photo['urls']['regular'] for photo in response.json()['results']]

# Function to download and save images with custom filenames
def download_and_save_images(image_urls, folder_path, image_name, start_index):
    """Download each URL into ``folder_path`` as ``{image_name}_{index}.jpg``.

    Args:
        image_urls: Iterable of image URLs to fetch.
        folder_path: Destination directory; created if it does not exist.
        image_name: Filename prefix shared by all saved images.
        start_index: Number given to the first URL; later URLs count up from it.

    Returns:
        The number of images that were saved successfully. A URL that fails to
        download is reported and skipped; its index is left unused so the
        numbering of the remaining images does not shift.
    """
    os.makedirs(folder_path, exist_ok=True)
    saved = 0
    for i, url in enumerate(image_urls, start=start_index):
        filename = f"{folder_path}/{image_name}_{i}.jpg"
        try:
            urlretrieve(url, filename)
        except OSError as error:
            # URLError, HTTPError and ContentTooShortError all derive from OSError;
            # one bad image should not abort a long-running batch.
            print(f"Failed to download {url}: {error}")
            continue
        saved += 1
    return saved

# Function to download images for a season and dataset part
def download_season_images(season, part, target_count, progress):
    """Download search results for ``season`` into ``BASE_PATH/season/part``.

    Pages are fetched one at a time, with a pause between requests, until at
    least ``target_count`` images have been requested. After every page the
    next page number is written to the progress file so a later run can resume.

    Args:
        season: Search keyword; also used as the folder name and, capitalized,
            as the filename prefix.
        part: Dataset split name (sub-folder under the season folder).
        target_count: Number of images to collect for this split.
        progress: Mapping ``{season: {part: next_page}}`` as returned by
            ``load_progress``; a missing entry means start at page 1.

    The image count is derived from the page number ((page - 1) * PER_PAGE),
    which also serves as the index of the next file to write.
    """
    folder_path = os.path.join(BASE_PATH, season, part)
    current_page = progress.get(season, {}).get(part, 1)
    downloaded_count = (current_page - 1) * PER_PAGE

    while downloaded_count < target_count:
        image_urls = get_unsplash_photos(season, CLIENT_ID, current_page)
        if not image_urls:
            # An empty page means the request failed or the results ran out.
            # Looping on would never reach target_count and would advance the
            # saved page past images that were never downloaded.
            print(f"No photos returned for {season}/{part} on page {current_page}; stopping. Re-run to resume from this page.")
            break
        download_and_save_images(image_urls, folder_path, season.capitalize(), downloaded_count)
        print(f"Successfully download photos on page {current_page}")
        downloaded_count += len(image_urls)
        current_page += 1
        save_progress(season, part, current_page)
        if downloaded_count < target_count:
            time.sleep(SECONDS_BETWEEN_REQUESTS)  # Throttle requests

# Save progress to a file
def save_progress(season, part, page):
    """Record ``page`` for ``season``/``part`` in the JSON progress file.

    Existing entries for other seasons and parts are kept; the file is created
    when it does not exist yet.
    """
    state = {}
    if os.path.exists(PROGRESS_FILE):
        with open(PROGRESS_FILE, 'r') as handle:
            state = json.load(handle)

    # Create the per-season mapping on first use, then store the page number.
    state.setdefault(season, {})[part] = page

    with open(PROGRESS_FILE, 'w') as handle:
        json.dump(state, handle)

# Load progress from a file
def load_progress():
    """Return the saved progress mapping, or an empty dict if no progress file exists."""
    if not os.path.exists(PROGRESS_FILE):
        return {}
    with open(PROGRESS_FILE, 'r') as handle:
        return json.load(handle)

# Zip the folders
def zip_folders():
    """Write every file under BASE_PATH into /content/project.zip.

    Archive entries are stored with paths relative to BASE_PATH and compressed
    with DEFLATE.
    """
    with zipfile.ZipFile('/content/project.zip', 'w', zipfile.ZIP_DEFLATED) as archive:
        for current_dir, _subdirs, filenames in os.walk(BASE_PATH):
            for filename in filenames:
                absolute_path = os.path.join(current_dir, filename)
                archive.write(absolute_path, os.path.relpath(absolute_path, BASE_PATH))

In [None]:
# Main download loop with progress tracking
# NOTE(review): every part issues the same SEASON query and, unless resumed from the
# progress file, starts at page 1 -- so the validation and test folders repeat the
# images from the first train pages. Confirm whether the splits are meant to overlap.
progress = load_progress()
for part in IMAGE_COUNT:
    count = IMAGE_COUNT[part]
    download_season_images(SEASON, part, count, progress)

# Bundle everything that was downloaded into a single archive.
zip_folders()
print("Zipping complete. Download '/content/project.zip'")

Successfully download photos on page 1
Successfully download photos on page 2
Successfully download photos on page 3
Successfully download photos on page 4
Successfully download photos on page 5
Successfully download photos on page 6
Successfully download photos on page 7
Successfully download photos on page 8
Successfully download photos on page 9
Successfully download photos on page 10
Successfully download photos on page 11
Successfully download photos on page 12
Successfully download photos on page 13
Successfully download photos on page 14
Successfully download photos on page 15
Successfully download photos on page 16
Successfully download photos on page 17
Successfully download photos on page 18
Successfully download photos on page 19
Successfully download photos on page 20
Successfully download photos on page 21
Successfully download photos on page 22
Successfully download photos on page 23
Successfully download photos on page 24
Successfully download photos on page 25
Successfu

In [None]:
# Archive the train split on its own; entries keep the full content/project/spring/train/ prefix.
!zip -r "/content/spring_train.zip" "/content/project/spring/train"

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: content/project/spring/train/Spring_1288.jpg (deflated 0%)
  adding: content/project/spring/train/Spring_2326.jpg (deflated 0%)
  adding: content/project/spring/train/Spring_320.jpg (deflated 1%)
  adding: content/project/spring/train/Spring_52.jpg (deflated 0%)
  adding: content/project/spring/train/Spring_4935.jpg (deflated 0%)
  adding: content/project/spring/train/Spring_1621.jpg (deflated 1%)
  adding: content/project/spring/train/Spring_5845.jpg (deflated 0%)
  adding: content/project/spring/train/Spring_1363.jpg (deflated 0%)
  adding: content/project/spring/train/Spring_2334.jpg (deflated 0%)
  adding: content/project/spring/train/Spring_3587.jpg (deflated 0%)
  adding: content/project/spring/train/Spring_4217.jpg (deflated 0%)
  adding: content/project/spring/train/Spring_31.jpg (deflated 1%)
  adding: content/project/spring/train/Spring_2260.jpg (deflated 0%)
  adding: content/project/spring/train/Spri

In [None]:
# Archive the validation split on its own; entries keep the full content/project/spring/validation/ prefix.
!zip -r "/content/spring_validation.zip" "/content/project/spring/validation"

  adding: content/project/spring/validation/ (stored 0%)
  adding: content/project/spring/validation/Spring_963.jpg (deflated 0%)
  adding: content/project/spring/validation/Spring_1038.jpg (deflated 0%)
  adding: content/project/spring/validation/Spring_155.jpg (deflated 0%)
  adding: content/project/spring/validation/Spring_1078.jpg (deflated 0%)
  adding: content/project/spring/validation/Spring_1293.jpg (deflated 0%)
  adding: content/project/spring/validation/Spring_1245.jpg (deflated 0%)
  adding: content/project/spring/validation/Spring_414.jpg (deflated 0%)
  adding: content/project/spring/validation/Spring_811.jpg (deflated 0%)
  adding: content/project/spring/validation/Spring_538.jpg (deflated 0%)
  adding: content/project/spring/validation/Spring_1061.jpg (deflated 0%)
  adding: content/project/spring/validation/Spring_948.jpg (deflated 1%)
  adding: content/project/spring/validation/Spring_888.jpg (deflated 0%)
  adding: content/project/spring/validation/Spring_1248.jpg (d

In [None]:
# Archive the test split on its own; entries keep the full content/project/spring/test/ prefix.
!zip -r "/content/spring_test.zip" "/content/project/spring/test"

  adding: content/project/spring/test/ (stored 0%)
  adding: content/project/spring/test/Spring_963.jpg (deflated 0%)
  adding: content/project/spring/test/Spring_1038.jpg (deflated 0%)
  adding: content/project/spring/test/Spring_155.jpg (deflated 0%)
  adding: content/project/spring/test/Spring_1078.jpg (deflated 0%)
  adding: content/project/spring/test/Spring_1293.jpg (deflated 0%)
  adding: content/project/spring/test/Spring_1245.jpg (deflated 0%)
  adding: content/project/spring/test/Spring_414.jpg (deflated 0%)
  adding: content/project/spring/test/Spring_811.jpg (deflated 0%)
  adding: content/project/spring/test/Spring_538.jpg (deflated 0%)
  adding: content/project/spring/test/Spring_1061.jpg (deflated 0%)
  adding: content/project/spring/test/Spring_948.jpg (deflated 1%)
  adding: content/project/spring/test/Spring_888.jpg (deflated 0%)
  adding: content/project/spring/test/Spring_1248.jpg (deflated 1%)
  adding: content/project/spring/test/Spring_986.jpg (deflated 0%)
  add

In [None]:
import requests
import os
from urllib.request import urlretrieve

# Read the Unsplash access key from the environment instead of storing it in the
# notebook. Set it before running, e.g. `%env UNSPLASH_ACCESS_KEY=<your key>`.
# NOTE: a key that was previously committed in this cell should be rotated.
UNSPLASH_ACCESS_KEY = os.environ.get("UNSPLASH_ACCESS_KEY", "")
if not UNSPLASH_ACCESS_KEY:
    print("WARNING: UNSPLASH_ACCESS_KEY is not set; Unsplash API requests will be rejected.")
# Root folder for downloads. Despite the name this is not under the Drive mount
# point (/content/drive).
drive_base_path = '/content/train'

In [None]:
def get_unsplash_photos(keyword, client_id, start_page, pages, per_page=30):
    """Collect "regular"-size image URLs from consecutive Unsplash search pages.

    Args:
        keyword: Search query sent to the API.
        client_id: Unsplash access key, sent as the ``client_id`` query parameter.
        start_page: First page number to request.
        pages: Number of consecutive pages to request.
        per_page: Number of results requested per page.

    Returns:
        The URLs gathered from every page fetched successfully. Fetching stops
        at the first failed page, and the URLs collected up to that point are
        returned rather than discarded.
    """
    image_urls = []
    url = "https://api.unsplash.com/search/photos"
    for page in range(start_page, start_page + pages):
        params = {
            "query": keyword,
            "client_id": client_id,
            "page": page,
            "per_page": per_page
        }
        try:
            # Without a timeout a stalled connection would block the notebook indefinitely.
            response = requests.get(url, params=params, timeout=30)
        except requests.RequestException as error:
            print("Failed to fetch photos:", error)
            break
        if response.status_code != 200:
            print("Failed to fetch photos:", response.status_code)
            # Keep what was already fetched; each request counts against the rate limit.
            break
        for photo in response.json()['results']:
            image_urls.append(photo['urls']['regular'])
        print(f"Successfully fetch photos on page {page}")
    return image_urls

In [None]:
def download_and_save_images(image_urls, folder_name, start_index=0):
    """Download each URL into ``drive_base_path/folder_name``.

    Files are named ``{folder_name}_{index}.jpg``.

    Args:
        image_urls: Iterable of image URLs to fetch.
        folder_name: Sub-folder of ``drive_base_path`` and filename prefix.
        start_index: Number given to the first URL (default 0). Pass the count
            of images already saved when downloading a further batch into the
            same folder; otherwise the new batch overwrites the earlier files.

    Returns:
        The number of images that were saved successfully. A URL that fails to
        download is reported and skipped.
    """
    folder_path = os.path.join(drive_base_path, folder_name)
    os.makedirs(folder_path, exist_ok=True)
    saved = 0
    for i, url in enumerate(image_urls, start=start_index):
        filename = f"{folder_path}/{folder_name}_{i}.jpg"
        try:
            urlretrieve(url, filename)
        except OSError as error:
            # URLError, HTTPError and ContentTooShortError all derive from OSError;
            # one bad image should not abort the rest of the batch.
            print(f"Failed to download {url}: {error}")
            continue
        saved += 1
        print(f"Downloaded {filename}")
    return saved

In [None]:
# Manually fetch photos due to request limit in Unsplash
# `client_id` and `season` were never defined at notebook scope, so use the
# configured key and the literal season name.
# NOTE: download_and_save_images numbers files from 0 by default, so each batch
# cell reuses the filenames written by the previous batch.
image_urls = get_unsplash_photos("spring", UNSPLASH_ACCESS_KEY, 1, 50)
download_and_save_images(image_urls, "Spring")

In [None]:
# Pages 51-100. `client_id` and `season` were never defined at notebook scope,
# so use the configured key and the literal season name.
# NOTE: files are numbered from 0 by default, so this batch reuses the
# filenames written by the previous batch.
image_urls = get_unsplash_photos("spring", UNSPLASH_ACCESS_KEY, 51, 50)
download_and_save_images(image_urls, "Spring")

In [None]:
# Pages 101-150. `client_id` and `season` were never defined at notebook scope,
# so use the configured key and the literal season name.
# NOTE: files are numbered from 0 by default, so this batch reuses the
# filenames written by the previous batch.
image_urls = get_unsplash_photos("spring", UNSPLASH_ACCESS_KEY, 101, 50)
download_and_save_images(image_urls, "Spring")

In [None]:
# Pages 151-200. `client_id` and `season` were never defined at notebook scope,
# so use the configured key and the literal season name.
# NOTE: files are numbered from 0 by default, so this batch reuses the
# filenames written by the previous batch.
image_urls = get_unsplash_photos("spring", UNSPLASH_ACCESS_KEY, 151, 50)
download_and_save_images(image_urls, "Spring")

In [None]:
# Archive everything under /content/train; note the archive is written inside the folder being zipped.
!zip -r "/content/train/train_spring.zip" "/content/train"