In [None]:
#!pip install opencv-python easydict pyyaml

In [None]:
# Mount Google Drive at /content/drive; force_remount=True is passed so the
# mount is redone even when the cell is re-run.

from google.colab import drive
drive.mount(mountpoint='/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import requests
import os
import cv2
import numpy as np

# Downloading Images from Flickr

In [None]:
def save_images_from_flickr(api_key, query, num_images, dataset_directory,
                            session=None, timeout=30):
    """Search Flickr for ``query`` and save up to ``num_images`` JPEGs to disk.

    Results come from ``flickr.photos.search``. Flickr caps ``per_page`` at
    500, so larger requests are served by walking successive result pages
    rather than being silently truncated to the first 500 hits.

    Args:
        api_key: Flickr API key.
        query: Free-text search term (sent as the ``text`` parameter).
        num_images: Maximum number of images to download.
        dataset_directory: Destination folder; created if it does not exist.
            Files are written as ``image_<i>.jpg`` where ``i`` is the
            position of the photo in the combined search results.
        session: Optional object exposing a requests-compatible
            ``get(url, params=..., timeout=...)`` method, e.g. a
            ``requests.Session``. Defaults to the ``requests`` module.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.HTTPError: If a search request returns an HTTP error status.
        RuntimeError: If Flickr answers a search with ``stat == "fail"``.
    """
    if not os.path.exists(dataset_directory):
        os.makedirs(dataset_directory, exist_ok=True)
        print(f"Created directory: {dataset_directory}")

    if num_images <= 0:
        print("Photos found:", 0)
        return

    http = requests if session is None else session
    url = "https://api.flickr.com/services/rest/"

    # Flickr rejects/clamps per_page above 500. Keep the page size constant
    # across requests so page N always maps to the same slice of results.
    max_per_page = 500
    per_page = min(num_images, max_per_page)

    photos = []
    page = 1
    while len(photos) < num_images:
        params = {
            "method": "flickr.photos.search",
            "api_key": api_key,
            "text": query,
            "format": "json",
            "nojsoncallback": 1,
            "per_page": per_page,
            "page": page,
        }
        response = http.get(url, params=params, timeout=timeout)
        response.raise_for_status()

        payload = response.json()
        # API-level errors (bad key, rate limit, ...) arrive with HTTP 200.
        if payload.get("stat") == "fail":
            raise RuntimeError(
                f"Flickr API error {payload.get('code')}: {payload.get('message')}"
            )

        result = payload["photos"]
        batch = result["photo"]
        if not batch:
            break
        photos.extend(batch)

        # Stop once the last available page has been consumed.
        if page >= int(result.get("pages", page)):
            break
        page += 1

    photos = photos[:num_images]
    print("Photos found:", len(photos))

    for i, photo in enumerate(photos):
        photo_url = f"https://live.staticflickr.com/{photo['server']}/{photo['id']}_{photo['secret']}.jpg"
        print("Downloading:", photo_url)

        try:
            img_response = http.get(photo_url, timeout=timeout)
            img_response.raise_for_status()
        except requests.RequestException as exc:
            # One dead or slow image URL should not abort the whole batch.
            print(f"Skipping image_{i}.jpg: {exc}")
            continue

        with open(os.path.join(dataset_directory, f"image_{i}.jpg"), "wb") as file:
            file.write(img_response.content)
        print(f"Saved image_{i}.jpg")


# Search terms, grouped as opposing pairs. Each term is used both as the
# Flickr search text and as the name of its dataset folder under /content.
# NOTE: "premuim" is misspelled, but the preprocessing cell reads
# '/content/premuim'; fix both places together if the spelling is corrected.
queries = ["innovation", "conservatism",
           "premuim", "accessibility",
           "minimalism", "detail",
           "safety", "risk",

           "healthy", "unhealthy",
           "glamorous", "drab",
           "rugged", "gentle",
           "fun", "dull",
]

# Never commit API keys to a notebook: read the key from the environment
# (in Colab, e.g. `%env FLICKR_API_KEY=...` in a cell that is not saved/shared).
# Any key that was previously hardcoded here should be revoked.
api_key = os.environ.get("FLICKR_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the FLICKR_API_KEY environment variable before running this cell."
    )

num_images = 1000

for query in queries:
    dataset_directory = "/content/" + query
    save_images_from_flickr(api_key, query, num_images, dataset_directory)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Downloading: https://live.staticflickr.com/65535/53703109033_fbbf68c509.jpg
Saved image_4.jpg
Downloading: https://live.staticflickr.com/65535/53701458827_90509f9ded.jpg
Saved image_5.jpg
Downloading: https://live.staticflickr.com/65535/53702522719_5236ae6f25.jpg
Saved image_6.jpg
Downloading: https://live.staticflickr.com/65535/53701734811_2286d73d74.jpg
Saved image_7.jpg
Downloading: https://live.staticflickr.com/65535/53700210155_b5b8c7bedb.jpg
Saved image_8.jpg
Downloading: https://live.staticflickr.com/65535/53697500852_9594b1ddef.jpg
Saved image_9.jpg
Downloading: https://live.staticflickr.com/65535/53697486752_021da18774.jpg
Saved image_10.jpg
Downloading: https://live.staticflickr.com/65535/53697186472_d79809bd26.jpg
Saved image_11.jpg
Downloading: https://live.staticflickr.com/65535/53697871588_9385fa085e.jpg
Saved image_12.jpg
Downloading: https://live.staticflickr.com/65535/53696669517_573ccc75b6.jpg
Saved imag

# Preprocess

In [None]:
def preprocess_image(img_path, target_size=(224, 224)):
    """Load an image, resize it, and scale its pixel values by 1/255.

    Args:
        img_path: Path of the image file to read with ``cv2.imread``.
        target_size: Output size handed to ``cv2.resize``, which interprets
            it as ``(width, height)``.

    Returns:
        A float32 array holding the resized image divided by 255. Channel
        order is whatever ``cv2.imread`` produces (BGR for colour images).

    Raises:
        ValueError: If the file is missing or cannot be decoded as an image.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # without this check the error surfaces later as an opaque cv2.resize
        # assertion that does not mention the offending file.
        raise ValueError(f"Could not read image: {img_path}")

    img = cv2.resize(img, target_size)
    return img.astype('float32') / 255.0

def preprocess_images_from_folders(folders, target_size=(224, 224)):
    """Preprocess every image file found directly inside each folder.

    Args:
        folders: Iterable of directory paths. The last path component of each
            directory is used as its label.
        target_size: Forwarded to ``preprocess_image``.

    Returns:
        Dict mapping label -> list of preprocessed images, in sorted
        file-name order. Files that cannot be read as images are skipped
        with a printed notice instead of aborting the whole run.
    """
    preprocessed_images = {}

    for folder in folders:
        # normpath drops a trailing separator so "data/healthy/" is labelled
        # "healthy" rather than the empty string.
        label = os.path.basename(os.path.normpath(folder))
        preprocessed_images[label] = []

        # os.listdir order is arbitrary; sort so list positions (and any
        # index-based file names derived from them) are reproducible.
        for img_file in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, img_file)
            if not os.path.isfile(img_path):
                continue

            try:
                image = preprocess_image(img_path, target_size)
            except (cv2.error, ValueError) as exc:
                # Truncated downloads or non-image files should not discard
                # the images that were already processed.
                print(f"Skipping unreadable image {img_path}: {exc}")
                continue

            preprocessed_images[label].append(image)

    return preprocessed_images

# Input folders, one per label; the folder name doubles as the label.
folders = [
    '/content/healthy',
    '/content/unhealthy',
    '/content/glamorous',
    '/content/drab',
    '/content/rugged',
    '/content/gentle',
    '/content/fun',
    '/content/dull',
    '/content/innovation',
    '/content/conservatism',
    '/content/premuim',
    '/content/accessibility',
    '/content/minimalism',
    '/content/detail',
    '/content/safety',
    '/content/risk',
]

# Load and normalise everything in one pass.
preprocessed_images = preprocess_images_from_folders(folders)

# Quick sanity check: how many images ended up under each label.
for label, images in preprocessed_images.items():
    print(f"Label: {label}, Number of Images: {len(images)}")

Label: healthy, Number of Images: 500
Label: unhealthy, Number of Images: 499
Label: glamorous, Number of Images: 498
Label: drab, Number of Images: 500
Label: rugged, Number of Images: 500
Label: gentle, Number of Images: 500
Label: fun, Number of Images: 500
Label: dull, Number of Images: 500
Label: innovation, Number of Images: 500
Label: conservatism, Number of Images: 497
Label: premuim, Number of Images: 499
Label: accessibility, Number of Images: 500
Label: minimalism, Number of Images: 497
Label: detail, Number of Images: 500
Label: safety, Number of Images: 500
Label: risk, Number of Images: 500


# Save to Google Drive

In [None]:
def save_preprocessed_images(preprocessed_images, target_directory):
    """Write images to ``<target_directory>/<label>/<label>_<i>.jpg``.

    Args:
        preprocessed_images: Mapping of label -> sequence of image arrays.
            Each array is multiplied by 255 before saving, i.e. values are
            expected to be scaled to [0, 1].
        target_directory: Root output folder; it and one sub-folder per label
            are created when missing.

    Files that OpenCV fails to write are reported with a printed warning.
    """
    os.makedirs(target_directory, exist_ok=True)

    for label, images in preprocessed_images.items():
        label_dir = os.path.join(target_directory, label)
        os.makedirs(label_dir, exist_ok=True)

        for i, img in enumerate(images):
            img_path = os.path.join(label_dir, f"{label}_{i}.jpg")

            # JPEG holds 8-bit samples: rescale back to 0-255, round, clamp
            # and convert explicitly instead of relying on cv2.imwrite's
            # implicit handling of float input.
            img_uint8 = np.clip(np.rint(np.asarray(img) * 255.0), 0, 255).astype(np.uint8)

            # cv2.imwrite returns False on failure instead of raising.
            if not cv2.imwrite(img_path, img_uint8):
                print(f"Warning: failed to write {img_path}")

# Destination folder inside the mounted Google Drive.
shared_folder_path = '/content/drive/My Drive/ConsumerCompass/data'

# Write every preprocessed image there, one sub-folder per label.
save_preprocessed_images(
    preprocessed_images=preprocessed_images,
    target_directory=shared_folder_path,
)

---