In [15]:
import glob
import os

import numpy as np
import pandas as pd

from backend.app import num_random_images

In [16]:
# Load every table of the Unsplash dataset into `datasets`, keyed by table name.
# A table may be split over several files sharing the "<name>.tsv" prefix
# (the glob pattern is "<name>.tsv*"); the parts are concatenated row-wise
# with a fresh 0..n-1 index.
path = './resources/unsplash-dataset/'
documents = ['photos', 'keywords', 'collections', 'conversions', 'colors']
datasets = {}

for doc in documents:
  pattern = path + doc + ".tsv*"
  # glob.glob returns matches in arbitrary order. Sort them so the row order
  # of the combined frame is identical on every run; later cells address
  # photo URLs by row position.
  files = sorted(glob.glob(pattern))
  if not files:
    # Without this, pd.concat fails with an opaque "No objects to concatenate".
    raise FileNotFoundError(f"No dataset files match {pattern}")

  subsets = [pd.read_csv(filename, sep='\t', header=0) for filename in files]
  datasets[doc] = pd.concat(subsets, axis=0, ignore_index=True)

In [17]:
# Preview the first five rows of the combined photos table.
datasets["photos"].head(n=5)

Unnamed: 0,photo_id,photo_url,photo_image_url,photo_submitted_at,photo_featured,photo_width,photo_height,photo_aspect_ratio,photo_description,photographer_username,...,photo_location_country,photo_location_city,stats_views,stats_downloads,ai_description,ai_primary_landmark_name,ai_primary_landmark_latitude,ai_primary_landmark_longitude,ai_primary_landmark_confidence,blur_hash
0,bygTaBey1Xk,https://unsplash.com/photos/bygTaBey1Xk,https://images.unsplash.com/uploads/1413387620...,2014-10-15 15:40:40.111061,t,4635,3070,1.51,,jaspervandermeij,...,,,1708356,19085,sea and rock cliff with grasses under cloudy sky,Neist Point,57.428387,-6.783028,30.348906,LcE{wnIVRixt~WR+NGjbxukCWBWB
1,gXSFnk2a9V4,https://unsplash.com/photos/gXSFnk2a9V4,https://images.unsplash.com/reserve/jEs6K0y1Sb...,2014-07-10 18:36:06,t,2448,3264,0.75,Coastline view,kimberlyrichards,...,United States,Tillamook,9895033,74702,aerial photography of seashore,,,,,LXE4G#IARjj]GdWFxaWBDOxaofj[
2,grg6-DNJuaU,https://unsplash.com/photos/grg6-DNJuaU,https://images.unsplash.com/uploads/1412192004...,2014-10-01 19:33:56.393181,t,5184,3456,1.5,,marcusdallcol,...,,,8967968,38338,man surfboarding on ocean wave during daytime,,,,,LcHx?5R%Rjof01bHWBof4ooMoeax
3,sO42hhChB1c,https://unsplash.com/photos/sO42hhChB1c,https://images.unsplash.com/reserve/ijl3tATFRp...,2014-08-19 21:15:40,t,4896,3264,1.5,Hazy Ocean Waters,arturpokusin,...,,,2071752,10860,body of water,,,,,LyOzVsj[aefQ_4j[ayj[IUayj[ay
4,tkk8_HakQ98,https://unsplash.com/photos/tkk8_HakQ98,https://images.unsplash.com/reserve/6vaWXsQuSW...,2014-05-05 18:31:06,t,2000,1333,1.5,Silhouettes In Desert,carlov,...,,,2720281,9081,car on desert during sunset,,,,,"LYEV]I%19ZR+-=s,RkWW00WB%2j["


In [29]:
# Half-open window [start_index, end_index) of image numbers handled by the
# download cell; end_index is also the upper bound of the depth-map loop.
start_index, end_index = 5639, 10000

In [30]:
import os
import requests

# resize all images to max 2000 dimension, pillow thumbnail
from PIL import Image

# Download the photos for image numbers [start_index, end_index) and shrink
# each one in place so that neither side exceeds 2000 px.
#
# `i` numbers the files written to disk; `url_index` walks the URL list.
# They drift apart whenever a URL fails: the failed URL is skipped while the
# output slot `i` is retried with the next URL, so url_index ends up ahead of i.
# NOTE(review): starting both counters at start_index ignores failures that
# happened before start_index in earlier runs (the original note mentions
# url_index 5639 as the only known one) — confirm the offset before resuming.
image_dir = './resources/random_images'
os.makedirs(image_dir, exist_ok=True)

image_urls = datasets['photos']['photo_image_url'].tolist()
i = start_index
url_index = start_index
# Also stop once the URL list is exhausted; otherwise the IndexError would be
# swallowed by the handler below and the loop would never terminate.
while i < end_index and url_index < len(image_urls):
    url = image_urls[url_index]
    url_index += 1  # this URL is consumed whether or not it succeeds
    image_path = f'{image_dir}/image_{i}.jpg'
    try:
        # A timeout keeps one stalled connection from hanging the whole batch;
        # the context manager releases the streamed connection.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()  # Check for errors

            # Save the image
            with open(image_path, "wb") as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)

        # convert() returns a fully loaded copy, so the source file can be
        # closed before the same path is overwritten.
        with Image.open(image_path) as downloaded:
            image = downloaded.convert('RGB')
        image.thumbnail((2000, 2000))
        image.save(image_path)
    except Exception as e:
        # Best effort: report the URL that was actually attempted and reuse
        # slot `i` for the next URL (which overwrites any partial file).
        print(f"Failed to download {url}: {e}")
        continue
    i += 1


Failed to download https://images.unsplash.com-grass-sun.jpg: HTTPSConnectionPool(host='images.unsplash.com-grass-sun.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x17dfdfc50>: Failed to resolve 'images.unsplash.com-grass-sun.jpg' ([Errno 8] nodename nor servname provided, or not known)"))




In [32]:
from depth_map_generator import depth_map_generator
import cv2
import numpy as np

# Generate a greyscale JPEG depth map for every image numbered below end_index.
depth_map_dir = './resources/random_images_depth_maps_greyscale'
os.makedirs(depth_map_dir, exist_ok=True)  # no-op when the directory already exists
for i in range(end_index):
    image_path = f'./resources/random_images/image_{i}.jpg'
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for a missing or undecodable file instead of
        # raising; stop here with the offending path rather than passing None on.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    depth_map = depth_map_generator.generate_depth_map(image)
    # NOTE(review): assumes depth_map values lie in [0, 1] so that scaling by
    # 255 fits in uint8 — confirm against depth_map_generator.
    depth_map_greyscale = (depth_map * 255).astype(np.uint8)
    cv2.imwrite(f'{depth_map_dir}/depth_map_greyscale_{i}.jpg', depth_map_greyscale)



In [11]:
# Convert the saved .npy depth maps into greyscale images so they can be
# stored with JPEG compression.
greyscale_dir = './resources/random_images_depth_maps_greyscale'
# cv2.imwrite reports failure only through its return value, so a missing
# output directory would go unnoticed; create it up front.
os.makedirs(greyscale_dir, exist_ok=True)

for i in range(num_random_images):
    depth_map_path = f'./resources/random_images_depth_maps/depth_map_{i}.npy'
    depth_map = np.load(depth_map_path)
    # NOTE(review): assumes depth_map values lie in [0, 1] so that scaling by
    # 255 fits in uint8 — confirm against whatever produced the .npy files.
    depth_map_greyscale = (depth_map * 255).astype(np.uint8)
    cv2.imwrite(f'{greyscale_dir}/depth_map_greyscale_{i}.jpg', depth_map_greyscale)