In [1]:
import sys
import flickrapi
import pandas as pd
import time
import requests
import os
from datetime import datetime

# Flickr API credentials.
# Read from the environment so real keys never have to be saved in the
# notebook; both fall back to "" (the previous hard-coded value) when unset.
api_key = os.environ.get("FLICKR_API_KEY", "")
api_secret = os.environ.get("FLICKR_API_SECRET", "")

# Connect to Flickr; 'parsed-json' makes API calls return plain dicts/lists.
flickr = flickrapi.FlickrAPI(api_key, api_secret, format='parsed-json')

# Keywords to process: each one gets its own CSV file and image folder.
search_keywords = ["sunset", "cloud", "river", "sky"]

# Define bounding box (decimal degrees).
lat_north = 51.54620
lat_south = 51.53278
long_west = -0.13832
long_east = -0.11192
# NOTE(review): Flickr documents 250 as the page size for bbox queries --
# confirm against the API reference before raising this.
per_page = 250
# Serialised as "west,south,east,north", i.e. min lon, min lat, max lon, max lat.
bbox = f"{long_west},{lat_south},{long_east},{lat_north}"

# Reference date: "days_since_taken" is measured back from this fixed day
# rather than from "now", so repeated runs give the same values.
reference_date = datetime(2025, 5, 3)

# Limit image download only (not data rows); applied separately per keyword.
max_download_images = 100

def get_page_photos(bbox, page, per_page, text):
    """Fetch one page of photo search results from Flickr.

    Args:
        bbox: Bounding-box string forwarded as the ``bbox`` search argument.
        page: Number of the result page to request.
        per_page: Number of results to request per page.
        text: Free-text search term.

    Returns:
        The ``"photos"`` entry of the search response. The caller reads
        ``pages``, ``total`` and ``photo`` from it.
    """
    # Extra per-photo fields requested alongside the default ones.
    extra_fields = "geo,description,tags,views,media,url_s,date_taken,owner_name"

    query = {
        "bbox": bbox,
        "text": text,
        "per_page": per_page,
        "page": page,
        "has_geo": 1,
        "extras": extra_fields,
    }
    result = flickr.photos.search(**query)
    return result["photos"]

# Seconds to wait on a single image request before giving up, so one stalled
# connection cannot hang the whole run.
download_timeout_seconds = 30

# Loop through each keyword: write the metadata of every matching photo to a
# per-keyword CSV and download at most max_download_images thumbnails into a
# per-keyword folder.
for search_text in search_keywords:
    print(f"\nProcessing keyword: {search_text}")

    image_folder = f"flickr_images_{search_text}"
    os.makedirs(image_folder, exist_ok=True)

    # The first page carries the paging totals *and* its own photos, so it is
    # kept and reused below instead of being requested a second time.
    first_page = get_page_photos(bbox, 1, per_page, search_text)
    total_pages = int(first_page['pages'])
    total_found = first_page['total']
    print(f"Total pages: {total_pages} | Total photos: {total_found}")

    all_rows = []
    downloaded_images = 0

    for page in range(1, total_pages + 1):
        print(f"Fetching page {page}/{total_pages}")
        if page == 1:
            page_data = first_page
        else:
            page_data = get_page_photos(bbox, page, per_page, search_text)
        photos = page_data['photo']

        for photo in photos:
            # A missing (None) or malformed date yields None instead of
            # dropping the row.
            date_str = photo.get("datetaken")
            try:
                date_obj = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S")
            except (TypeError, ValueError):
                days_since_taken = None
            else:
                days_since_taken = (reference_date - date_obj).days

            image_url = photo.get("url_s", "")
            image_filename = ""

            # Only the download is capped; the metadata row is always recorded.
            if image_url and downloaded_images < max_download_images:
                image_filename = os.path.join(image_folder, f"{photo.get('id')}.jpg")
                try:
                    response = requests.get(image_url, timeout=download_timeout_seconds)
                    # Reject HTTP errors so an error page is never saved as
                    # a .jpg or counted as a download.
                    response.raise_for_status()
                    with open(image_filename, 'wb') as handler:
                        handler.write(response.content)
                except (requests.RequestException, OSError) as exc:
                    # Best effort: report the failure and keep the row
                    # without an image path.
                    print(f"Failed to download image: {image_url} ({exc})")
                    image_filename = ""
                else:
                    downloaded_images += 1

            # Tolerate the description key being present but None.
            description = photo.get("description") or {}

            row = {
                "latitude": photo.get("latitude"),
                "longitude": photo.get("longitude"),
                "views": photo.get("views"),
                "days_since_taken": days_since_taken,
                "id": photo.get("id"),
                "server": photo.get("server"),
                "secret": photo.get("secret"),
                "title": photo.get("title"),
                "tags": photo.get("tags"),
                "description": description.get("_content", ""),
                "date_taken": date_str,
                "url_s": image_url,
                "image_path": image_filename,
                "owner": photo.get("owner"),
                "owner_name": photo.get("ownername"),
                "media": photo.get("media"),
            }
            all_rows.append(row)

        time.sleep(1)  # avoid hitting rate limits

    # Save all data to CSV
    df = pd.DataFrame(all_rows)
    filename = f"flickr_photos_{search_text}.csv"
    df.to_csv(filename, index=False, encoding="utf-8-sig")
    print(f"Finished keyword: {search_text}")
    print(f"Downloaded {downloaded_images} images to folder: {image_folder}")
    print(f"CSV saved: {filename}")



Processing keyword: sunset
Total pages: 1 | Total photos: 138
Fetching page 1/1
Finished keyword: sunset
Downloaded 100 images to folder: flickr_images_sunset
CSV saved: flickr_photos_sunset.csv

Processing keyword: cloud
Total pages: 2 | Total photos: 271
Fetching page 1/2
Fetching page 2/2
Finished keyword: cloud
Downloaded 100 images to folder: flickr_images_cloud
CSV saved: flickr_photos_cloud.csv

Processing keyword: river
Total pages: 2 | Total photos: 363
Fetching page 1/2
Fetching page 2/2
Finished keyword: river
Downloaded 100 images to folder: flickr_images_river
CSV saved: flickr_photos_river.csv

Processing keyword: sky
Total pages: 5 | Total photos: 1181
Fetching page 1/5
Fetching page 2/5
Fetching page 3/5
Fetching page 4/5
Fetching page 5/5
Finished keyword: sky
Downloaded 100 images to folder: flickr_images_sky
CSV saved: flickr_photos_sky.csv
