# Import Images from Flickr via flickrapi

Install the required packages from the provided `requirements.txt` file.

In [1]:
!pip install -r requirements.txt

Collecting git+https://github.com/openai/CLIP.git (from -r requirements.txt (line 5))
  Cloning https://github.com/openai/CLIP.git to /private/var/folders/kp/3bh91wns7nz7wx71lyc4rwwr0000gn/T/pip-req-build-m4n181oe
  Running command git clone -q https://github.com/openai/CLIP.git /private/var/folders/kp/3bh91wns7nz7wx71lyc4rwwr0000gn/T/pip-req-build-m4n181oe
You should consider upgrading via the '/Users/yannick/Documents/VGI/YRG/integration-quality/Flickr_Tag_Analysis/venv/bin/python -m pip install --upgrade pip' command.[0m


In [2]:
import flickrapi
from PIL import Image
import urllib
from gallery import gallery
import requests
import sys
import os
from sklearn.metrics import plot_confusion_matrix
%matplotlib inline
import matplotlib.pyplot as plt

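The API credentials can be obtained by registering for the free, non-commercial Flickr API. The notebook expects them in a local `api_creds` module that provides `api_key` and `api_secret`.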

In [3]:
import api_creds

ModuleNotFoundError: No module named 'api_creds'

In [None]:
# Create the Flickr API client from the key and secret stored in the local api_creds module.
flickr=flickrapi.FlickrAPI(api_creds.api_key, api_creds.api_secret, cache=True)

In [None]:
# Search term used for the ConceptNet lookups, the Flickr query and the download folder name.
keyword = "Prunella Modularis"

## Translations and Related Terms via ConceptNet

Get translations and related terms via the ConceptNet API

In [None]:
# Fetch the ConceptNet node for the keyword (lower-cased, spaces replaced by underscores).
concept_response = requests.get(
    'http://api.conceptnet.io/c/en/'+keyword.lower().replace(" ","_"),
    timeout=30,  # requests waits indefinitely unless a timeout is given
)
# Fail right here with a clear HTTP error instead of a confusing KeyError in a later cell.
concept_response.raise_for_status()
concept_node = concept_response.json()

In [None]:
# Show the relation object of every edge attached to the concept node.
[edge["rel"] for edge in concept_node["edges"]]

In [None]:
# Keep only the edges whose relation is labelled "Synonym".
language_synomyms = list(
    filter(lambda edge: edge["rel"]["label"] == "Synonym", concept_node["edges"])
)

In [None]:
# Collect (language, label) pairs of all non-English nodes found on either side of a
# synonym edge: first every matching start node, then every matching end node.
translations = [
    (edge[side]["language"], edge[side]["label"])
    for side in ("start", "end")
    for edge in language_synomyms
    if edge[side]["language"] != "en"
]

In [None]:
# Display the collected (language, label) translation pairs.
translations

In [None]:
# Collect (language, label) of the end node of every "RelatedTo" edge, skipping edges
# whose end node is the keyword itself.
keyword_lower = keyword.lower()
related_to = [
    (edge["end"]["language"], edge["end"]["label"])
    for edge in concept_node["edges"]
    if edge["rel"]["label"] == "RelatedTo"
    # The keyword is lower-cased, so the label has to be lower-cased as well; otherwise a
    # capitalised label of the keyword itself would not be filtered out.
    and edge["end"]["label"].lower() != keyword_lower
]

In [None]:
# Preview of the URL used for the "related terms" request; it must include the API host.
'http://api.conceptnet.io/related/c/en/'+keyword.lower().replace(" ","_")+"?filter=/c/en"

In [None]:
# Collect (language, label) of the start node of every "FormOf" edge, skipping edges
# whose start node is the keyword itself.
keyword_lower = keyword.lower()
form_of = [
    (edge["start"]["language"], edge["start"]["label"])
    for edge in concept_node["edges"]
    if edge["rel"]["label"] == "FormOf"
    # Compare case-insensitively, because the keyword is lower-cased before the comparison.
    and edge["start"]["label"].lower() != keyword_lower
]

In [None]:
# Display the collected (language, label) pairs of "FormOf" edges.
form_of

In [None]:
# Ask ConceptNet for terms related to the keyword, restricted to English (/c/en).
related_response = requests.get(
    'http://api.conceptnet.io/related/c/en/'+keyword.lower().replace(" ","_")+"?filter=/c/en",
    timeout=30,  # requests waits indefinitely unless a timeout is given
)
# Surface HTTP errors immediately instead of parsing an error page as JSON.
related_response.raise_for_status()
related_nodes = related_response.json()

## Actual Crawling

Based on the given keyword and related tags, we crawl images from Flickr.

In [None]:
# Search Flickr for the keyword, passed both as free text and as tag(s), sorted by relevance.
# extras='url_c' requests the 'url_c' attribute that the download loop reads from each photo.
photos = flickr.walk(text=keyword,
                     tag_mode='all',
                     tags=keyword,
                     extras='url_c',
                     per_page=100,           # page size passed to Flickr; other values can be tried
                     sort='relevance')

In [None]:
# Download up to MAX_PHOTOS search results into training_data/<keyword>/ and remember the
# URL of every successfully stored image in `urls`.
import urllib.request  # `import urllib` alone does not guarantee that the submodule is loaded

MAX_PHOTOS = 200

urls = []

target_dir = os.path.join("training_data", keyword)
os.makedirs(target_dir, exist_ok=True)

for i, photo in enumerate(photos):
    # `>=` caps the run at exactly MAX_PHOTOS results (indices 0 .. MAX_PHOTOS - 1).
    if i >= MAX_PHOTOS:
        break

    url = photo.get('url_c')

    # Skip results that carry no usable 'url_c' string.
    if not isinstance(url, str):
        continue

    try:
        urllib.request.urlretrieve(url, os.path.join(target_dir, "photo_"+str(i)+".png"))
    except (OSError, ValueError) as err:
        # Best effort: report the failed download and continue with the next photo.
        # URLError/HTTPError are OSError subclasses; ValueError covers malformed URLs.
        print("that didnt work: ", url, err)
    else:
        urls.append(url)
            

In [None]:
# Look up the geo location of the most recently iterated search result.
# `photo` is the loop variable left over from the download loop; the name `p` is never
# assigned in the visible cells and would raise a NameError on a fresh kernel.
geocoord = flickr.photos.geo.getLocation(photo_id=photo.get('id'))

geocoord.attrib

In [None]:
# Pass at most the first 150 entries of `urls` to the gallery helper
# (presumably renders them as a thumbnail grid with rows 150px high; verify in gallery.py).
gallery(urls[:150], row_height="150px")

# Image Classification for Filtering

In [None]:
import os
import clip
import torch

import numpy as np
from sklearn.linear_model import LogisticRegression
from torch.utils.data import DataLoader
from torchvision.datasets.folder import ImageFolder
import torchvision.transforms as transforms
from tqdm import tqdm

# Load the CLIP RN50x4 model together with its image preprocessing transform.
# The device is pinned to the CPU because the notebook moves all inputs to device = "cpu";
# without it clip.load selects CUDA when available, which would put the model and the
# inputs on different devices.
model, preprocess = clip.load("RN50x4", device="cpu")

In [None]:
def clip_label_transform(label: str) -> str:
    """Turn a class label into a text prompt by prefixing it with "A Picture of a "."""
    return f"A Picture of a {label}"

In [None]:
# Despite its name this is an ImageFolder dataset, not a torch DataLoader. It reads the
# images below ./training_data and applies the CLIP `preprocess` transform to each of them.
data_loader = ImageFolder("./training_data", transform=preprocess)

In [None]:
# Number of samples in the image dataset.
len(data_loader)

In [None]:
# Device that image batches and text tokens are moved to before they are encoded.
device = "cpu"

In [None]:
def get_features(dataset, batch_size=100):
    """Encode every image of `dataset` with the global CLIP `model`.

    Args:
        dataset: Dataset yielding (image, label) pairs that a torch DataLoader can batch;
            images are assumed to be already preprocessed for CLIP (TODO confirm at call sites).
        batch_size: Number of samples encoded per forward pass. Defaults to 100.

    Returns:
        A tuple (features, labels) of NumPy arrays: the concatenated image embeddings
        and the concatenated labels, in dataset order.
    """
    all_features = []
    all_labels = []

    # Inference only, so gradients are not needed.
    with torch.no_grad():
        for images, labels in tqdm(DataLoader(dataset, batch_size=batch_size)):
            features = model.encode_image(images.to(device))

            # Move each batch to the CPU right away so embeddings do not pile up on an accelerator.
            all_features.append(features.cpu())
            all_labels.append(labels)

    return torch.cat(all_features).numpy(), torch.cat(all_labels).cpu().numpy()

In [None]:
# Calculate the image features
# The split sizes are derived from the dataset size; fixed sizes only work for one exact
# dataset size, because random_split requires the lengths to add up to len(dataset).
TRAIN_FRACTION = 0.875  # yields a 1000/143 split for a dataset of 1143 images
n_train = int(len(data_loader) * TRAIN_FRACTION)
n_val = len(data_loader) - n_train
train_set, val_set = torch.utils.data.random_split(
    data_loader,
    [n_train, n_val],
    generator=torch.Generator().manual_seed(0),  # fixed seed -> identical split on every run
)
train_features, train_labels = get_features(train_set)
test_features, test_labels = get_features(val_set)

In [None]:
# Perform logistic regression
classifier = LogisticRegression(random_state=0, C=1.0, max_iter=3000, verbose=1)
classifier.fit(train_features, train_labels)

# Evaluate using the logistic regression classifier
predictions = classifier.predict(test_features)
# The mean of the boolean hit mask is the accuracy; no cast is needed, and the
# `np.float` alias used for such casts no longer exists in NumPy >= 1.24.
accuracy = np.mean(test_labels == predictions) * 100.
print(f"Accuracy = {accuracy:.3f}")

In [None]:
# Display the class indices predicted for the held-out features.
predictions

In [None]:
# Class names known to the dataset, without the first and the last entry ([1:-1]).
# NOTE(review): it is not evident from the notebook why both ends are dropped - confirm.
list(data_loader.class_to_idx.keys())[1:-1]

In [None]:
fig, ax = plt.subplots(figsize=(10, 10))
# Take class indices and names from the dataset instead of assuming exactly nine classes,
# so that the tick labels always line up with the matrix rows and columns.
class_names = list(data_loader.class_to_idx.keys())
# NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2; on newer versions
# ConfusionMatrixDisplay.from_estimator is the replacement (the import has to change too).
plot_confusion_matrix(classifier, test_features, test_labels, labels=list(range(len(class_names))), display_labels=class_names, ax=ax)

In [None]:
# Zero-shot CLIP classification of one example image against the dataset's class names.
# The cell used the names `image` and `cifar100`, neither of which is defined in the
# visible cells; the first image of the dataset and the dataset's own classes are used instead.
class_names = list(data_loader.class_to_idx.keys())
image_path, _ = data_loader.samples[0]
image = Image.open(image_path)

image_input = preprocess(image).unsqueeze(0).to(device)
text_inputs = torch.cat([clip.tokenize(f"a photo of a {c}") for c in class_names]).to(device)

# Calculate features
with torch.no_grad():
    image_features = model.encode_image(image_input)
    text_features = model.encode_text(text_inputs)

# Pick the (up to) 5 most similar labels for the image
image_features /= image_features.norm(dim=-1, keepdim=True)
text_features /= text_features.norm(dim=-1, keepdim=True)
similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
# topk raises if k exceeds the number of classes, so cap it.
values, indices = similarity[0].topk(min(5, len(class_names)))

# Print the result
print("\nTop predictions:\n")
for value, index in zip(values, indices):
    print(f"{class_names[index]:>16s}: {100 * value.item():.2f}%")