# Instagram Hashtag Poster

This notebook downloads all images for a given hashtag and combines them into one large poster.

**Important info**: The Instagram API has a sandbox mode, which prevents any app created after Nov 2015 from downloading images from all users prior to app approval. The workaround is to use an old Instagram app id - unfortunately, after June 2016 this won't work either. See https://www.instagram.com/developer/sandbox/ for more info.

In [None]:
import json
import math
import os
import shutil
from io import BytesIO

import imagehash
import numpy as np
import requests
from IPython.display import Javascript
from PIL import Image
from sklearn.cluster import KMeans

# Notebook configuration: values a reader may want to tune before running.

# Client id sent in the OAuth2 authorize URL
CLIENT_ID = '7b75033299b04b478a75b59044238b78'
REDIRECT_URI = 'http://localhost:8888/notebooks/notebooks/Instagram%20Hashtag%20Poster.ipynb'  # must be specified in ig client settings
# Base URL that every API request in this notebook is built from
IG_ENDPOINT = 'https://api.instagram.com/v1/'
# Edge length in pixels of one tile on the poster / cluster images
IG_WIDTH = 150

# Hashtag whose images are fetched; also used in the output file names
THE_TAG = 'acupofczech' # alternative tag: 'acupoftravel'

The following two cells handle OAuth2 authentication:

In [None]:
%%javascript
IPython.notebook.kernel.execute("NOTEBOOK_URL = '" + window.location.href + "'");

In [None]:
# Extract the OAuth2 access token expected in the notebook URL as a
# '#access_token=...' fragment. NOTEBOOK_URL is set by the preceding
# JavaScript cell, so that cell has to be run first.
fragment = '#access_token='

if fragment in NOTEBOOK_URL:
    # Everything after the fragment marker is taken as the token
    ACCESS_TOKEN = NOTEBOOK_URL[NOTEBOOK_URL.index(fragment)+len(fragment):]
    print('Got the token! Continue below.')
else:
    # No token in the URL yet: print the authorization link for the user to
    # open. (The former Javascript(...) object was never displayed, so it had
    # no effect and has been dropped.)
    print('Click the link and run the cells again.')
    print('https://api.instagram.com/oauth/authorize/?client_id={}&redirect_uri={}&response_type=token'.format(CLIENT_ID, requests.compat.quote_plus(REDIRECT_URI)))

In [None]:
# Init: query the tag endpoint for its media count and derive the side
# length (in images) of the largest square poster that count can fill

tag_info_url = IG_ENDPOINT + 'tags/{}/'.format(THE_TAG)
r = requests.get(tag_info_url, params={'access_token': ACCESS_TOKEN})
n_images = r.json()['data']['media_count']
poster_width = math.floor(math.sqrt(n_images))

In [None]:
# Get the paginated response and merge into one big json.
# Every page contributes its 'data' list, including the final page, which is
# the one whose 'pagination' object carries no 'next_url'.
data = []

i = 0
r = requests.get(IG_ENDPOINT + 'tags/{}/media/recent'.format(THE_TAG), {'access_token': ACCESS_TOKEN})
while True:
    page = r.json()  # parse each response only once
    data.extend(page['data'])
    i += 1
    print(i)  # progress: number of pages fetched so far

    next_url = page.get('pagination', {}).get('next_url')
    if not next_url:
        break
    r = requests.get(next_url)

# Keep a copy of the merged media list on disk
with open('{}.json'.format(THE_TAG), 'w') as outfile:
    json.dump(data, outfile)

## Simple poster generation

In [None]:
# Download images and place them on the poster.
# Beware, the poster may eat a lot of memory:
# adjust the width (the first poster_width * poster_width images are used)
# and the resolution (IG_WIDTH).

poster_width = 40
n_tiles = poster_width**2

poster_img = Image.new('RGB', (IG_WIDTH*poster_width, IG_WIDTH*poster_width))

# Slice up front so that no image beyond the last grid cell is downloaded
for i, img_json in enumerate(data[:n_tiles]):
    img_url = img_json['images']['thumbnail']['url']
    img = Image.open(BytesIO(requests.get(img_url).content))

    # Tiles are laid out row by row, left to right
    row, col = divmod(i, poster_width)
    poster_img.paste(img, (IG_WIDTH*col, IG_WIDTH*row))

    # Report progress each time a row has been completed
    if col == poster_width - 1:
        print('Row {}/{}'.format(row + 1, poster_width))

In [None]:
# Write the assembled poster to disk as a progressive, optimized JPEG
poster_path = 'ig_poster_{}.jpg'.format(THE_TAG)
poster_img.save(poster_path, 'JPEG', quality=90, optimize=True, progressive=True)

## Clustering the images

In [None]:
# Download the thumbnails into the 'thumbs' directory, one file per media id
os.makedirs('thumbs', exist_ok=True)  # open() below fails if the directory is missing

for img_json in data:
    img_url = img_json['images']['thumbnail']['url']
    # Stream the response body straight to disk; each image is fetched
    # exactly once (the earlier in-memory download was never used).
    r = requests.get(img_url, stream=True)
    with open('thumbs/{}.jpg'.format(img_json['id']), 'wb') as out_file:
        shutil.copyfileobj(r.raw, out_file)

In [None]:
# Compute an average hash for every downloaded thumbnail and store its
# bit-packed form as one row of `hashes`
hash_size = 64
n_bytes = hash_size * hash_size // 8  # hash_size x hash_size bits, packed 8 per byte
hashes = np.zeros((len(data), n_bytes))

for i, img_json in enumerate(data):
    thumb_path = 'thumbs/{}.jpg'.format(img_json['id'])
    img = Image.open(thumb_path)
    h = imagehash.average_hash(img, hash_size=hash_size)
    hashes[i] = np.packbits(h.hash)

In [None]:
# Cluster the hash vectors with k-means.
# A fixed random_state makes the cluster assignment reproducible across
# kernel restarts; without it the initialisation differs from run to run.
n_clusters = 20
y_predict = KMeans(n_clusters=n_clusters, random_state=0).fit_predict(hashes)

In [None]:
# Build one square mosaic per cluster and save each as a JPEG
for y in range(n_clusters):
    # Indices into `data` of the images assigned to cluster y
    cluster = np.flatnonzero(y_predict == y)
    cluster_width = int(len(cluster) ** 0.5) + 1
    side_px = IG_WIDTH*cluster_width
    cluster_img = Image.new('RGB', (side_px, side_px))

    for i, j in enumerate(cluster):
        # Tiles are laid out row by row, left to right
        row, col = divmod(i, cluster_width)
        img = Image.open('thumbs/{}.jpg'.format(data[j]['id']))
        cluster_img.paste(img, (IG_WIDTH*col, IG_WIDTH*row))

    cluster_img.save('ig_cluster_{}_{}.jpg'.format(THE_TAG, y), 'JPEG', quality=90, optimize=True, progressive=True)