In [1]:
import flickrapi as fl
import urllib
import time
import os
import io
import ast
def readConfig(filename):
    '''
    Read configuration values from a text file.

    Input:
    filename --- path of the config file; every non-blank line contributes
                 its last whitespace-separated token (e.g. "key = abc" -> "abc")
    Output:
    a list with one value per non-blank line, in file order
    '''
    with open(filename, 'r') as f:
        # Blank or whitespace-only lines split into an empty list; skip them
        # instead of failing with IndexError on tokens[-1].
        return [tokens[-1] for tokens in (line.split() for line in f) if tokens]

# Load the values consumed by the Flickr client set-up in the next cell.
config = readConfig(filename="flickrConfig.txt")

In [2]:
# The first config value is used as the API key, the second as the secret.
key, secret = config[0], config[1]
# 'parsed-json' is requested because the helpers below index responses like dicts.
flickr = fl.FlickrAPI(key, secret, format='parsed-json')

In [3]:
def findurl(info):
    '''
    Input:
    info --- a photo record providing the 'farm', 'id', 'secret' and 'server' fields
    Output:
    the staticflickr.com .jpg url assembled from those four fields
    '''
    template = "https://farm%s.staticflickr.com/%s/%s_%s.jpg"
    # Fields are looked up in this order, then rearranged to fit the url layout.
    farm, photo_id, photo_secret, server = [
        info[field] for field in ('farm', 'id', 'secret', 'server')
    ]
    return template % (farm, server, photo_id, photo_secret)


In [4]:
def findText(photoid):
    '''
    Input:
    photoid --- id of the photo to look up
    Output:
    a dict with the photo's 'description' and a list of its 'comments'
    (the list is empty when the photo reports no comments)
    '''
    photo = flickr.photos.getinfo(photo_id=photoid)['photo']
    description = photo['description']['_content']
    comment_texts = []
    # The comment list is only requested when the photo info reports at least one comment.
    if int(photo['comments']['_content']) > 0:
        response = flickr.photos.comments.getList(photo_id=photoid)
        comment_texts = [entry['_content'] for entry in response['comments']['comment']]
    return {'description': description, 'comments': comment_texts}

In [5]:
#search photos for specific tags.
#Only search for photos taken in the United States.
def search(tags, tag_mode, num=500, RATE_LIMIT = 3600):
    '''
    Input:
    tags --- a comma-delimited list of tags
    tag_mode --- Either 'any' for an OR combination of tags, or 'all' for an AND combination.
    num --- number of photo urls returned. Max=500.
    RATE_LIMIT --- maximum number of API requests this function may issue
                   per one-hour window (counted across calls).
    Output:
    a list of (photo url, text information) tuples; the text information is
    the dict returned by findText with the photo's 'title' added
    Raises:
    NameError('Hit rate limit') --- when the next request would exceed RATE_LIMIT
    '''
    WINDOW_SECONDS = 3600
    # The request counter lives on the function object so it survives between
    # calls. Wall-clock time.time() is used to detect when a window has expired.
    now = time.time()
    if now - getattr(search, '_window_start', float('-inf')) >= WINDOW_SECONDS:
        search._window_start = now
        search._request_count = 0

    def reserve_request():
        # NameError is kept as the exception type for backward compatibility.
        if search._request_count + 1 > RATE_LIMIT:
            raise NameError('Hit rate limit')
        search._request_count += 1

    reserve_request()
    photos = flickr.photos.search(tags=tags, tag_mode=tag_mode, per_page=num, woe_id='23424977')
    info = []
    for photo in photos['photos']['photo']:
        url = findurl(photo)
        # findText always issues one photo-info request.
        reserve_request()
        text = findText(photo['id'])
        if text['comments']:
            # A non-empty comment list means findText made a second request.
            search._request_count += 1
        text['title'] = photo['title']
        info.append((url, text))
    return info

In [24]:
# Collect up to 100 records per sentiment; 'any' matches photos carrying at least one of the tags.
urlPos = search(tags="happy", tag_mode="any", num=100)
urlNeg = search(tags="sad, negative", tag_mode="any", num=100)


In [25]:
# Report how many records each search returned.
print("number of positive images: %d" % len(urlPos))
print("number of negative images: %d" % len(urlNeg))

number of positive images: 100
number of negative images: 100


In [26]:
def writeData(data, label, folderName):
    '''
    Input:
    data --- a list of image url and corresponding text information
    label --- sentimental label, positive or negative; used as the file name
    folderName --- the folder name to be created (reused if it already exists)
    Output:
    a folder containing the labeled data file <label>.txt, one str(item)
    per record with a blank line after each
    '''
    if not os.path.exists(folderName):
        os.makedirs(folderName)
    # The with-block closes the file even when str(item) or a write raises.
    with open(os.path.join(folderName, label + ".txt"), "w") as f:
        for item in data:
            f.write(str(item))
            f.write("\n\n")
    print("Finished!")
    
# Both label files go into the same folder.
writeData(data=urlPos, label="Positive", folderName="testData")
writeData(data=urlNeg, label="Negative", folderName="testData")

Finished!


In [20]:
#find all urls in a text file
def findurlist(folder, label):
    '''
    Input:
    folder --- folder holding the labeled data files
    label --- name of the data file without its ".txt" extension
    Output:
    a list with the first element (the image url) of every record in the file
    '''
    # The with-block closes the file handle once the lines are read.
    with io.open(os.path.join(folder, label + ".txt"), encoding='utf8') as f:
        records = [line.strip() for line in f]
    # Filter after stripping so whitespace-only separator lines never reach literal_eval.
    return [ast.literal_eval(record)[0] for record in records if record]
        
def download(folder, label):
    path = folder+os.sep+"image"+os.sep+label
    if not os.path.exists(path):
            os.makedirs(path)
    if label == "neg":
        label = "Negative"
    else:
        label = "Positive"
    urls = findurlist(folder, label)
    i=0
    for url in urls:
        urllib.urlretrieve(url, path + os.sep+str(i)+".jpg")
        print "Image", i, "finished."
        i += 1 

In [27]:
# Any label other than "neg" fetches the positive set.
download(folder="testData", label="pos")


Image 0 finished.
Image 1 finished.
Image 2 finished.
Image 3 finished.
Image 4 finished.
Image 5 finished.
Image 6 finished.
Image 7 finished.
Image 8 finished.
Image 9 finished.
Image 10 finished.
Image 11 finished.
Image 12 finished.
Image 13 finished.
Image 14 finished.
Image 15 finished.
Image 16 finished.
Image 17 finished.
Image 18 finished.
Image 19 finished.
Image 20 finished.
Image 21 finished.
Image 22 finished.
Image 23 finished.
Image 24 finished.
Image 25 finished.
Image 26 finished.
Image 27 finished.
Image 28 finished.
Image 29 finished.
Image 30 finished.
Image 31 finished.
Image 32 finished.
Image 33 finished.
Image 34 finished.
Image 35 finished.
Image 36 finished.
Image 37 finished.
Image 38 finished.
Image 39 finished.
Image 40 finished.
Image 41 finished.
Image 42 finished.
Image 43 finished.
Image 44 finished.
Image 45 finished.
Image 46 finished.
Image 47 finished.
Image 48 finished.
Image 49 finished.
Image 50 finished.
Image 51 finished.
Image 52 finished.
Ima