# Manage and prepare photos for Social Values Task

In [55]:
import re
from os import listdir
from os.path import isfile, join, splitext

import numpy as np
import pandas as pd
import PIL
from PIL import Image

Read in list of all photos

In [56]:
photo_dir = '../img/'

# listdir() returns entries in arbitrary order; sort so that the row order of
# the photo-by-issue matrices (and photo_files[0] / photo_files[-1]) is the
# same on every run
photo_files = sorted(f for f in listdir(photo_dir) if isfile(join(photo_dir, f)))

# use only jpgs or pngs whose name starts with 'Photo'
photo_files = [f for f in photo_files
               if f.startswith('Photo') and f.endswith(('.jpg', '.png'))]

# screen out left-right flips (they carry '_2' right before the extension)
photo_files = [f for f in photo_files if '_2.' not in f]

Generate left-right flips for images without them

In [57]:
def create_flip(f_name, f_dir='../img/'):
    """
    Create a left-right flipped copy of an image.

    Reads the image `f_name` from directory `f_dir` and saves the mirrored
    version in the same directory with '_2' appended to the base name
    (e.g. 'Photo202.jpg' -> 'Photo202_2.jpg').

    Parameters
    ----------
    f_name : str
        File name of the image, including its extension.
    f_dir : str
        Directory that holds the image; the flipped copy is written there too.
    """
    # split on the actual extension instead of assuming it is three characters
    f_base, f_ext = splitext(f_name)

    # the context manager closes the source file as soon as the flip is made;
    # transpose() returns a new image that no longer needs the open file
    with Image.open(join(f_dir, f_name)) as im:
        out = im.transpose(Image.FLIP_LEFT_RIGHT)

    # f_ext already contains the leading dot
    out.save(join(f_dir, '{}_2{}'.format(f_base, f_ext)))
    

In [70]:
# create a flipped copy for every photo that does not have one yet
flipped = 0
for p in photo_files:
    # flipped copies are named '<base>_2<ext>'
    p_base, p_ext = splitext(p)
    if not isfile(join(photo_dir, '{}_2{}'.format(p_base, p_ext))):
        # pass photo_dir explicitly so the flip is written to the directory
        # that was just checked rather than to create_flip()'s default
        create_flip(p, photo_dir)
        flipped += 1
print('{} new flipped images created.'.format(flipped))

0 new flipped images created.


In [71]:
# load the issue-to-photo mapping; later cells read its Short, PhotosFor and
# PhotosAgainst columns
photo_map = pd.read_excel("PhotoMap.xlsx")

In [72]:
# flatten the comma-separated PhotosFor entry of every row into one flat list
# of photo ids (spaces are stripped before splitting)
for_maps = [
    photo_id
    for entry in photo_map.PhotosFor.values
    for photo_id in entry.replace(' ', '').split(',')
]

In [73]:
# drop duplicate ids (going through a set does not preserve order)
for_maps = list(set(for_maps))

In [74]:
# flatten the comma-separated PhotosAgainst entry of every row into one flat
# list of photo ids (spaces are stripped before splitting)
con_maps = [
    photo_id
    for entry in photo_map.PhotosAgainst.values
    for photo_id in entry.replace(' ', '').split(',')
]

In [75]:
# drop duplicate ids (going through a set does not preserve order)
con_maps = list(set(con_maps))

In [76]:
# ids listed in PhotosFor followed by ids listed in PhotosAgainst
all_maps = for_maps + con_maps 

In [77]:
# unique ids across both lists, in ascending (string) order
all_mapped = sorted(set(all_maps))

In [78]:
# sanity check: runs of two or more digits in the first file name
x = re.findall("[0-9][0-9][0-9]*",photo_files[0])
print(x)

['202']


In [79]:
# sanity check: runs of two or more digits in the last file name
x = re.findall("[0-9][0-9][0-9]*",photo_files[-1])
print(x)

['1079']


In [80]:
# list photos whose id does not appear anywhere in the mapping
for p in photo_files:
    # id = everything from the first digit to the last digit of the name,
    # e.g. '1002_01' for 'Photo_1002_01.png'
    p_num = re.findall("[0-9].*[0-9]", p)
    # a name with fewer than two digits yields no match; report it as
    # unmapped instead of failing on p_num[0]
    if not p_num or p_num[0] not in all_mapped:
        print('{} {}'.format(p, p_num))

Photo_1002_01.png ['1002_01']
Photo_1002_02.png ['1002_02']
Photo_1003_01.png ['1003_01']
Photo_1003_02.png ['1003_02']
Photo_1005_01.png ['1005_01']
Photo_1005_02.png ['1005_02']
Photo_1006_01.png ['1006_01']
Photo_1006_02.png ['1006_02']
Photo_1007_01.png ['1007_01']
Photo_1007_02.png ['1007_02']
Photo_1008_01.png ['1008_01']
Photo_1008_02.png ['1008_02']
Photo_1011_01.png ['1011_01']
Photo_1011_02.png ['1011_02']
Photo_1012_01.png ['1012_01']
Photo_1012_02.png ['1012_02']
Photo_1017_01.png ['1017_01']
Photo_1017_02.png ['1017_02']
Photo_1018_01.png ['1018_01']
Photo_1018_02.png ['1018_02']
Photo_1019_01.png ['1019_01']
Photo_1019_02.png ['1019_02']
Photo_1020_01.png ['1020_01']
Photo_1020_02.png ['1020_02']
Photo_1020_03.png ['1020_03']
Photo_1021_01.png ['1021_01']
Photo_1021_02.png ['1021_02']
Photo_1022_01.png ['1022_01']
Photo_1022_02.png ['1022_02']
Photo_1024_01.png ['1024_01']
Photo_1024_02.png ['1024_02']
Photo_1025_01.png ['1025_01']
Photo_1025_02.png ['1025_02']
Photo_1026

In [83]:
# layout of the photo-by-issue matrices built from these labels:
# rows: photo_files
# columns: issues
issues = photo_map.Short.values

In [117]:
# spot check: does '231' occur in the first row's PhotosFor entry?
'231' in photo_map.PhotosFor.iloc[0,]

False

In [118]:
def _photo_ids(file_name):
    """
    Return `(p_num, p_num_multi)` for one photo file name.

    `p_num` is the first run of digits in the name ('' if there is none).
    `p_num_multi` is the full multi-part id such as '1002_01' ('' when the
    name carries a single-part id only).
    """
    p_num = re.search(r'\d+|$', file_name).group()
    # NOTE(review): p_num_multi was never assigned in the notebook as saved
    # (it only existed in the kernel that produced the outputs), so this cell
    # failed on a fresh kernel. It is reconstructed here as the multi-part
    # id -- confirm this matches the original intent.
    p_num_multi = re.search(r'\d+(?:_\d+)+|$', file_name).group()
    return p_num, p_num_multi


def _listed_ids(cell):
    """
    Return the set of photo ids written in one PhotosFor/PhotosAgainst cell.

    Ids are compared as whole tokens, so '100' is not found inside '1001'.
    A non-string cell (e.g. NaN for a blank spreadsheet cell) gives an
    empty set.
    """
    if not isinstance(cell, str):
        return set()
    return set(re.findall(r'\d+(?:_\d+)*', cell))


# parse every issue's photo lists once instead of once per photo
for_ids = [_listed_ids(cell) for cell in photo_map.PhotosFor.values]
against_ids = [_listed_ids(cell) for cell in photo_map.PhotosAgainst.values]

# one row per photo, one column per issue; 1 marks "photo assigned to issue"
p_map_for = np.zeros((len(photo_files), len(issues)))
p_map_against = np.zeros((len(photo_files), len(issues)))

# check each photo
for p_idx, p_file in enumerate(photo_files):
    # the ids depend only on the file name, so compute them once per photo
    p_num, p_num_multi = _photo_ids(p_file)

    # skip blanks (rows for such photos stay all-zero)
    if len(p_num) == 0:
        continue

    # a photo counts as assigned when either form of its id is listed
    p_ids = {p_num, p_num_multi} - {''}

    # was photo assigned to the issue?
    for i_idx in range(len(issues)):
        p_map_for[p_idx, i_idx] = 1 if p_ids & for_ids[i_idx] else 0
        p_map_against[p_idx, i_idx] = 1 if p_ids & against_ids[i_idx] else 0


In [119]:
# row labels: file names with the last four characters (the '.jpg' / '.png'
# extension) removed
photo_labels = [f[:-4] for f in photo_files]

In [120]:
# wrap both hit matrices as labelled tables: one row per photo label, one
# column per issue
for_df = pd.DataFrame(data=p_map_for, index=photo_labels, columns=issues)
against_df = pd.DataFrame(data=p_map_against, index=photo_labels, columns=issues)

In [121]:
# write both tables to CSV files in the current working directory
for_df.to_csv('photo_map_for.csv')
against_df.to_csv('photo_map_against.csv')

In [127]:
# row sums of the "for" matrix: one total per photo, summed over all issues
p_map_for.sum(axis=1)

array([35., 22., 32.,  7., 21.,  4.,  6., 15.,  5.,  7., 18.,  2.,  5.,
       27., 16.,  9., 22., 11.,  6.,  8., 21., 18., 37., 34., 29., 31.,
       14., 17., 28., 30., 35., 27., 30., 14.,  9.,  9., 13., 11., 13.,
        4., 14., 18.,  8., 12., 26., 11., 16.,  9.,  3.,  8., 11., 44.,
       44., 17., 17., 12., 11., 11., 10., 10., 10., 10., 30., 30.,  6.,
        5., 14., 14.,  4.,  4.,  4., 10., 17.,  1.,  3.,  3.,  6.,  6.,
        3.,  3.,  2.,  2.,  2.,  4.,  4.,  1.,  1.,  5.,  1.,  1.,  1.,
        1.,  2.,  2.,  2.,  2.,  3.,  5.,  5.,  5.,  4.,  6.,  2.,  2.,
        2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  1.,  3.,  3.,  2.,
       24., 12., 18.,  8., 17., 17., 17.,  6.,  6.,  8.,  8.,  8.,  7.,
        7., 18.,  4.,  8., 14.,  6.,  4., 10.,  4.,  2.,  5.,  5.,  5.,
       11., 11., 11., 11.,  3.,  3.,  3.,  8.,  8.,  8., 10.,  5.,  5.,
        5.,  7.,  7., 13., 13., 13., 13.,  7.,  7.,  3.,  3.,  7.,  7.,
        7.,  7., 10., 10., 10.,  5.,  5.,  5.,  2.,  2.,  2.,  2

In [132]:
# print, for each photo label, the row sum of the "against" matrix
nrow, ncol = p_map_against.shape
photo_totals = p_map_against.sum(axis=1)
for label, total in zip(photo_labels[:nrow], photo_totals):
    print(label, total)

Photo202 15.0
Photo206 14.0
Photo216 14.0
Photo217 2.0
Photo218 10.0
Photo220 13.0
Photo225 5.0
Photo228 10.0
Photo230 7.0
Photo231 11.0
Photo234 16.0
Photo238 0.0
Photo252 6.0
Photo254 19.0
Photo257 11.0
Photo258 8.0
Photo261 13.0
Photo262 10.0
Photo268 7.0
Photo272 4.0
Photo273 14.0
Photo275 9.0
Photo276 36.0
Photo280 30.0
Photo288 25.0
Photo289 21.0
Photo297 18.0
Photo300 12.0
Photo301 17.0
Photo308 17.0
Photo315 19.0
Photo316 12.0
Photo320 13.0
Photo333 6.0
Photo335 6.0
Photo337 15.0
Photo341 12.0
Photo351 9.0
Photo353 5.0
Photo355 4.0
Photo362 5.0
Photo365 7.0
Photo366 2.0
Photo367 6.0
Photo368 12.0
Photo369 5.0
Photo374 6.0
Photo377 5.0
Photo378 1.0
Photo999 3.0
Photo_1001 15.0
Photo_1002_01 18.0
Photo_1002_02 18.0
Photo_1003_01 5.0
Photo_1003_02 5.0
Photo_1004 6.0
Photo_1005_01 4.0
Photo_1005_02 4.0
Photo_1006_01 2.0
Photo_1006_02 2.0
Photo_1007_01 4.0
Photo_1007_02 4.0
Photo_1008_01 1.0
Photo_1008_02 1.0
Photo_1009 0.0
Photo_1010 0.0
Photo_1011_01 2.0
Photo_1011_02 2.0
Photo_10

190