# Batch Downloading Images and Extracting Foreground on Transparent Backgrounds

In [None]:
import numpy as np
import pandas as pd
import cv2 as cv
from matplotlib import pyplot as plt

### Step 1: Load the CSV with image names and image URLs in two separate columns

In [None]:
# Load the spreadsheet and preview its first rows to confirm the column names.
df = pd.read_csv(
    '/Volumes/Samsung_T3/project-repos/majorstudio/data/clothes/costume_reclass.csv'
)
print(df.head(n=5))

# Columns the later cells rely on: 1) image names, 2) image urls.
label, url = df['img'], df['src']

# Extra columns, only needed when pulling from the API in the next section.
number, objectID = df['number'], df['objectID']

### API Request: Get Needed Info from API and download to CSV (Optional)

In [None]:
# Accumulator list; the API request loop appends one dict per matching object.
myJson = []

In [None]:
import urllib.request
import json
import csv

# Query the collection API for (at most) the first 1000 rows of `objectID`
# and keep the objects whose gallery number lies strictly between 755 and 772.
base_url = 'https://collectionapi.metmuseum.org/api/collection/v1/object/'

# min(...) keeps the loop from indexing past the end of a shorter table.
for i in range(min(1000, len(objectID))):
    # Use a dedicated name for the request address: assigning to `url` here
    # would overwrite the `url` column loaded in Step 1, which the Step 2
    # download loop reads.
    object_url = base_url + str(int(objectID[i]))
    req = urllib.request.Request(object_url)

    ## parsing response (the context manager closes the connection)
    with urllib.request.urlopen(req) as response:
        data = json.loads(response.read().decode('utf-8'))

    ## parsing json
    # Inspect the gallery string once. Looping over it would visit every
    # character and append the same record repeatedly.
    location = (data.get('location') or {}).get('gallery')
    if not location:
        continue

    # The unpack expects exactly two whitespace-separated tokens, the second
    # being the gallery number; skip anything else instead of aborting the
    # whole batch on a ValueError.
    try:
        _, gallery_num = location.split()
        gallery_num_int = int(gallery_num)
    except ValueError:
        continue

    if 755 < gallery_num_int < 772:
        myJson.append({
            "Title": data['titles']['primaryTitle'],
            "Gallery": location,
            "Image URL": data['media']['images']['primaryImage']['imageUrl'],
            "URL": data['metadata']['metaCanonicalURL'],
        })

# Create table with list values
import pandas
myJson_df = pandas.DataFrame(myJson)
# Drop exact repeats (e.g. from re-running the loop without resetting myJson).
myJson_table = myJson_df.drop_duplicates(keep='first', inplace=False)
print(myJson_table)

In [None]:
# Save the filtered table as comma-separated text, leaving out the row index
myJson_table.to_csv("./file.csv", index=False, sep=',')


### Step 2: Download images and save to absolute path on hard drive

In [None]:
# `import urllib` alone does not load the `request` submodule, so import it
# explicitly; this cell then works even when the optional API cell is skipped.
import urllib.request

img_dir = '/Volumes/Samsung_T3/project-repos/majorstudio/img/'

# Read the name/URL columns straight from `df`, so this loop does not depend
# on whether the `label`/`url` globals were rebound by another cell.
for name, link in zip(df['img'], df['src']):
    print(name)
    urllib.request.urlretrieve(link, img_dir + name)

### Step 3: See which images you downloaded

In [None]:
import os
import numpy as np

# Collect the name of every file found anywhere under the download folder
jpg = [
    filename
    for _root, _dirs, filenames in os.walk('/Volumes/Samsung_T3/project-repos/majorstudio/img')
    for filename in filenames
]

print(len(jpg))

# Write the collected names to jpg.csv, one name per row
np.savetxt("jpg.csv", jpg, fmt="%s", delimiter=",")

### Step 4: Get downloaded images, extract to transparent bg, and save to new folder

In [None]:
 for i in range(7110,len(label)):
    name= label[i]
    path = '/Volumes/Samsung_T3/project-repos/majorstudio/img/' + name
    path1 = str(path)
    print(path1)
    img = cv.imread(path1)
#     img = cv.blur(img,(2,2))
    mask = np.zeros(img.shape[:2],np.uint8)
    bgdModel = np.zeros((1,65),np.float64)
    fgdModel = np.zeros((1,65),np.float64)
    height, width = img.shape[:2]
    rect = (1,1,width-5,height-5)
    cv.grabCut(img,mask,rect,bgdModel,fgdModel,5,cv.GC_INIT_WITH_RECT)
    mask2 = np.where((mask==2)|(mask==0),0,1).astype('uint8')
    img = img*mask2[:,:,np.newaxis]
    tmp = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    _,alpha = cv.threshold(tmp,0,255,cv.THRESH_BINARY)
    b, g, r = cv.split(img)
    rgba = [b,g,r, alpha]
    dst = cv.merge(rgba,4)
    plt.imshow(dst),plt.colorbar(),plt.show()
    cv.imwrite('/Volumes/Samsung_T3/project-repos/majorstudio/png/'+ name + '.png',dst)
    



In [None]:
png_dir = '/Volumes/Samsung_T3/project-repos/majorstudio/png/'
png8_dir = '/Volumes/Samsung_T3/project-repos/majorstudio/png8/'

# Re-read each extracted PNG, rebuild its alpha channel and save the result
# into the png8 folder.
for i in range(7001, len(label)):
    name = label[i]
    path1 = png_dir + name + '.png'
    print(path1)

    # cv.imread returns None (no exception) when the file is missing or
    # unreadable; skip it so one missing PNG does not abort the batch.
    img = cv.imread(path1)
    if img is None:
        print("Skipped (could not read): " + path1)
        continue

    # With default flags imread yields 3-channel BGR, so alpha is rebuilt:
    # opaque wherever the grayscale value is non-zero.
    tmp = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    _, alpha = cv.threshold(tmp, 0, 255, cv.THRESH_BINARY)
    b, g, r = cv.split(img)
    # cv.merge takes the channel list only; the stray second argument is gone.
    dst = cv.merge([b, g, r, alpha])

    # Report the file actually written (the output path, not the input path),
    # and only when imwrite reports success.
    out_path = png8_dir + name + '.png'
    if cv.imwrite(out_path, dst):
        print("Wrote: " + out_path)
    else:
        print("Failed to write: " + out_path)
    

### Resize and save to absolute path on hard drive

In [None]:
def image_resize(image, width = None, height = None, inter = cv.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    Args:
        image: Array-like image whose ``shape[:2]`` is ``(height, width)``.
        width: Target width in pixels. When given, it takes precedence and
            ``height`` is ignored.
        height: Target height in pixels; used only when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv.resize``.

    Returns:
        The resized image, or ``image`` itself when neither ``width`` nor
        ``height`` is given.
    """
    # Read the source size first, exactly as before any early return.
    src_h, src_w = image.shape[:2]

    # Nothing requested: hand the input back untouched.
    if width is None and height is None:
        return image

    if width is not None:
        # Scale by the requested width and derive the matching height.
        scale = width / float(src_w)
        target = (width, int(src_h * scale))
    else:
        # Only a height was requested: derive the matching width.
        scale = height / float(src_h)
        target = (int(src_w * scale), height)

    # cv.resize takes the size as (width, height).
    return cv.resize(image, target, interpolation = inter)

In [None]:
from PIL import Image

png_dir = '/Volumes/Samsung_T3/project-repos/majorstudio/png/'
assets_dir = '/Volumes/Samsung_T3/project-repos/epochrunway/assets/png8/'
basewidth = 50  # target width in pixels; the height follows the aspect ratio

# NOTE(review): the non-zero start index presumably resumes an earlier,
# interrupted run -- set it to 0 to process every row.
for i in range(7760, len(label)):
    name = label[i]
    path1 = png_dir + name + '.png'
    print(path1)

    try:
        # The context manager closes the source file once the copy is made.
        with Image.open(path1) as img:
            wpercent = basewidth / float(img.size[0])
            # Clamp to 1 so a very wide image cannot round down to height 0.
            hsize = max(1, int(img.size[1] * wpercent))
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # filter under its current name.
            resized = img.resize((basewidth, hsize), Image.LANCZOS)
    except OSError as err:
        # Missing or unreadable file: skip it instead of aborting the batch.
        print("Skipped (could not open): " + path1 + " (" + str(err) + ")")
        continue

    # Report the file actually written (the output path, not the input path).
    out_path = assets_dir + name + ".png"
    resized.save(out_path)
    print("Wrote: " + out_path)