In [74]:
import pandas as pd
import os
import cv2
import numpy as np
from PIL import Image
import pickle

### Pre-Processing to get images database

In [75]:
# Read images from a folder(folderName), and return a list of images
def readFolderImages(folderName):
    images = []
    for url in os.listdir(folderName):
        longUrl = folderName + '/' + url
        currentImage = readImageFromUrl(longUrl)
        images.append(currentImage)
        
    return images

In [76]:
# Read an image with OpenCV
def readImageFromUrl(url):
    return cv2.imread(url)

In [77]:
# Return true if an image has the right dimension
def keepImgRightSize(img,resolution):
    if max(img.shape[:2]) >= resolution[0]:
        if min(img.shape[:2]) >= resolution[1]:
            return True
        else:
            return False
    else:
        return False
        
        

In [78]:
# Rescale an image if necessary(get the right shape)
def rescaleCVImageToResolution(img, resolution, interpolation = cv2.INTER_CUBIC):
    realResolution = (resolution[1], resolution[0])
    return cv2.resize(img, dsize=realResolution, interpolation= interpolation)

In [79]:
# Rotate an image if necessary(get the right shape)
def rotateImgIfNecessary(img):
    if img.shape[0] > img.shape[1]:
        return np.transpose(img, axes=(1,0,2))
    else:
        return img

In [127]:
# Image to Vector
def reshapeImageToVector(image):
    return np.reshape(image, (1,-1))[0]

In [128]:
# return Images reshpaed correcly 
def getImagesFromFolderWithMinimumResolution(url, resolution):
    images = readFolderImages(url)
    return [reshapeImageToVector(rescaleCVImageToResolution(rotateImgIfNecessary(img),resolution)) for img in images if keepImgRightSize(img, resolution)]
    

In [95]:
# Save images as pkl
def saveImageListToPickle(images, toUrl, toName):
    if not os.path.exists(toUrl):
        os.makedirs(toUrl)        
    with open(toUrl+'/'+toName+'.pkl', 'wb') as f:
        pickle.dump(images, f)

In [194]:
# Permet de générer tout les pickles de toutes les images contenues dans les dossiers présents dans path
def foldersToPickles(path, resolution, dest):
    i=0
    for url in os.listdir(path):
        images = getImagesFromFolderWithMinimumResolution('flowers/tulip', resolution)
        saveImageListToPickle(images, 'dataBasePkl', dest+str(i))
        i+=1

In [168]:
# A partir d'une liste d'images et un label commun, on retourne un array avec comme premier element le label, 
# puis l'image
def getArrayWithLabels(images, label):
    trainSet = np.zeros((len(images), images[0].shape[0]+1))
    i = 0
    for image in  images:
        trainSet[i, 0] = label
        trainSet[i,1:] = image
        i+=1
    return trainSet

In [212]:
# Prend en entrée une liste de la forme suivante[(pickle_0,label_1), ..., (pickle_n, label_n)]
# Attention, les vecteurs contenus dans les pickle doivent avoir la même taille! Pusso
def getTrainSetFromPickles(picklesWithLabels):
    i = 0
    for pickleFile in picklesWithLabels:
        print(i)
        with open(pickleFile[0], 'rb') as f:
            pick = pickle.load(f)
        if i==0:
            trainSet = getArrayWithLabels(pick, pickleFile[1])
            i +=1
        else:
            trainSet = np.concatenate((trainSet, getArrayWithLabels(pick, pickleFile[1])))
    return trainSet

            


### Création du dataSet

In [192]:
resolution = [180, 240]

In [195]:
foldersToPickles('flowers', resolution, 'pusso', 'flowers')
foldersToPickles('fruits/Training', resolution, 'fruits')

In [214]:
testFiles = []
for i in range(6):
    testFiles.append(('dataBasePkl/flowers'+str(i)+'.pkl', 0))
for i in range(82):
    testFiles.append(('dataBasePkl/fruits'+str(i)+'.pkl', 1))

In [215]:
trainData = getTrainSetFromPickles(testFiles)

0
1
1
1
1
1
1
1
1
1


KeyboardInterrupt: 

In [186]:
trainSet.shape


(1667, 129601)