In [None]:
import numpy as np
import pandas as pd

from src.reader import Reader
from src.downloader import Downloader
from src.localreader import LocalReader, SHRINK, CENTERING, getMean

In [None]:
#Download images starting from the 1st challenge through the 10th challenge.
#In other words, download the first 10 subsets.
#downloader = Downloader(start=0, end=10)
#downloader.download(path="data/AVA.txt")

In [None]:
# Build a reader configured for 256x256 output with the SHRINK shaping mode, then
# iterate over the (images, scores) pairs yielded by reader.read(start=0, end=3).
# NOTE(review): the LocalReader class defined in a later cell of this notebook has
# no `read` method; this cell presumably relies on the class imported from
# src.localreader providing it — confirm before running after that later cell.
reader = LocalReader(width=256, height=256, shaping=SHRINK)
for images, scores in reader.read(start=0, end=3):
    # Shapes of the raw batch as returned by the reader.
    print(images.shape)
    print(scores.shape)
    # Collapse each score entry to a single value via getMean, then summarise
    # the distribution of those values with pandas.
    score_mean = getMean(scores)
    print(score_mean.shape)
    print(pd.Series(score_mean).describe())

In [47]:
import urllib.request
import tarfile
import time
import os

import numpy as np
import cv2 as cv

from typing import List, Dict, Tuple
from PIL import Image

from src.util import progress
from src.util import get_links_from_file

SHRINK = 'shrink'
CENTERING = 'centering'

class LocalReader():
    """Reads images and their mean scores from local ``.tar.gz`` archives.

    Every entry of ``img_path`` is opened as a gzip-compressed tar archive.
    The part of the archive name before the first ``.`` is used as the
    challenge id when looking up score rows in the file at ``ava_path``.
    Within each archive the leading ``1 - validation_size`` fraction of the
    members is the training split and the remainder the validation split.
    """

    def __init__(self, height=256, width=256, validation_size=0.1, shaping=SHRINK,
                 img_path='data/images/original', ava_path='data/AVA.txt')-> None:
        """Store the reader configuration; no I/O happens here.

        Args:
            height: Target image height in pixels.
            width: Target image width in pixels.
            validation_size: Fraction of each archive reserved for validation.
            shaping: ``SHRINK`` to resize images to the target size, or
                ``CENTERING`` to crop the target size around the image centre.
            img_path: Directory containing the ``.tar.gz`` archives.
            ava_path: Path of the space-separated score file.
        """
        super().__init__()
        self.validation_size = validation_size
        # Kept in (width, height) order because that is what cv.resize expects.
        self.size = (width, height)
        self.shaping = shaping
        self.img_path = img_path
        self.ava_path = ava_path

    def train(self, start=0, end=3)->Tuple:
        """Yield ``(images, mean_scores)`` for the training part of each archive.

        Archives are taken from the sorted directory listing; positions
        ``start`` (inclusive) to ``end`` (exclusive) are processed.
        """
        yield from self._iter_archives(start, end, isValidation=False)

    def validate(self, start=0, end=3)->Tuple:
        """Yield ``(images, mean_scores)`` for the validation part of each archive.

        Archives are taken from the sorted directory listing; positions
        ``start`` (inclusive) to ``end`` (exclusive) are processed.
        """
        yield from self._iter_archives(start, end, isValidation=True)

    def _iter_archives(self, start, end, isValidation):
        """Shared walk over the archive directory used by train() and validate()."""
        # os.listdir returns names in arbitrary order; sorting makes the
        # start/end positions reproducible and guarantees that train() and
        # validate() address the same archive for the same position.
        for position, tar_name in enumerate(sorted(os.listdir(self.img_path))):
            if position == end:
                break
            if position >= start:
                yield self.get_images(tar_name=tar_name, isValidation=isValidation)

    def get_images(self, tar_name:str, isValidation:bool)->Tuple:
        """Load one split of a single archive.

        Args:
            tar_name: File name of the archive inside ``img_path``.
            isValidation: ``True`` selects the trailing validation members,
                ``False`` the leading training members.

        Returns:
            ``(images, mean_scores)`` as numpy arrays, one entry per selected
            archive member.

        Raises:
            KeyError: If a member has no score row for this challenge.
            ValueError: Propagated from ``procces_image``.
        """
        images = []
        labels = []
        with tarfile.open(os.path.join(self.img_path, tar_name), "r:gz") as tar:
            label_map = self.get_scores(tar_name.split(".")[0])
            # extractfile() returns None for directories and other non-file
            # members, so only regular files take part in the split.
            members = [member for member in tar.getmembers() if member.isfile()]
            tar_size = len(members)

            # Clamp so that validation_size outside [0, 1] yields an empty or
            # full split instead of wrapping around through negative slicing.
            boundary = int(tar_size * (1 - self.validation_size))
            boundary = min(max(boundary, 0), tar_size)
            selected = members[boundary:] if isValidation else members[:boundary]

            print("Tottal: {0}".format(tar_size))
            for member in selected:
                file = tar.extractfile(member)
                # basename() tolerates members stored with a directory prefix.
                image_id = os.path.basename(member.name).split(".")[0]
                labels.append(label_map[image_id])
                img = np.asarray(bytearray(file.read()), dtype="uint8")
                images.append(self.procces_image(img))

        return (np.array(images), np.array(getMean(labels)))

    def get_scores(self, challenge:str)->Dict:
        """Collect the score rows belonging to one challenge.

        Each line of the score file is split on single spaces. Field 14 is
        compared with ``challenge``, field 1 becomes the key and fields 2-11
        (ten integers) the value.

        Reading stops at the first non-matching line after at least one match,
        which assumes the rows of a challenge are stored contiguously.

        Returns:
            Dict mapping field 1 of each matching row to its list of ten ints.
        """
        found_match = False
        score_map = {}
        with open(self.ava_path, "r") as file:
            for line in file:
                values = line.split(" ")
                if len(values) < 15:
                    # Blank or truncated line: nothing to compare against.
                    continue
                if challenge == values[14].rstrip():
                    score_map[values[1]] = list(map(int, values[2:12]))
                    found_match = True
                elif found_match:
                    break
        return score_map

    def procces_image(self, img)->np.ndarray:
        """Decode an encoded image buffer and bring it to the configured size.

        Args:
            img: 1-D uint8 array holding the encoded image bytes.

        Returns:
            The decoded image, resized (``SHRINK``) or centre-cropped
            (``CENTERING``).

        Raises:
            ValueError: If the buffer cannot be decoded or ``self.shaping`` is
                not one of the supported modes.
        """
        img = cv.imdecode(img, cv.IMREAD_COLOR)
        if img is None:
            raise ValueError("could not decode image data")

        if self.shaping == SHRINK:
            return cv.resize(img, self.size)
        if self.shaping == CENTERING:
            # self.size is (width, height) while img.shape is (rows, cols, ...),
            # so rows are cropped by the height and columns by the width.
            target_width, target_height = self.size
            top = max(img.shape[0] // 2 - target_height // 2, 0)
            left = max(img.shape[1] // 2 - target_width // 2, 0)
            # TODO: images smaller than the target are returned uncropped along
            # that axis and therefore end up smaller than self.size.
            return img[top:top + target_height, left:left + target_width]
        raise ValueError("unsupported shaping mode: {0!r}".format(self.shaping))

def mean_function(labels):
    """Return the weighted mean position of a vote histogram.

    ``labels[k]`` is weighted by ``k + 1``; the weighted sum is divided by
    ``np.sum(labels)``.
    """
    weighted_total = sum(
        position * votes for position, votes in enumerate(labels, start=1)
    )
    return weighted_total / np.sum(labels)
        
def getMean(scores):
    """Apply ``mean_function`` to every entry of ``scores``.

    Returns a float64 numpy array with one value per entry.
    """
    per_entry_means = [mean_function(histogram) for histogram in scores]
    return np.array(per_entry_means, dtype=np.float64)

In [48]:
# Check the train/validation split: hold out 20% of each archive for validation
# and process only the first directory entry (end=1) for each split.
reader = LocalReader(validation_size=0.2)
# Each yielded item is the tuple returned by get_images: (images, mean scores).
for t in reader.train(end=1):
    print(t[0].shape)
    print(t[1].shape)
    
# Same archive position, this time the held-out validation part.
for v in reader.validate(end=1):
    print(v[0].shape)
    print(v[1].shape)

Tottal: 234
(187, 256, 256, 3)
(187,)
Tottal: 234
(47, 256, 256, 3)
(47,)
