### Bibliotecas e Dependências

In [134]:
from base64 import b64encode
from os import makedirs
from os.path import join, basename
from sys import argv
import json
import requests

### Variáveis Estáticas

In [135]:
import os

# REST endpoint of the Google Cloud Vision `images:annotate` method.
ENDPOINT_URL = 'https://vision.googleapis.com/v1/images:annotate'
# Folder where the JSON reply for each image is written.
RESPONSE_DIR = 'gcv_api_responses/'
# Root folder that is walked for '.jpg' input images.
IMAGES_DIR = 'COCO-Text-words-test'
# API key sent as the `key` query parameter of every request.
# Prefer supplying it through the GOOGLE_API_KEY environment variable.
# NOTE(review): the literal fallback below is a credential committed to source;
# it is kept only for backward compatibility and should be revoked/rotated and
# then removed from the file.
GOOGLE_API_KEY = (os.environ.get('GOOGLE_API_KEY')
                  or 'AIzaSyB855HzUXCySqWreEASl4Rv-ru3TReb17s')
# Folder that receives the generated submission text files.
OUTPUT_RESULT_FOLDER = 'task_2_submission_results/'

### Momento atual

In [136]:
from datetime import datetime

def get_date():
    """Return the current local date and time as 'YEAR_MONTH_DAY_HOUR_MINUTE'.

    The fields are plain integers joined by underscores: they are not
    zero-padded and seconds are not included (e.g. '2019_3_7_9_5').
    """
    # Read the clock once so that every field comes from the same instant;
    # separate now() calls could straddle a minute/hour/day boundary.
    now = datetime.now()
    fields = (now.year, now.month, now.day, now.hour, now.minute)
    return '_'.join(str(field) for field in fields)

### Image to JSON Annotation

In [150]:
def image_to_json_annotations(GOOGLE_API_KEY, image_filename):
    """Request TEXT_DETECTION for one image and save the reply as JSON.

    The image is read from disk, base64-encoded and posted to ENDPOINT_URL.
    The `responses` member of the reply is written, indented, to
    `<RESPONSE_DIR>/<image base name without extension>.json`.

    Args:
        GOOGLE_API_KEY: API key sent as the `key` query parameter.
        image_filename: Path of the image file to annotate.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        KeyError: If the reply body has no `responses` member.
    """
    import os

    # The image travels inline in the request body as base64 text.
    with open(image_filename, 'rb') as image_file:
        image_content = b64encode(image_file.read()).decode()

    image_request = {
        'image': {'content': image_content},
        'features': [{'type': 'TEXT_DETECTION'}],
    }

    # The REST reference defines `requests` as an array of per-image requests,
    # so the single request is wrapped in a list.
    image_data = json.dumps({'requests': [image_request]})

    response = requests.post(ENDPOINT_URL,
                             data=image_data,
                             params={'key': GOOGLE_API_KEY},
                             headers={'Content-Type': 'application/json'})
    # Surface HTTP failures (bad key, quota, ...) with their status instead of
    # an unrelated KeyError on the missing 'responses' member.
    response.raise_for_status()
    json_response = response.json()['responses']

    # Drop only the final extension of the base name, so dots in directory
    # names (e.g. './images/a.jpg') do not truncate the output name.
    image_stem = os.path.splitext(basename(image_filename))[0]
    json_output_path = join(RESPONSE_DIR, image_stem + '.json')

    # Create the response folder on first use.
    if not os.path.isdir(RESPONSE_DIR):
        makedirs(RESPONSE_DIR)

    with open(json_output_path, 'w') as json_file:
        # indent=2 makes the file readable at the cost of a larger size.
        json_file.write(json.dumps(json_response, indent=2))

### Estimativa de Ruído na Imagem

In [151]:
import cv2
from skimage.restoration import estimate_sigma

def estimate_noise(image_path):
    """Return the estimated noise standard deviation of an image file.

    The image is loaded with OpenCV and passed to scikit-image's
    `estimate_sigma`, averaging the per-channel estimates into one value.

    Args:
        image_path: Path of the image file to analyse.

    Raises:
        IOError: If OpenCV cannot read the file.
    """
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None rather than raising.
    if img is None:
        raise IOError('Could not read image: ' + str(image_path))
    # NOTE(review): newer scikit-image releases replace `multichannel=True`
    # with `channel_axis=-1` - confirm against the installed version.
    return estimate_sigma(img, multichannel=True, average_sigmas=True)

### Remoção de Ruídos

In [152]:
import os
import numpy as np 
import cv2 
from matplotlib import pyplot as plt 
from skimage import restoration
import scipy.misc


def denoise_tv(filename):
    """Apply total-variation (Chambolle) denoising to an image file in place.

    The file is read with OpenCV, converted from BGR to RGB, filtered with
    weight 0.05 and then saved back to the same path, overwriting the
    original image.

    Args:
        filename: Path of the image to denoise; it is also the output path.
    """
    bgr_pixels = cv2.imread(filename)
    rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
    # NOTE(review): no multichannel/channel_axis argument is passed, so the
    # colour axis is presumably filtered like a spatial one - confirm intent.
    smoothed = restoration.denoise_tv_chambolle(rgb_pixels, weight=0.05)
    # NOTE(review): scipy.misc.toimage/imsave are not available in recent
    # SciPy releases - verify the pinned SciPy version before upgrading.
    scipy.misc.imsave(filename, scipy.misc.toimage(smoothed))

### Processamento das imagens

In [153]:
import os

def processar_lote_imagens(IMAGES_DIR, remover_ruido=False):
    """Annotate every '.jpg' file found under a directory tree.

    Args:
        IMAGES_DIR: Root folder that is walked recursively.
        remover_ruido: When true, each image is denoised (in place) with
            `denoise_tv` before it is sent for annotation.
    """
    for current_dir, _subdirs, filenames in os.walk(IMAGES_DIR):
        for filename in filenames:
            # Only lower-case '.jpg' names are processed.
            if not filename.endswith('.jpg'):
                continue
            jpg_path = os.path.join(current_dir, filename)
            if remover_ruido:
                denoise_tv(jpg_path)
            image_to_json_annotations(GOOGLE_API_KEY, jpg_path)

### JSON to Dataframe

In [155]:
import json
import ast
import pandas as pd
from pandas.io.json import json_normalize

def json_to_data(json_file):
    """Load a JSON file and return its parsed content.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        The decoded JSON value (dict, list, ...), exactly as `json.load`
        produces it.
    """
    # The previous json_normalize() call was dropped: its result was never
    # used and it could raise on payloads that parse perfectly well.
    with open(json_file, 'r') as f:
        return json.load(f)

### Geração de Resultados

In [168]:
import pandas as pd 
import re
import os
import string
import math

def gera_resultados_submissao(OUTPUT_RESULT_FOLDER, RESPONSE_DIR):
    """Write one 'name,text' line per saved JSON response to a results file.

    The output file is `<OUTPUT_RESULT_FOLDER>/res_tk2_<get_date()>.txt` and is
    opened in append mode. For every '.json' file in RESPONSE_DIR (processed in
    sorted order) the full-text annotation is reduced to ASCII, lower-cased and
    stripped of newlines, spaces and semicolons.

    * No `fullTextAnnotation` in the response: an empty 'name,' line is written.
    * Annotation whose text is empty after ASCII filtering: nothing is written.
      NOTE(review): this asymmetry is kept from the original code - confirm
      whether such images should also get an empty line.

    Args:
        OUTPUT_RESULT_FOLDER: Folder that receives the results file; created
            if it does not exist.
        RESPONSE_DIR: Folder holding the JSON responses to summarise.
    """
    if OUTPUT_RESULT_FOLDER and not os.path.isdir(OUTPUT_RESULT_FOLDER):
        os.makedirs(OUTPUT_RESULT_FOLDER)
    output_path = os.path.join(OUTPUT_RESULT_FOLDER,
                               'res_tk2_' + get_date() + '.txt')

    # 'a' is the portable spelling of the original append intent; the `with`
    # block guarantees the file is flushed and closed.
    with open(output_path, 'a') as results_file:
        # Sorted so the output order does not depend on the file system.
        for name in sorted(os.listdir(RESPONSE_DIR)):
            if not name.endswith('.json'):
                continue

            data = json_to_data(os.path.join(RESPONSE_DIR, name))
            # The saved payload is the API's `responses` array; a key lookup
            # on the list itself would never match, so unwrap the first (and,
            # for single-image requests, only) element.
            if isinstance(data, list):
                data = data[0] if data else {}

            image_id = os.path.splitext(name)[0]

            if 'fullTextAnnotation' not in data:
                results_file.write(image_id + ',' + '\n')
                continue

            text = data['fullTextAnnotation'].get('text', '')
            # Keep everything as text: drop non-ASCII characters, then newlines.
            text = text.encode('ascii', 'ignore').decode('ascii')
            text = text.replace('\n', '')
            if text:
                normalized = text.lower().replace(' ', '').replace(';', '')
                results_file.write(image_id + ',' + normalized + '\n')

In [None]:
# Annotate every '.jpg' image under IMAGES_DIR, without denoising first.
processar_lote_imagens(IMAGES_DIR, remover_ruido=False)

In [169]:
# Build the submission text file from the JSON responses in RESPONSE_DIR.
gera_resultados_submissao(OUTPUT_RESULT_FOLDER, RESPONSE_DIR)