# Инфраструктура для мэтчинга

In [1]:
import numpy as np
import pandas as pd
from scipy.signal import find_peaks

import os
import shutil
import requests
import uuid
import urllib
import urllib.request
import httplib2

import xlsxwriter

from PIL import Image
import cv2

from tqdm.auto import tqdm
import time
import random

## Обработка картинок

In [2]:
def background2any(fname, new=(0, 0, 0, 0), old=None, delete=False, new_fname=None,
                   tolerance=7):
    """Replace the background at both ends of every row with another colour.

    Each row is scanned inwards from its left and right end. Pixels whose
    channels all lie within ``tolerance`` of the reference colour ``old`` are
    treated as background and painted with ``new``; everything from the first
    to the last non-background pixel of the row (both inclusive) is kept.
    The result is saved as an RGBA PNG.

    Args:
        fname: Path of the source image.
        new: Replacement colour (RGBA tuple), or the string ``'old'`` to paint
            the detected background with the exact reference colour.
        old: Reference background colour; defaults to the top-left pixel.
        delete: Remove the source file once it has been read.
        new_fname: Output path; defaults to ``<fname without extension>_b2a.png``.
        tolerance: Largest per-channel difference from ``old`` that still
            counts as background.
    """
    # convert() returns an in-memory image, so the file can be closed (and
    # safely deleted) before any further processing.
    with Image.open(fname) as src:
        img = src.convert("RGBA")
    if delete:
        os.remove(fname)

    width, height = img.size
    pics = list(img.getdata())

    if old is None:
        old = pics[0]

    if new == 'old':
        new = old

    def is_old(pixel):
        return all(abs(a - b) <= tolerance for a, b in zip(pixel, old))

    new_img = [new] * (width * height)
    for y in range(height):
        left = y * width
        right = left + width - 1
        while left < right and is_old(pics[left]):
            left += 1
        while left < right and is_old(pics[right]):
            right -= 1
        # pics[right] is foreground unless the two scans met on a background
        # pixel; include it so the last foreground pixel of the row survives.
        stop = right if is_old(pics[right]) else right + 1
        new_img[left:stop] = pics[left:stop]

    img.putdata(new_img)

    if new_fname is None:
        new_fname = os.path.splitext(fname)[0] + '_b2a.png'

    img.save(new_fname, 'PNG')

In [3]:
def background2black(fname, old=None, delete=False, new_fname=None):
    """Paint every pixel close to the background colour black.

    A pixel is considered background when each of its channels lies within
    7 levels of the reference colour ``old``. Matching pixels are zeroed, the
    rest are kept, and the result is written with ``cv2.imwrite``.

    Args:
        fname: Path of the source image.
        old: Reference background colour, in the channel order of the array
            returned by ``cv2.imread`` (BGR for colour images). Defaults to
            the top-left pixel of the image.
        delete: Remove the source file once it has been read.
        new_fname: Output path; defaults to ``<fname without extension>_b2b.png``.

    Raises:
        OSError: If the image cannot be read.
    """
    image = cv2.imread(fname)
    if image is None:
        raise OSError(f'Cannot read image: {fname}')
    if delete:
        os.remove(fname)

    if old is None:
        # Take the reference from the array that is actually masked, so the
        # channel count and order always match.
        old = image[0, 0]

    # Use a signed type: subtracting from uint8 wraps around instead of
    # dropping below zero, which would defeat the clamping.
    old = np.asarray(old, dtype=np.int16)
    lower = np.clip(old - 7, 0, 255).astype(np.uint8)
    upper = np.clip(old + 7, 0, 255).astype(np.uint8)

    background = cv2.inRange(image, lower, upper)
    foreground = cv2.bitwise_not(background)
    res = cv2.bitwise_and(image, image, mask=foreground)

    if new_fname is None:
        new_fname = os.path.splitext(fname)[0] + '_b2b.png'

    cv2.imwrite(new_fname, res)

In [4]:
def crop_box(fname, wid=8, back=None, delete=False, new_fname=None):
    """Trim the uniform left/right margins of an image, keeping ``wid`` pixels.

    For every row the run of pixels equal to ``back`` is measured from the left
    and from the right. Only rows that have a margin on both sides are taken
    into account; the smallest left and right margins found define the crop.
    The full height is always kept and the result is saved as PNG.

    Args:
        fname: Path of the source image.
        wid: Number of margin pixels to leave on each side of the content.
        back: Background colour (compared with ``==``); defaults to the
            top-left pixel.
        delete: Remove the source file once it has been read.
        new_fname: Output path; defaults to ``<fname without extension>_cb.png``.
    """
    # Image.open is lazy: copy the pixels into memory and close the file
    # before it may be deleted (deleting an open file fails on Windows).
    with Image.open(fname) as src:
        img = src.copy()
    if delete:
        os.remove(fname)

    width, height = img.size
    pics = list(img.getdata())

    if back is None:
        back = pics[0]

    min_left = width
    min_right = width
    last = width - 1

    for y in range(height):
        row = y * width
        left = 0
        while left < last and pics[row + left] == back:
            left += 1
        right = 0
        while left < last - right and pics[row + last - right] == back:
            right += 1
        # Rows whose content touches either edge (and fully blank rows, where
        # right stays 0) do not constrain the crop.
        if left and right:
            min_left = min(min_left, left)
            min_right = min(min_right, right)

    # NOTE(review): if the smallest margin is narrower than `wid` the box
    # reaches outside the image, and if no row qualified above the box is
    # inverted (left > right) -- confirm how the installed Pillow handles both.
    img = img.crop((min_left - wid, 0, width - min_right + wid, height))

    if new_fname is None:
        new_fname = os.path.splitext(fname)[0] + '_cb.png'

    img.save(new_fname, 'PNG')

In [5]:
def delete_border(fname, wid=2, back=None, delete=False, new_fname=None):
    """Erase a thin rim around the image content by painting it as background.

    Horizontal pass: in every row the background runs are skipped from both
    ends and only the pixels ``wid`` further inside are copied to the output;
    everything else becomes ``back``.
    Vertical pass: in every column the background runs are skipped from top
    and bottom and the first ``wid`` pixels at each end are painted ``back``.
    The result is saved as PNG.

    Args:
        fname: Path of the source image.
        wid: Thickness, in pixels, of the rim to erase.
        back: Background colour (compared with ``==``); defaults to the
            top-left pixel.
        delete: Remove the source file once it has been read.
        new_fname: Output path; defaults to ``<fname without extension>_db.png``.
    """
    # Image.open is lazy: copy the pixels into memory and close the file
    # before it may be deleted (deleting an open file fails on Windows).
    with Image.open(fname) as src:
        img = src.copy()
    if delete:
        os.remove(fname)

    width, height = img.size
    pics = list(img.getdata())

    if back is None:
        back = pics[0]

    new_img = [back] * (width * height)

    # Horizontal pass.
    for y in range(height):
        left = y * width
        right = left + width - 1
        while left + wid < right - wid and pics[left] == back:
            left += 1
        while left + wid < right - wid and pics[right] == back:
            right -= 1
        # NOTE(review): the end of the range is exclusive, so one more pixel is
        # removed on the right than on the left -- confirm this is intended.
        for i in range(left + wid, right - wid):
            new_img[i] = pics[i]

    # Vertical pass.
    row_step = width
    for x in range(width):
        up = x
        down = width * (height - 1) + x
        while up + wid * row_step < down - wid * row_step and pics[up] == back:
            up += row_step
        while up + wid * row_step < down - wid * row_step and pics[down] == back:
            down -= row_step
        # NOTE(review): assumes the image is at least `wid` rows tall;
        # otherwise these indices leave the column.
        for i in range(wid):
            new_img[up + i * row_step] = back
            new_img[down - i * row_step] = back

    img.putdata(new_img)

    if new_fname is None:
        new_fname = os.path.splitext(fname)[0] + '_db.png'

    img.save(new_fname, 'PNG')

In [6]:
def crop_label_old(fname, label=None, delete=False, new_fname=None):
    """Cut fixed margins off an image and save the result as PNG.

    Args:
        fname: Path of the source image.
        label: Amounts to trim, in the order (left, top, right, bottom).
            A value below 1 is a fraction of the image width/height, a value
            of 1 or more is a number of pixels. When ``None`` the image is
            first tightened with ``crop_box`` and then its top third is
            removed.
        delete: Remove the source file once it has been read.
        new_fname: Output path; defaults to ``<fname without extension>_clo.png``.
    """
    if new_fname is None:
        new_fname = os.path.splitext(fname)[0] + '_clo.png'

    source = fname
    if label is None:
        # Tighten into the output file rather than in place, so the source
        # is left untouched when delete=False.
        crop_box(fname, new_fname=new_fname)
        source = new_fname
        label = (0, 1/3, 0, 0)

    # Image.open is lazy: copy the pixels into memory and close the file
    # before it may be deleted (deleting an open file fails on Windows).
    with Image.open(source) as src:
        img = src.copy()
    if delete:
        os.remove(fname)

    size = img.size
    box = [0] * 4
    for side, amount in enumerate(label):
        extent = size[side % 2]  # width for left/right, height for top/bottom
        offset = amount * extent if amount < 1 else amount
        # Left/top are measured from the origin, right/bottom from the far edge.
        box[side] = offset if side < 2 else extent - offset

    img = img.crop(tuple(box))
    img.save(new_fname, 'PNG')

In [7]:
def delta(x, n=10):
    """Return the absolute differences between samples of ``x`` taken ``n`` apart.

    ``x`` is sampled at every ``n``-th position; each output element is the
    absolute difference between one sample and the previous one.
    """
    later = x[n::n]
    earlier = x[:-n:n]
    return abs(later - earlier)

In [8]:
def moving_average(a, n=3):
    """Return the mean of every window of ``n`` consecutive values of ``a``.

    Computed from the running sum: a window sum is the running total at its
    last element minus the running total just before its first element.
    The result has ``len(a) - n + 1`` elements (none if ``a`` is shorter
    than ``n``).
    """
    totals = np.cumsum(a, dtype=float)
    window_sums = totals[n:] - totals[:-n]
    totals[n:] = window_sums
    return totals[n - 1:] / n

In [9]:
def crop_label_new(fname, crop_unsuccess=True, delete=False, new_fname=None):
    """Crop an image vertically between the strongest jumps in row brightness.

    The grey-level sum of every row is sampled every 20 rows; peaks in the
    absolute change between samples are located with ``find_peaks``. When at
    least two peaks are found, the rows between the first and the last one are
    written to ``new_fname`` and tightened with ``crop_box``.

    Args:
        fname: Path of the source image.
        crop_unsuccess: When fewer than two peaks are found, fall back to
            ``crop_label_old`` instead of leaving the image uncropped.
        delete: Remove the source file once it is no longer needed. If no
            crop was produced and ``fname`` is also the output path, the file
            is kept so the caller does not lose its only copy.
        new_fname: Output path; defaults to ``<fname without extension>_cln.jpg``.

    Returns:
        True if the peak-based crop was applied, False otherwise.

    Raises:
        OSError: If the image cannot be read.
    """
    img_ = cv2.imread(fname)
    if img_ is None:
        raise OSError(f'Cannot read image: {fname}')

    if new_fname is None:
        new_fname = os.path.splitext(fname)[0] + '_cln.jpg'

    gray = cv2.cvtColor(img_, cv2.COLOR_BGR2GRAY)
    row_sums = np.sum(gray, axis=1).astype(float)

    d = 20
    jumps = delta(row_sums, d)

    peaks = np.array([], dtype=int)
    if jumps.size:  # images of d rows or fewer give an empty profile
        q = 0.75
        height = int(np.quantile(jumps, q))
        # find_peaks rejects distance < 1, which short images would produce.
        distance = max(1, int(gray.shape[0] * 0.2 / d))
        peaks, _ = find_peaks(jumps, height=height, distance=distance)
        peaks = peaks * d  # back from sample index to row index

    if peaks.shape[0] >= 2:
        if delete:
            os.remove(fname)
        cv2.imwrite(new_fname, img_[peaks[0]:peaks[-1], :])
        crop_box(new_fname, delete=True, new_fname=new_fname)
        return True

    if crop_unsuccess:
        # The fallback reads the source itself, so it must still exist here;
        # crop_label_old takes care of deleting it.
        crop_label_old(fname, delete=delete, new_fname=new_fname)
    elif delete and os.path.abspath(fname) != os.path.abspath(new_fname):
        os.remove(fname)
    return False

In [41]:
def full_change(fname, label='mix', delete=False, new_fname=None):
    """Run the whole image clean-up pipeline on one picture.

    Steps: normalise the background (``background2any``), erase the rim
    (``delete_border``), then crop according to ``label``.

    Args:
        fname: Path of the source image.
        label: Cropping strategy:
            ``None`` uses ``crop_box`` only;
            ``'old'`` uses ``crop_label_old``;
            ``'new'`` uses ``crop_label_new`` with fallback to the old crop;
            ``'mix'`` uses ``crop_label_old`` followed by ``crop_label_new``
            without fallback.
            Any other value skips the cropping step.
        delete: Remove the source file once it has been read.
        new_fname: Output path; defaults to ``<fname without extension>_fc.png``.

    Returns:
        True when every step ran without raising, False otherwise.
    """
    try:
        if new_fname is None:
            new_fname = os.path.splitext(fname)[0] + '_fc.png'

        background2any(fname, new='old', delete=delete, new_fname=new_fname)

        delete_border(new_fname, delete=True, new_fname=new_fname)

        if label is None:
            crop_box(new_fname, delete=True, new_fname=new_fname)
        elif label == 'old':
            crop_label_old(new_fname, delete=True, new_fname=new_fname)
        elif label == 'new':
            crop_label_new(new_fname, crop_unsuccess=True, delete=True, new_fname=new_fname)
        elif label == 'mix':
            crop_label_old(new_fname, delete=True, new_fname=new_fname)
            crop_label_new(new_fname, crop_unsuccess=False, delete=True, new_fname=new_fname)
        return True
    except Exception:
        # Any processing failure is reported as False; KeyboardInterrupt and
        # SystemExit are deliberately not swallowed so a long batch can be
        # stopped.
        return False

In [11]:
def convert2any(fname, format_new, delete=False, new_fname=None):
    """Convert an image file to JPEG or PNG.

    Args:
        fname: Path of the source image.
        format_new: ``'JPG'`` (saved as RGB JPEG) or ``'PNG'`` (saved as RGBA
            PNG).
        delete: Remove the source file once it has been read.
        new_fname: Output path; defaults to
            ``<fname without extension>_c2a.jpg`` / ``_c2a.png``.

    Returns:
        True on success. False if the format is not supported (nothing is
        read or deleted in that case) or if reading, deleting or saving fails.
    """
    # format_new -> (Pillow mode, Pillow format name, default file extension)
    targets = {
        'JPG': ('RGB', 'JPEG', '.jpg'),
        'PNG': ('RGBA', 'PNG', '.png'),
    }
    if format_new not in targets:
        return False
    mode, pil_format, extension = targets[format_new]

    try:
        # convert() yields an in-memory image, so the file is closed before it
        # may be deleted (deleting an open file fails on Windows).
        with Image.open(fname) as src:
            converted = src.convert(mode)
        if delete:
            os.remove(fname)

        if new_fname is None:
            new_fname = os.path.splitext(fname)[0] + '_c2a' + extension

        converted.save(new_fname, pil_format)
        return True
    except Exception:
        return False

In [12]:
def deleteany(format_delete, path='.', exceptions=()):
    """Delete every file in ``path`` whose name ends with ``.<format_delete>``.

    Args:
        format_delete: File extension without the leading dot, e.g. ``'png'``.
        path: Directory to clean (not searched recursively).
        exceptions: File names (not paths) that must be kept.
    """
    suffix = '.' + format_delete
    for name in os.listdir(path):
        if not name.endswith(suffix) or name in exceptions:
            continue
        full_path = os.path.join(path, name)
        # A directory may carry the same suffix; os.remove would fail on it.
        if os.path.isfile(full_path):
            os.remove(full_path)

## Загрузка картинок

In [13]:
def download_picture_1(url, fname, timeout=30):
    """Download ``url`` to ``fname`` with ``requests``, streaming the body.

    Args:
        url: Address of the picture.
        fname: Path of the file to write.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection problems, timeouts or an
            HTTP error status (nothing is written in that case).
    """
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail early instead of saving an HTML error page as an "image".
        response.raise_for_status()
        with open(fname, 'wb') as out:
            for chunk in response.iter_content(chunk_size=8192):
                out.write(chunk)

In [14]:
def download_picture_2(url, fname):
    """Download ``url`` to ``fname`` using ``urllib.request.urlretrieve``."""
    urllib.request.urlretrieve(url, filename=fname)

In [15]:
def download_picture_3(url, fname):
    """Download ``url`` to ``fname`` using ``urllib.request.urlopen``.

    Both the HTTP response and the output file are closed even when reading
    or writing fails.
    """
    with urllib.request.urlopen(url) as resource, open(fname, 'wb') as out:
        out.write(resource.read())

In [16]:
def download_picture_4(url, fname):
    """Download ``url`` to ``fname`` using ``httplib2``.

    Responses are cached by httplib2 in the ``.cache`` directory.

    Raises:
        OSError: If the server answers with an HTTP error status (nothing is
            written in that case).
    """
    h = httplib2.Http('.cache')
    response, content = h.request(url)
    if response.status >= 400:
        # Do not save an error page as if it were the picture.
        raise OSError(f'HTTP {response.status} while downloading {url}')
    with open(fname, 'wb') as out:
        out.write(content)

In [17]:
def download_picture(url, fname):
    """Download a picture, trying several HTTP clients until one succeeds.

    A download counts as successful when the saved file can be opened by
    Pillow. After a failed attempt the partial file is removed and the next
    downloader is tried.

    Args:
        url: Address of the picture.
        fname: Path of the file to write.

    Returns:
        True if some downloader produced a readable image, False otherwise.
    """
    downloaders = (download_picture_1, download_picture_2,
                   download_picture_3, download_picture_4)
    for downloader in downloaders:
        try:
            downloader(url, fname)
            # Opening is enough to validate the header; close the handle right
            # away so later steps can delete or overwrite the file.
            with Image.open(fname):
                pass
        except Exception:
            try:
                os.remove(fname)
            except OSError:
                pass  # nothing was written, or it is already gone
            continue
        return True
    return False

## API

In [32]:
# NOTE(review): this credential is hard-coded in the notebook. Prefer setting
# the VIVINO_AUTH_KEY environment variable; the literal is kept only as a
# backward-compatible fallback and should be rotated and removed.
auth_key = os.environ.get("VIVINO_AUTH_KEY", "Bearer wyZ546S2fLyGlcBL61r2LAEiRhTuSXAHtHORFRuD")

In [19]:
def get_vintage_id(fname, auth_key, timeout=30):
    """Upload a label picture to the scan endpoint and fetch the match.

    The image is posted, then, after a one-second pause, the scan result is
    requested once using the ``processing_id`` from the first response.

    Args:
        fname: Path of the image to upload.
        auth_key: Value of the ``authorization`` header.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        The ``vintage_id`` field of the scan result (whatever value the
        service put there -- callers in this file treat ``None`` as "no
        match"), or False if anything went wrong (unreadable file, network
        error, unexpected response).
    """
    try:
        url = f'https://api.vivino.com/scans/label?image_type=jpg&add_user_vintage=false&queue_tier_matching=false&debug_should_fail=false&label_ocr=&label_ocr_source=android_firebase-ml-vision_18.0.0&app_version=2022.22.0&api_version=9.0.0&app_platform=android&app_phone=samsung%20SM-G988N%20z3q%207.1.2&os_version=7.1.2&country_code=ru&uuid={str(uuid.uuid4())}&language=en'
        headers = {"authorization": auth_key}
        # Close the upload handle as soon as the request is done so the file
        # can be removed afterwards.
        with open(fname, 'rb') as image:
            scan_data = requests.post(url,
                                      headers=headers,
                                      files={'image': image},
                                      timeout=timeout)
        time.sleep(1)  # give the service time to process the scan
        url_get_vintage = f"https://api.vivino.com/scans/label/{scan_data.json()['processing_id']}"
        vintage = requests.get(url_get_vintage,
                               headers=headers,
                               timeout=timeout)
        return vintage.json()['vintage_id']
    except Exception:
        return False

In [20]:
def get_wine_id(vintage_id, timeout=30):
    """Look up the wine id that a vintage belongs to.

    Args:
        vintage_id: Identifier inserted into the vintages endpoint URL.
        timeout: Seconds to wait for the HTTP request.

    Returns:
        The value at ``['wine']['id']`` of the JSON response, or False if the
        request failed or the response had an unexpected shape.
    """
    try:
        response = requests.get(f'https://api.vivino.com/vintages/{vintage_id}',
                                timeout=timeout)
        wine = response.json()
        time.sleep(1)  # pause between consecutive API calls
        return wine['wine']['id']
    except Exception:
        return False

## Мэтчинг

In [42]:
def matching(url, auth_key, fname=None):
    """Download a picture, clean it up and resolve it to a wine id.

    Args:
        url: Address of the picture.
        auth_key: Value of the ``authorization`` header for the scan API.
        fname: Where to keep the processed JPEG. When omitted, a randomly
            named temporary file is used and removed before returning.

    Returns:
        The wine id on success, otherwise one of the strings
        ``'Bad picture download'``, ``'Bad picture processing'``,
        ``'No vintage_id'``, ``'Bad API vintage_id'``, ``'Bad API wine_id'``.
    """
    name = str(random.getrandbits(31))
    fname_ = name+'.png'
    temporary = [fname_]
    if fname is None:
        fname = name+'.jpg'
        # Nobody else knows this name, so it has to be cleaned up here.
        temporary.append(fname)

    try:
        if not download_picture(url, fname_):
            return 'Bad picture download'

        if not (full_change(fname_, label='mix', delete=True, new_fname=fname_)
                and convert2any(fname_, format_new='JPG', delete=True, new_fname=fname)):
            return 'Bad picture processing'

        vintage_id = get_vintage_id(fname, auth_key)
        if vintage_id is None:
            return 'No vintage_id'
        if not vintage_id:
            return 'Bad API vintage_id'

        wine_id_api = get_wine_id(vintage_id)
        if not wine_id_api:
            return 'Bad API wine_id'
        return wine_id_api
    finally:
        for leftover in temporary:
            try:
                os.remove(leftover)
            except OSError:
                pass  # already removed by an earlier step, or never created

### Тестирование

In [40]:
def test(url, auth_key, fname=None, label=None):
    """Variant of the matching pipeline with a selectable cropping strategy.

    Args:
        url: Address of the picture.
        auth_key: Value of the ``authorization`` header for the scan API.
        fname: Where to keep the processed JPEG. When omitted, a randomly
            named temporary file is used and removed before returning.
        label: Cropping strategy passed to ``full_change``.

    Returns:
        The wine id on success, otherwise one of the strings
        ``'Bad picture download'``, ``'Bad picture processing'``,
        ``'No vintage_id'``, ``'Bad API vintage_id'``, ``'Bad API wine_id'``.
    """
    name = str(random.getrandbits(31))
    fname_ = name+'.png'
    temporary = [fname_]
    if fname is None:
        fname = name+'.jpg'
        # Nobody else knows this name, so it has to be cleaned up here.
        temporary.append(fname)

    try:
        if not download_picture(url, fname_):
            return 'Bad picture download'

        if not (full_change(fname_, label=label, delete=True, new_fname=fname_)
                and convert2any(fname_, format_new='JPG', delete=True, new_fname=fname)):
            return 'Bad picture processing'

        vintage_id = get_vintage_id(fname, auth_key)
        if vintage_id is None:
            return 'No vintage_id'
        if not vintage_id:
            return 'Bad API vintage_id'

        wine_id_api = get_wine_id(vintage_id)
        if not wine_id_api:
            return 'Bad API wine_id'
        return wine_id_api
    finally:
        for leftover in temporary:
            try:
                os.remove(leftover)
            except OSError:
                pass  # already removed by an earlier step, or never created

# Данные

In [None]:
path = '' # path to the spreadsheet with the wines (fill in before running)

In [None]:
# Load the wine table; one row per wine is iterated over by the matching loop.
df = pd.read_excel(path)

In [None]:
# Quick look at the first rows to check the columns.
df.head()

# Процесс

In [None]:
# NOTE(review): this credential is hard-coded in the notebook. Prefer setting
# the VIVINO_AUTH_KEY environment variable; the literal is kept only as a
# backward-compatible fallback and should be rotated and removed.
auth_key = os.environ.get("VIVINO_AUTH_KEY", "Bearer wyZ546S2fLyGlcBL61r2LAEiRhTuSXAHtHORFRuD")

In [None]:
# Counters for every outcome that matching() can report, plus 'Matching' for
# rows where a wine id was found.
result = {'Bad picture download' : 0, 'Bad picture processing' : 0, 'Bad API vintage_id' : 0, 'No vintage_id' : 0, 'Bad API wine_id' : 0, 'Different wine_id' : 0, 'Matching' : 0}
for i in tqdm(df.index):

    # Template placeholder: this line is incomplete until a value is supplied.
    image_merchant = #df.loc[i, 'Изображения'] # TODO: assign the image URL for this row

    res = matching(url=image_merchant, auth_key=auth_key)
    # matching() returns an int wine id on success and a status string otherwise.
    if type(res) == int:
        wine_id_api = res
        res = 'Matching'
    else:
        wine_id_api = ''

    # = wine_id_api # TODO: store the wine_id that was found
    # = res # TODO: store the textual result

    # Count the outcome; unknown statuses get a new counter.
    if res in result:
        result[res] += 1
    else:
        result[res] = 1

# Print the summary of outcomes.
for k in result:
    print(k, result[k])

#