In [55]:
import pandas as pd
from pytesseract import pytesseract
import cv2
import numpy as np
from typing import List, Tuple
import matplotlib.pyplot as plt
from PIL import Image

import os
import sys

# Tell pytesseract which Tesseract executable to invoke (hard-coded install path).
pytesseract.tesseract_cmd = r'/usr/bin/tesseract'

import warnings
# NOTE(review): this silences every warning category for the whole session,
# including any pandas warnings emitted by later cells — consider narrowing it.
warnings.simplefilter("ignore")

In [2]:
# Make the sibling ../scripts directory importable so load_data can be found.
sys.path.append("../scripts")
from load_data import Loader

# Loader instance used further down to read the performance CSV.
loader = Loader()

### methods

In [31]:
def filter_list(all_values:list,key_word:str):
    """Return the entries of ``all_values`` for which ``key_word in entry`` holds.

    For string entries this is a substring match. The input order is kept and
    a new list is always returned.
    """
    return [entry for entry in all_values if key_word in entry]

In [3]:
def get_files_name(directory:str, filter_extension:list=None)->list:
    """List the names of the regular files directly inside ``directory``.

    Args:
        directory: Path of the directory to scan (not recursive).
        filter_extension: Optional list of extensions to keep, e.g.
            ``["PNG", "JPG"]``. Matching is case-insensitive and a leading
            dot is accepted (``".png"``). ``None`` keeps every file; an empty
            list keeps none.

    Returns:
        File names (not full paths) in the order ``os.listdir`` yields them.
        Sub-directories are never included.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``directory`` cannot be
            read (for example, it does not exist).
    """
    allowed = None
    if filter_extension is not None:
        # Normalise once so "png", "PNG" and ".png" all select the same files.
        allowed = {ext.lstrip('.').upper() for ext in filter_extension}

    files = []
    for filename in os.listdir(directory):
        if not os.path.isfile(os.path.join(directory, filename)):
            continue

        if allowed is not None:
            # splitext yields "" for names without an extension, so a file
            # literally called "PNG" is not mistaken for a PNG image.
            extension = os.path.splitext(filename)[1].lstrip('.').upper()
            if extension not in allowed:
                continue

        files.append(filename)

    return files

In [12]:
def locate_image_on_image(locate_image: str, on_image: str, prefix: str = '', visualize: bool = False, color: Tuple[int, int, int] = (0, 0, 255)):
    """Locate the best template match of ``locate_image`` inside ``on_image``.

    Args:
        locate_image: Path of the template image to search for (read as grayscale).
        on_image: Path of the image that is searched.
        prefix: Text prepended to both keys of the returned dictionary.
        visualize: When True, draw the matched rectangle on the searched image
            and show it with matplotlib.
        color: Rectangle colour handed to ``cv2.rectangle``; it is applied to
            the BGR image before the conversion to RGB for display.

    Returns:
        ``{f'{prefix}top_left_pos': (x, y), f'{prefix}bottom_right_pos': (x, y)}``
        for the highest-scoring location, or ``None`` when an image cannot be
        read or OpenCV raises an error (the problem is printed).

    Note:
        The highest-scoring location is always returned; no score threshold is
        applied, so a position is reported even if the template is not
        actually present in the image.
    """
    try:
        image = cv2.imread(on_image)
        template = cv2.imread(locate_image, cv2.IMREAD_GRAYSCALE)

        # cv2.imread signals an unreadable file by returning None rather than
        # raising, so report which path failed instead of an opaque cv2.error.
        if image is None or template is None:
            unreadable = on_image if image is None else locate_image
            print(f"Could not read image: {unreadable}")
            return None

        # imread delivers BGR channel order; BGR2GRAY weights the channels the
        # same way the grayscale template was produced.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF)
        _, _, _, max_loc = cv2.minMaxLoc(result)

        height, width = template.shape[:2]

        top_left = max_loc
        bottom_right = (top_left[0] + width, top_left[1] + height)

        if visualize:
            cv2.rectangle(image, top_left, bottom_right, color, 1)
            plt.figure(figsize=(10, 10))
            plt.axis('off')
            # matplotlib expects RGB, OpenCV stores BGR.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            plt.imshow(image)

        return {f'{prefix}top_left_pos': top_left, f'{prefix}bottom_right_pos': bottom_right}

    except cv2.error as err:
        # Best-effort helper: report the OpenCV failure and fall through to None.
        print(err)
        return None

### identify assets with logo information

In [14]:
# Location of the zipped challenge data and the CSV path passed alongside it.
bucket = "s3://10ac-batch-6/data/w11/Challenge_Data.zip"
file_path = "Challenge_Data/performance_data.csv"

# NOTE(review): presumably load_csv reads file_path out of the archive at
# bucket and returns a DataFrame — confirm in scripts/load_data.py.
df = loader.load_csv(bucket,file_path)

In [40]:
# Trimmed working copy of the performance table: the preview link and the
# ER / CTR columns are removed, while df itself is left untouched.
logos = df.drop(columns=["preview_link","ER","CTR"])

In [41]:
# For every game_id, list the files found in that game's asset directory.
# NOTE(review): the base path is absolute and user-specific; get_files_name
# calls os.listdir, so a game_id without a matching directory raises here.
logos["all_files"] = df.game_id.apply(lambda x:get_files_name(f'/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/{x}/'))

In [42]:
logos.head()

Unnamed: 0,game_id,all_files
1,bfbf7a2b7ac635e67877b1ab87fd6629,"[endtext.png, landing_2.jpg, video.mp4, end-ha..."
2,945010afbf9a55bbdffcb0895f946155,"[landing_2.jpg, video.mp4, end-mtlaurel.png, e..."
3,e68e20f592457b875ce29757ab855dfe,"[endtext.png, thumbnail.jpg, landing_2.jpg, en..."
4,adunit-nestle-purina-friskies-mob,"[logo.png, game_2.png, cta.png, engagement_ani..."
5,adunit-lionsgate-uwomt-user-slider-sensory-vid...,"[logo.png, cta.png, shade.png, engagement_anim..."


In [44]:
# Flatten each file listing into one space-separated string so that it can be
# searched with the vectorised .str accessor.
logos['concat'] = [" ".join(file_names) for file_names in logos.all_files]

In [45]:
logos.concat.str.contains("logo").value_counts()

False    474
True     431
Name: concat, dtype: int64

Only 431 of the 905 creative asset directories contain a file with "logo" in its name.

In [46]:
# Rows whose file listing mentions "logo". Take an explicit copy because later
# cells add and drop columns on this frame; mutating a bare boolean-filtered
# selection is ambiguous to pandas and raises SettingWithCopyWarning.
contain_logo = logos[logos.concat.str.contains("logo")].copy()

In [47]:
contain_logo.sample(5)

Unnamed: 0,game_id,all_files,concat
781,adunit-nestle-maggi-varieties-bouillon-ar-ksa-...,"[endtext.png, logo.png, start-bg.png, game_2.p...",endtext.png logo.png start-bg.png game_2.png c...
785,adunit-mouser-user-choice-cpe-av-aus-nz-mpu,"[logo.png, arrow1.png, bg.png, background.png,...",logo.png arrow1.png bg.png background.png cta....
887,ee8ae60b2e46e44da79fb7e59b07eae4,"[endtext.png, game_1.jpg, video-copy-VOD.png, ...",endtext.png game_1.jpg video-copy-VOD.png logo...
467,26877a2094850375ec053651dd7aa4ea,"[endtext.png, flower-circle2.png, logo.png, di...",endtext.png flower-circle2.png logo.png discov...
129,adunit-mouser-user-choice-v2-cpe-uk-mob,"[option2-image.png, logo.png, arrow1.png, back...",option2-image.png logo.png arrow1.png backgrou...


In [48]:
# Complement of the logo subset: rows whose file listing has no name containing "logo".
not_contain_logo = logos[~logos.concat.str.contains("logo")]
# Unseeded random sample, so the rows displayed differ from run to run.
not_contain_logo.sample(5)

Unnamed: 0,game_id,all_files,concat
791,ba9ca629a79e79a0b652b5b275ecd5e1,"[game_2.png, game_10.png, 5.png, 12.png, 8.png...",game_2.png game_10.png 5.png 12.png 8.png game...
78,b4d26946d68754d77027cc419d36892f,"[game_2.png, click.png, game_10.png, game_7.pn...",game_2.png click.png game_10.png game_7.png en...
234,adunit-not-co-icecream-br-mob,"[endtext.png, game_2.png, cta.png, game_3.png,...",endtext.png game_2.png cta.png game_3.png game...
15,7fc571f85358c5d37efafde99b6896d7,"[thumbnail.jpg, landing_2.jpg, video.mp4, MPU-...",thumbnail.jpg landing_2.jpg video.mp4 MPU-clic...
805,adunit-heineken-pure-malt-v2-mob,"[closebutton.png, popup1.png, hotspot4.png, bg...",closebutton.png popup1.png hotspot4.png bg.png...


### work with the first asset group

In [None]:
# Alternative example pair (preview image + logo), kept for reference:
# img_path = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/2524439faeafa1c2ca1a27cac00a97b1/_preview.png"
# logo_path = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/2524439faeafa1c2ca1a27cac00a97b1/logo.png"

# Example pair currently in use: an end-frame image and the logo from the same asset folder.
# NOTE(review): absolute, user-specific paths — these only resolve on the author's machine.
img_path = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/79c36d2adb94900291f5ddf1f6580c43/endframe_2.png"
logo_path = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/79c36d2adb94900291f5ddf1f6580c43/logo.png"

In [56]:
# Keep, per asset folder, only the file names that contain "logo".
# NOTE(review): contain_logo comes from a boolean filter of logos; unless it
# was created as an explicit copy, pandas may emit SettingWithCopyWarning here.
contain_logo['file_name'] = contain_logo.all_files.apply(lambda x: filter_list(x,"logo"))

In [57]:
# Discard the helper columns now that the logo file names have been extracted.
contain_logo = contain_logo.drop(columns=["all_files","concat"])

In [60]:
contain_logo.sample(5)

Unnamed: 0,game_id,file_name
694,adunit-chanel-j12-tw-black-en-2022-v1-mpu,[logo.png]
59,adunit-van-cleef-and-arpels-frivole-wave-unit1...,"[logo.png, end-logo-ring.png, end-logo-ring-ol..."
325,adunit-nestle-maggi-varieties-bouillon-en-kwt-mob,"[logo_2.png, logo_1.png]"
516,3bf22f2c7f71986b690dc922cb0bee1f,[logo.png]
207,2b7e702f208b7fd60d15d0bdadd269f4,"[logo.png, logo01.png]"


In [62]:
# Snapshot contain_logo before the next line adds a column to it.
back_up_logo_list = contain_logo.copy()
# Row-wise apply that returns each row's file_name value and stores it in column "a".
# NOTE(review): this looks like a plain duplicate of file_name under a
# placeholder name — confirm the intent before building on it.
contain_logo["a"]=contain_logo.apply(lambda x: x.file_name,axis=1)

In [None]:
def find_logo_position(folder_id:str,candidate_logo:list):
    """Unfinished stub: currently only defines a list and implicitly returns None.

    Neither ``folder_id`` nor ``candidate_logo`` is used by the visible body.
    NOTE(review): presumably meant to locate each candidate logo on the
    folder's preview / end-frame images — confirm the intended contract.
    """
    # Name fragments of the images to search on — presumably matched against
    # the files in the asset folder; TODO confirm once implemented.
    prospect_on_image_names = ["_preview","endframe"]
    





