In [1]:
import pandas as pd
from pytesseract import pytesseract
import cv2
import numpy as np
from typing import List, Tuple
import matplotlib.pyplot as plt
from PIL import Image

import os
import sys
import glob

pytesseract.tesseract_cmd = r'/usr/bin/tesseract'

import warnings
warnings.simplefilter("ignore")

In [2]:
sys.path.append("../scripts")
from load_data import Loader

loader = Loader()

In [3]:
DIRECTORY_PATH = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/"

IMAGES_EXT = ["JPG","PNG","GIF","WEBP","TIFF","PSD","RAW","BMP","HEIF","INDD","JPEG"]

VIDEO_EXT = ["WEBM","MPG","MP2","MPEG","MPE","MPV","OGG","MP4","M4P","M4V","AVI","WMV","MOV","QT","FLV","SWF"]

### methods

In [4]:
def filter_list(all_values:list,key_word:str):
    filtered_list = filter(lambda x: key_word in x.lower(), all_values)
    return list(filtered_list)

In [5]:
def get_files_name(directory:str, filter_extension:list=None)->list:
    
    # directory = f'/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/{directory}/'
    
    files = []
    for filename in os.listdir(directory):
        f = os.path.join(directory, filename)
        
        # checking if it is a file
        if filter_extension != None:
            if os.path.isfile(f):
                if filename.split('.')[-1].upper() in filter_extension:
                    files.append(filename)
        else:
            if os.path.isfile(f):
                files.append(filename)

    return files

In [6]:
def locate_image_on_image(locate_image: str, on_image: str, prefix: str = '', visualize: bool = False, color: Tuple[int, int, int] = (0, 0, 255)):
    try:

        image = cv2.imread(on_image)
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        shape = gray.shape

        
        template = cv2.imread(locate_image, 0)

        result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF)
        _, _, _, max_loc = cv2.minMaxLoc(result)

        height, width = template.shape[:2]

        top_left = max_loc
        bottom_right = (top_left[0] + width, top_left[1] + height)

        if visualize:
            cv2.rectangle(image, top_left, bottom_right, color, 1)
            plt.figure(figsize=(10, 10))
            plt.axis('off')
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            plt.imshow(image)
        
        return (shape,top_left,bottom_right)
            
        
        # return {f'{prefix}top_left_pos': top_left, f'{prefix}bottom_right_pos': bottom_right}

    except cv2.error as err:
        # print(err)
        # top_left = (0,0)
        # bottom_right = (0,0)
        # shape = (0,0)

        # return (shape,top_left,bottom_right)
        raise Exception

In [7]:
def find_logo_position(folder_id:str,candidate_logo:list,ntry=0):
    
    prospect_on_image_names = ["preview","endframe","game"]
    max_try = len(prospect_on_image_names)

    try:

        img_path = glob.glob(f'{DIRECTORY_PATH}{folder_id}/*{prospect_on_image_names[ntry]}*.*')[0]
        logo_img = f'{DIRECTORY_PATH}{folder_id}/{candidate_logo[0]}'

        shape, top_left, bottom_right = locate_image_on_image(locate_image=logo_img, on_image=img_path)
        return (shape, top_left, bottom_right)
    
    except Exception as e:
        n = ntry + 1
        if n < max_try:
            return find_logo_position(folder_id,candidate_logo,n)
        return ((0,0), (0,0), (0,0))


In [8]:
def find_logo_area_ratio(shape:tuple,top_left:tuple,bottom_right:tuple):
    try:
        total_area = shape[0] * shape[1]
        logo_area = (bottom_right[0]-top_left[0]) * (bottom_right[1]*top_left[1])
        return logo_area/total_area
    except:
        return 0

### identify assets with logo information

In [9]:
bucket = "s3://10ac-batch-6/data/w11/Challenge_Data.zip"
file_path = "Challenge_Data/performance_data.csv"

df = loader.load_csv(bucket,file_path)

In [10]:
logos = df.copy()
logos.drop(columns=["preview_link","ER","CTR"],inplace=True)

In [11]:
logos["all_files"] = df.game_id.apply(lambda x:get_files_name(f'/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/{x}/'))

In [12]:
logos.head()

Unnamed: 0,game_id,all_files
1,bfbf7a2b7ac635e67877b1ab87fd6629,"[endtext.png, landing_2.jpg, video.mp4, end-ha..."
2,945010afbf9a55bbdffcb0895f946155,"[landing_2.jpg, video.mp4, end-mtlaurel.png, e..."
3,e68e20f592457b875ce29757ab855dfe,"[endtext.png, thumbnail.jpg, landing_2.jpg, en..."
4,adunit-nestle-purina-friskies-mob,"[logo.png, game_2.png, cta.png, engagement_ani..."
5,adunit-lionsgate-uwomt-user-slider-sensory-vid...,"[logo.png, cta.png, shade.png, engagement_anim..."


In [13]:
logos['concat'] = logos.all_files.apply(lambda x: " ".join(x))

In [14]:
logos.concat.str.lower().str.contains("logo").value_counts()

False    474
True     431
Name: concat, dtype: int64

only 431 creative assets directory contains information about logo

In [15]:
contain_logo = logos[logos.concat.str.lower().str.contains("logo")]

In [16]:
contain_logo.sample(5)

Unnamed: 0,game_id,all_files,concat
446,adunit-heineken-celebrations2-za-mob,"[star.png, bottle2.png, game_text_2.png, logo....",star.png bottle2.png game_text_2.png logo.png ...
56,73adaebd0fbf7778d06d9d711ba5c152,"[logo.png, video.mp4, f1-top-banner.png, f2-ct...",logo.png video.mp4 f1-top-banner.png f2-cta.jp...
252,23fb8e9094b55306b08f7157c4b5ec43,"[logo.png, video.mp4, engagement_animation.png...",logo.png video.mp4 engagement_animation.png mp...
17,79c36d2adb94900291f5ddf1f6580c43,"[f4-copy-2.png, logo.png, f4-cta.png, video.mp...",f4-copy-2.png logo.png f4-cta.png video.mp4 f4...
862,b12b03c2dec5e71fe630d3b06062982a,"[logo.png, f3-bg.jpg, cta.png, f3-logo.png, _p...",logo.png f3-bg.jpg cta.png f3-logo.png _previe...


In [17]:
not_contain_logo = logos[~logos.concat.str.lower().str.contains("logo")]
not_contain_logo.sample(5)

Unnamed: 0,game_id,all_files,concat
378,0a18978cdc8b64f900b0db6a297eb99d,"[engagement_instruction_2.png, engagement_inst...",engagement_instruction_2.png engagement_instru...
464,adunit-chanel-j12-singapore-exxon-mob,"[white-rect.png, landing_2.jpg, end_text_1.png...",white-rect.png landing_2.jpg end_text_1.png ct...
662,cc6c33bd4409ded3155793811af5c72b,"[thumbnail.jpg, video.mp4, MPU-click-area.png,...",thumbnail.jpg video.mp4 MPU-click-area.png hea...
176,adunit-lionsgate-uwomt-sensory-plus-tune-3-mob,"[video.mp4, fr-1-bg.jpg, end-cta.png, end-bg.jpg]",video.mp4 fr-1-bg.jpg end-cta.png end-bg.jpg
495,adunit-yamaha-wolverine-user-choice-av-reuse-mpu,"[2-highlight.png, engagement_instruction.png, ...",2-highlight.png engagement_instruction.png end...


### work with the first asset group

In [18]:
# img_path = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/2524439faeafa1c2ca1a27cac00a97b1/_preview.png"
# logo_path = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/2524439faeafa1c2ca1a27cac00a97b1/logo.png"

img_path = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/79c36d2adb94900291f5ddf1f6580c43/endframe_2.png"
logo_path = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/79c36d2adb94900291f5ddf1f6580c43/logo.png"

In [18]:
contain_logo['file_name'] = contain_logo.all_files.apply(lambda x: filter_list(x,"logo"))

In [19]:
contain_logo.drop(columns=["all_files","concat"],inplace=True)

In [20]:
contain_logo.sample(5)

Unnamed: 0,game_id,file_name
781,adunit-nestle-maggi-varieties-bouillon-ar-ksa-...,[logo.png]
75,adunit-mouser-user-choice-v2-cpe-av-canada-mpu,[logo.png]
189,adunit-mouser-user-choice-v2-cpe-aus-nz-mpu,[logo.png]
175,adunit-ihop-window4-scary-face-mob,"[logo.png, f2-ihop-logo.png]"
316,3a6d279abd87aadd09f950bcb123e358,"[f1-logo.png, f4-logo-rev.png, f3-logo.png, f2..."


In [21]:
back_up_logo_list = contain_logo.copy()

In [108]:
contain_logo = back_up_logo_list.copy()

In [22]:

contain_logo[["shape","top_left","bottom_right"]] =  contain_logo.apply(lambda x: pd.Series(find_logo_position(x.game_id,x.file_name)) ,axis = 1)

In [112]:
contain_logo.sample(5)

Unnamed: 0,game_id,file_name,shape,top_left,bottom_right
454,265680a6c85f2381bb49965579b0fd89,[logo.png],"(900, 600)","(266, 40)","(335, 126)"
85,adunit-nutrella-barrilito-mpu,"[nutrella-logo.png, logo-end.png]","(0, 0)","(0, 0)","(0, 0)"
705,eb31bf1739ffa01b1d458fe2c610230a,"[logo.png, f5-logo.png, video-logo.png]","(900, 600)","(135, 21)","(466, 85)"
727,adunit-mouser-user-choice-v3-cpe-av-us-mpu,[logo.png],"(500, 600)","(151, 327)","(366, 398)"
469,adunit-christian-dior-popup-store-mob,[logo.jpg],"(57, 57)","(195, 7)","(795, 97)"


In [121]:
# Logo Area Ratio
contain_logo[["LAR"]] =  contain_logo.apply(lambda x: pd.Series(find_logo_area_ratio(x["shape"],x["top_left"],x["bottom_right"])) ,axis = 1)


In [125]:
contain_logo.sample(5)

Unnamed: 0,game_id,file_name,shape,top_left,bottom_right,LAR
268,5a051f7c0a58d5301f9953827218bc55,"[logo.png, f2-logo.png]","(900, 600)","(89, 6)","(511, 202)",0.947156
749,e17b6db0e4fb777ab569ef0a81b5000e,[logo.png],"(900, 600)","(0, 0)","(600, 900)",0.0
580,d7c5bf54f39eb745b553d06813cffb23,[logo.png],"(900, 600)","(162, 39)","(438, 86)",1.714267
648,adunit-lionsgate-uwomt-user-slider-sensory-vid...,"[logo.png, logo-old.png]","(900, 600)","(217, 346)","(540, 461)",95.408219
686,5b4d2cc82bf11b1fa80b366fdd7a5867,"[logo.png, f3-logo.png]","(900, 600)","(173, 39)","(427, 125)",2.293056


In [25]:
# contain_logo.drop(columns=['shape','top_left','bottom_right'],inplace=True)

In [111]:
# contain_logo[contain_logo['shape'] == (0,0)]