In [1]:
import glob
import os
import sys
from typing import List, Tuple

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from pytesseract import pytesseract

pytesseract.tesseract_cmd = r'/usr/bin/tesseract'

import warnings
warnings.simplefilter("ignore")

In [2]:
# Make the project-local modules in ../scripts importable. The path is
# relative, so it resolves against the kernel's current working directory.
sys.path.append("../scripts")
from load_data import Loader

# Loader instance used further down to read the performance CSV.
loader = Loader()

In [3]:
# Root folder holding one sub-folder of creative assets per game_id.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
DIRECTORY_PATH = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/"

# Upper-case image file extensions (no leading dot).
# Presumably meant as the `filter_extension` argument of get_files_name; not passed to it in the cells shown here.
IMAGES_EXT = ["JPG","PNG","GIF","WEBP","TIFF","PSD","RAW","BMP","HEIF","INDD","JPEG"]

# Upper-case video file extensions (no leading dot); same presumed use as IMAGES_EXT.
VIDEO_EXT = ["WEBM","MPG","MP2","MPEG","MPE","MPV","OGG","MP4","M4P","M4V","AVI","WMV","MOV","QT","FLV","SWF"]

### methods

In [4]:
def filter_list(all_values:list,key_word:str):
    """Return the items of ``all_values`` that contain ``key_word``, ignoring case.

    Args:
        all_values: Strings to search (e.g. file names).
        key_word: Substring to look for; matched case-insensitively.

    Returns:
        A new list with the matching items, in their original order and with
        their original casing.
    """
    # Lower-case the needle as well as the haystack: previously only the
    # haystack was lowered, so a keyword such as "Logo" could never match.
    needle = key_word.lower()
    return [value for value in all_values if needle in value.lower()]

In [5]:
def get_files_name(directory:str, filter_extension:list=None)->list:
    """List the names of the regular files located directly inside ``directory``.

    Args:
        directory: Path of the folder to scan (sub-folders are not descended into).
        filter_extension: Optional collection of extensions to keep, e.g.
            ``["PNG", "JPG"]``. Matching ignores case and a leading dot.
            ``None`` keeps every file.

    Returns:
        File names (not full paths), in the order produced by ``os.listdir``.

    Raises:
        OSError: If ``directory`` cannot be listed (propagated from ``os.listdir``).
    """
    wanted = None
    if filter_extension is not None:
        # Normalise once so callers may pass "png", "PNG" or ".png".
        wanted = {str(ext).lstrip('.').upper() for ext in filter_extension}

    files = []
    for filename in os.listdir(directory):
        # Skip sub-directories and anything else that is not a regular file.
        if not os.path.isfile(os.path.join(directory, filename)):
            continue

        if wanted is not None:
            # splitext yields "" for a name without an extension, so a file
            # literally called "PNG" is no longer mistaken for a PNG image.
            extension = os.path.splitext(filename)[1].lstrip('.').upper()
            if extension not in wanted:
                continue

        files.append(filename)

    return files

In [6]:
def locate_image_on_image(locate_image: str, on_image: str, prefix: str = '', visualize: bool = False, color: Tuple[int, int, int] = (0, 0, 255)):
    """Find where the template ``locate_image`` best matches inside ``on_image``.

    Both images are compared in grayscale with ``cv2.matchTemplate``
    (``TM_CCOEFF``); the location of the highest score is reported.

    Args:
        locate_image: Path of the template image to look for (e.g. a logo).
        on_image: Path of the image that is searched.
        prefix: Unused; kept so existing callers that pass it keep working.
        visualize: When True, draw the matched rectangle on the searched image
            and show it with matplotlib.
        color: Colour of the drawn rectangle, in OpenCV's BGR channel order.

    Returns:
        A tuple ``(shape, top_left, bottom_right)`` where ``shape`` is the
        shape of the grayscale searched image and the two corners are
        ``(x, y)`` positions of the best match. All three are ``(0, 0)`` when
        either image cannot be read or OpenCV rejects the inputs (for example
        a template larger than the searched image).
    """
    not_found = ((0, 0), (0, 0), (0, 0))

    try:
        image = cv2.imread(on_image)
        template = cv2.imread(locate_image, cv2.IMREAD_GRAYSCALE)

        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising, so handle it explicitly.
        if image is None or template is None:
            return not_found

        # imread returns channels in BGR order; converting with BGR2GRAY keeps
        # the gray levels consistent with the template, which imread converted
        # to grayscale itself. (RGB2GRAY would swap the red/blue weights.)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        shape = gray.shape

        result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF)
        _, _, _, max_loc = cv2.minMaxLoc(result)

        height, width = template.shape[:2]

        top_left = max_loc
        bottom_right = (top_left[0] + width, top_left[1] + height)

        if visualize:
            cv2.rectangle(image, top_left, bottom_right, color, 1)
            plt.figure(figsize=(10, 10))
            plt.axis('off')
            # matplotlib expects RGB, OpenCV works in BGR.
            plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        return (shape, top_left, bottom_right)

    except cv2.error:
        # Best-effort: callers treat the all-zero tuple as "not located".
        return not_found

In [7]:
def find_logo_position(folder_id:str,candidate_logo:list,ntry=0):
    """Locate a game's logo on its preview image, falling back to its endframe.

    The first entry of ``candidate_logo`` is used as the template. Reference
    images are looked up inside ``DIRECTORY_PATH/<folder_id>/`` by file-name
    keyword, in the order "preview" then "endframe"; the first one on which
    the logo is located wins.

    Args:
        folder_id: Name of the asset sub-folder (the game id).
        candidate_logo: File names of logo candidates inside that folder.
        ntry: Index of the first keyword to try (0 = "preview", 1 = "endframe").

    Returns:
        ``(shape, top_left, bottom_right)`` as returned by
        ``locate_image_on_image``, or ``((0,0), (0,0), (0,0))`` when no
        reference image exists or the logo could not be located on any of them.
    """
    prospect_on_image_names = ["preview","endframe"]
    not_found = ((0,0), (0,0), (0,0))

    # Nothing to search for.
    if not candidate_logo:
        return not_found

    folder = os.path.join(DIRECTORY_PATH, str(folder_id))
    logo_img = os.path.join(folder, candidate_logo[0])

    # Iterating replaces the former recursion, whose result was discarded so
    # the fallback image could never contribute a position.
    for name in prospect_on_image_names[ntry:]:
        # Escape the folder part so special characters in an id are taken
        # literally; sort so the chosen file does not depend on listing order.
        pattern = os.path.join(glob.escape(folder), f'*{name}*.*')
        matches = sorted(glob.glob(pattern))
        if not matches:
            continue

        result = locate_image_on_image(locate_image=logo_img, on_image=matches[0])
        # An all-zero shape is the "not located" marker; try the next image.
        if tuple(result[0]) != (0, 0):
            return result

    return not_found


### identify assets with logo information

In [8]:
# Location of the zipped challenge data set.
bucket = "s3://10ac-batch-6/data/w11/Challenge_Data.zip"
# CSV to read; presumably a path inside that archive -- confirm against Loader.load_csv.
file_path = "Challenge_Data/performance_data.csv"

# Load the performance table; later cells rely on its game_id, preview_link, ER and CTR columns.
df = loader.load_csv(bucket,file_path)

In [9]:
# Work on a trimmed copy of the loaded frame, without the preview_link, ER and CTR columns.
logos = df.drop(columns=["preview_link","ER","CTR"])

In [10]:
# List every asset file of each game. The folder is built from DIRECTORY_PATH
# rather than from a second hard-coded copy of the assets path.
logos["all_files"] = logos.game_id.apply(
    lambda game_id: get_files_name(os.path.join(DIRECTORY_PATH, str(game_id)))
)

In [10]:
logos.head()

Unnamed: 0,game_id,all_files
1,bfbf7a2b7ac635e67877b1ab87fd6629,"[endtext.png, landing_2.jpg, video.mp4, end-ha..."
2,945010afbf9a55bbdffcb0895f946155,"[landing_2.jpg, video.mp4, end-mtlaurel.png, e..."
3,e68e20f592457b875ce29757ab855dfe,"[endtext.png, thumbnail.jpg, landing_2.jpg, en..."
4,adunit-nestle-purina-friskies-mob,"[logo.png, game_2.png, cta.png, engagement_ani..."
5,adunit-lionsgate-uwomt-user-slider-sensory-vid...,"[logo.png, cta.png, shade.png, engagement_anim..."


In [11]:
# Flatten each file list into one space-separated string so it can be searched with .str methods.
logos['concat'] = logos.all_files.map(" ".join)

In [12]:
logos.concat.str.lower().str.contains("logo").value_counts()

False    474
True     431
Name: concat, dtype: int64

Only 431 of the creative asset directories contain a file with "logo" in its name.

In [13]:
# Games whose asset folder has at least one file with "logo" in its name.
# .copy() makes this an independent frame: columns are added to it later, and
# assigning to a filtered slice of `logos` is what triggers pandas'
# SettingWithCopyWarning.
contain_logo = logos[logos.concat.str.lower().str.contains("logo")].copy()

In [82]:
contain_logo.sample(5)

Unnamed: 0,game_id,all_files,concat
44,adunit-gme-user-choice-mpu,"[game_text_2.png, logo.png, game_2.png, 3-img-...",game_text_2.png logo.png game_2.png 3-img-high...
631,b3fd525be390ec4cbebdcbd225362f12,"[landingtext.png, logo.png, video.mp4, cta.png...",landingtext.png logo.png video.mp4 cta.png eng...
209,adunit-mouser-user-choice-flight4-uk-cpe-mob,"[logo.png, landing_3.png, landingtext_3.png, c...",logo.png landing_3.png landingtext_3.png cta.p...
650,adunit-lexus-video-carousel-cpe-av-mob,"[logo.png, rx-copy.png, cta.png, engagement_an...",logo.png rx-copy.png cta.png engagement_animat...
512,adunit-mouser-user-choice-v3-cpe-us-mpu,"[logo.png, landing_3.png, background.png, cta....",logo.png landing_3.png background.png cta.png ...


In [14]:
# Complement of the logo selection: games with no file whose name contains "logo".
has_logo_file = logos.concat.str.lower().str.contains("logo")
not_contain_logo = logos[~has_logo_file]
not_contain_logo.sample(5)

Unnamed: 0,game_id,all_files,concat
208,fac33ac29ac616dfa0053558c1a3c03c,"[landing_2.jpg, endframe_4.png, video.mp4, cta...",landing_2.jpg endframe_4.png video.mp4 cta.png...
737,44c8e52abc427a52c487122232409f5d,"[thumbnail.jpg, landing_2.jpg, video.mp4, MPU-...",thumbnail.jpg landing_2.jpg video.mp4 MPU-clic...
463,d982317438736aac9cb37b3e66a2f555,"[video.mp4, endd.png, cta.png, engagement_anim...",video.mp4 endd.png cta.png engagement_animatio...
28,61eb910425767fd90074a958328bee6a,"[thumbnail.jpg, landing_2.jpg, video.mp4, MPU-...",thumbnail.jpg landing_2.jpg video.mp4 MPU-clic...
846,b22916b05fb1e90b8b8d33812483022c,"[thumbnail.jpg, landing_2.jpg, endframe_4.png,...",thumbnail.jpg landing_2.jpg endframe_4.png vid...


### work with the first asset group

In [None]:
# Hand-picked example pair (reference image + logo) from a single asset folder.
# NOTE(review): this cell has no execution count and neither variable is used by
# the cells shown below -- presumably left over from manual experiments with
# locate_image_on_image; confirm before removing.
# Earlier example, kept for reference:
# img_path = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/2524439faeafa1c2ca1a27cac00a97b1/_preview.png"
# logo_path = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/2524439faeafa1c2ca1a27cac00a97b1/logo.png"

img_path = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/79c36d2adb94900291f5ddf1f6580c43/endframe_2.png"
logo_path = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/79c36d2adb94900291f5ddf1f6580c43/logo.png"

In [15]:
# Keep, per game, only the file names that contain "logo" (the candidate logo images).
contain_logo['file_name'] = contain_logo.all_files.map(
    lambda file_names: filter_list(file_names, "logo")
)

In [16]:
# The full file list and its joined string were only needed to select the logo files.
contain_logo = contain_logo.drop(columns=["all_files","concat"])

In [17]:
contain_logo.sample(5)

Unnamed: 0,game_id,file_name
8,adunit-lionsgate-spiral-puzzle-v2-mpu,[logo.png]
766,adunit-lionsgate-uwomt-user-slider-sensory-vid...,[logo.png]
221,c4b651056516bc245b6b0dd78c2b192c,[video-logo.png]
206,3d8be6f0a1e1c248a02befc0dd3f6bec,"[logo.png, logo_end.png, 1logos.png, logoends...."
147,269d061e1b507338d8b4c69676f9ffa1,[logo.png]


In [173]:
# Snapshot of the frame before the position columns are added.
back_up_logo_list = contain_logo.copy()

# Run the template matching once per game and spread the returned
# (shape, top_left, bottom_right) tuple over three columns.
position_columns = ["shape","top_left","bottom_right"]
logo_positions = contain_logo.apply(
    lambda row: pd.Series(find_logo_position(row.game_id, row.file_name)),
    axis=1,
)
contain_logo[position_columns] = logo_positions

In [172]:
# This cell was originally executed *before* the position computation (see the
# execution counts) to clear stale columns. Run top to bottom, an in-place drop
# here would delete the freshly computed positions, so build a position-free
# frame under a separate name and leave `contain_logo` intact.
contain_logo_without_positions = contain_logo.drop(
    columns=['shape','top_left','bottom_right'], errors="ignore"
)

In [175]:
contain_logo.sample(5)

Unnamed: 0,game_id,file_name,shape,top_left,bottom_right
105,f6c6774bb267182cf248fd64a5b25b59,"[logo.png, f5-logo.png]","(900, 600)","(135, 21)","(466, 85)"
318,adunit-mouser-user-choice-cpe-av-uk-mpu,[logo.png],"(0, 0)","(0, 0)","(0, 0)"
884,adunit-mouser-user-choice-v2-cpe-av-uk-mpu,[logo.png],"(0, 0)","(0, 0)","(0, 0)"
545,b378c281edcaebfbe47a3104fdc2e036,[logo.png],"(900, 600)","(162, 39)","(438, 86)"
101,adunit-city-square-mall-cny-mob,[logo.png],"(0, 0)","(0, 0)","(0, 0)"
