In [32]:
import pandas as pd
from pytesseract import pytesseract
import cv2
import numpy as np
from typing import List, Tuple
import matplotlib.pyplot as plt
from PIL import Image

import os
import sys
import glob

# Tell pytesseract which Tesseract executable to invoke (absolute path).
pytesseract.tesseract_cmd = r'/usr/bin/tesseract'

import warnings
# Suppress every warning for the rest of the session. Note that this also
# hides pandas warnings such as SettingWithCopyWarning.
warnings.simplefilter("ignore")

In [3]:
# Make ../scripts importable; the path is relative to the kernel's working
# directory.
sys.path.append("../scripts")
from load_data import Loader

# NOTE(review): Loader is defined in load_data, outside this notebook; it is
# only used below through loader.load_csv(...).
loader = Loader()

In [4]:
# Root folder that holds one sub-directory of creative assets per game_id.
# It can be overridden with the CREATIVE_ASSETS_DIR environment variable so the
# notebook is not tied to a single machine; the default is the original path.
# os.path.join(..., "") guarantees the trailing separator that the f-string
# paths built from this constant rely on.
DIRECTORY_PATH = os.path.join(
    os.environ.get(
        "CREATIVE_ASSETS_DIR",
        "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/",
    ),
    "",
)

# Upper-case image file extensions (no leading dot).
IMAGES_EXT = ["JPG","PNG","GIF","WEBP","TIFF","PSD","RAW","BMP","HEIF","INDD","JPEG"]

# Upper-case video file extensions (no leading dot).
VIDEO_EXT = ["WEBM","MPG","MP2","MPEG","MPE","MPV","OGG","MP4","M4P","M4V","AVI","WMV","MOV","QT","FLV","SWF"]

### methods

In [5]:
def filter_list(all_values:list,key_word:str):
    """Return the entries of ``all_values`` that contain ``key_word``, ignoring case.

    Both the keyword and each candidate are lower-cased before the substring
    test, so ``"LOGO"`` and ``"logo"`` select the same entries. The original
    order and spelling of the matching entries are preserved.

    Args:
        all_values: strings to search (for example file names).
        key_word: substring to look for.

    Returns:
        A new list with the matching entries; empty when nothing matches.
    """
    needle = key_word.lower()
    return [value for value in all_values if needle in value.lower()]

In [6]:
def get_files_name(directory:str, filter_extension:list=None)->list:
    """List the names of the regular files directly inside ``directory``.

    Sub-directories are skipped and the listing is not recursive. Names are
    returned in the order produced by ``os.listdir`` (not sorted).

    Args:
        directory: folder to scan.
        filter_extension: optional list of extensions to keep. Matching is
            case-insensitive and a leading dot is ignored, so ``"PNG"``,
            ``"png"`` and ``".png"`` are equivalent. ``None`` keeps every file;
            an empty list keeps none.

    Returns:
        File names (without the directory part).

    Raises:
        OSError: propagated from ``os.listdir`` when ``directory`` cannot be read.
    """
    wanted = None
    if filter_extension is not None:
        # Normalise once so the per-file comparison is a plain set lookup.
        wanted = {ext.lstrip('.').upper() for ext in filter_extension}

    files = []
    for filename in os.listdir(directory):
        if not os.path.isfile(os.path.join(directory, filename)):
            continue
        if wanted is not None:
            # splitext yields '' for names without an extension, so a file
            # called "README" is never mistaken for extension "README".
            extension = os.path.splitext(filename)[1].lstrip('.').upper()
            if extension not in wanted:
                continue
        files.append(filename)

    return files

In [7]:
def locate_image_on_image(locate_image: str, on_image: str, prefix: str = '', visualize: bool = False, color: Tuple[int, int, int] = (0, 0, 255)):
    """Locate the best template-matching position of one image inside another.

    The searched image is converted to grayscale and compared with the
    grayscale template using ``cv2.matchTemplate`` (``TM_CCOEFF``). The location
    with the highest score is returned. Template matching always produces a
    maximum, so a position is returned even when the template is not really
    present in the searched image.

    Args:
        locate_image: path of the template image to look for (e.g. a logo).
        on_image: path of the image to search in.
        prefix: currently unused; kept so existing callers keep working.
        visualize: when True, draw the matched rectangle on the searched image
            and show it with matplotlib.
        color: rectangle colour passed to ``cv2.rectangle`` (BGR order, since
            the image comes from ``cv2.imread``).

    Returns:
        ``(shape, top_left, bottom_right)`` where ``shape`` is the
        ``(height, width)`` of the grayscale searched image and the two corners
        are ``(x, y)`` pixel positions of the best match.

    Raises:
        ValueError: if either file cannot be read as an image.
        Exception: if OpenCV rejects the inputs (for example a template larger
            than the searched image); the original ``cv2.error`` is chained.
    """
    # cv2.imread signals failure by returning None instead of raising.
    image = cv2.imread(on_image)
    if image is None:
        raise ValueError(f"could not read image to search in: {on_image}")

    template = cv2.imread(locate_image, cv2.IMREAD_GRAYSCALE)
    if template is None:
        raise ValueError(f"could not read template image: {locate_image}")

    try:
        # imread returns BGR data, so BGR2GRAY is the conversion that agrees
        # with the grayscale load of the template above.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        shape = gray.shape

        result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF)
        _, _, _, max_loc = cv2.minMaxLoc(result)

        height, width = template.shape[:2]

        top_left = max_loc
        bottom_right = (top_left[0] + width, top_left[1] + height)

        if visualize:
            cv2.rectangle(image, top_left, bottom_right, color, 1)
            plt.figure(figsize=(10, 10))
            plt.axis('off')
            # matplotlib expects RGB channel order.
            plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        return (shape, top_left, bottom_right)

    except cv2.error as err:
        # Keep raising a plain Exception for existing callers, but preserve
        # the OpenCV message and traceback.
        raise Exception(
            f"template matching failed for {locate_image!r} on {on_image!r}: {err}"
        ) from err

In [107]:
def find_logo_position(folder_id:str,candidate_logo:list,ntry=0):
    """Locate the first candidate logo on one of the folder's background images.

    Background images are looked up by keyword, in the order "preview",
    "endframe", "game", starting at index ``ntry``. For each keyword the first
    glob match inside ``DIRECTORY_PATH/folder_id`` is searched for
    ``candidate_logo[0]`` with ``locate_image_on_image``. Any exception during
    an attempt moves on to the next keyword.

    Returns:
        ``(shape, top_left, bottom_right)`` from the first successful attempt,
        or ``((0,0), (0,0), (0,0))`` when every attempt fails.
    """
    prospect_on_image_names = ["preview","endframe","game"]
    max_try = len(prospect_on_image_names)

    attempt = ntry
    while True:
        try:
            keyword = prospect_on_image_names[attempt]
            background_candidates = glob.glob(f'{DIRECTORY_PATH}{folder_id}/*{keyword}*.*')
            img_path = background_candidates[0]
            logo_img = f'{DIRECTORY_PATH}{folder_id}/{candidate_logo[0]}'

            shape, top_left, bottom_right = locate_image_on_image(locate_image=logo_img, on_image=img_path)
            return (shape, top_left, bottom_right)

        except Exception:
            # Missing file, unreadable image or failed match: try the next keyword.
            attempt = attempt + 1
            if not (attempt < max_try):
                return ((0,0), (0,0), (0,0))


### identify assets with logo information

In [9]:
# S3 URI of the zipped challenge data and, presumably, the path of the CSV
# inside that archive.
bucket = "s3://10ac-batch-6/data/w11/Challenge_Data.zip"
file_path = "Challenge_Data/performance_data.csv"

# NOTE(review): Loader.load_csv is defined outside this notebook; the cells
# below treat its result as a pandas DataFrame with a game_id column — confirm.
df = loader.load_csv(bucket,file_path)

In [10]:
# New frame without the link and metric columns; df itself is left untouched.
logos = df.drop(columns=["preview_link","ER","CTR"])

In [11]:
# List the files of every game's asset folder. The folder is built from the
# DIRECTORY_PATH constant instead of repeating the absolute path here.
logos["all_files"] = logos["game_id"].apply(lambda game_id: get_files_name(f"{DIRECTORY_PATH}{game_id}/"))

In [12]:
logos.head()

Unnamed: 0,game_id,all_files
1,bfbf7a2b7ac635e67877b1ab87fd6629,"[endtext.png, landing_2.jpg, video.mp4, end-ha..."
2,945010afbf9a55bbdffcb0895f946155,"[landing_2.jpg, video.mp4, end-mtlaurel.png, e..."
3,e68e20f592457b875ce29757ab855dfe,"[endtext.png, thumbnail.jpg, landing_2.jpg, en..."
4,adunit-nestle-purina-friskies-mob,"[logo.png, game_2.png, cta.png, engagement_ani..."
5,adunit-lionsgate-uwomt-user-slider-sensory-vid...,"[logo.png, cta.png, shade.png, engagement_anim..."


In [13]:
# Join each folder's file names into one space-separated string so they can be
# searched with pandas string methods.
logos["concat"] = logos["all_files"].map(" ".join)

In [14]:
# Count folders whose joined file names contain "logo" (case-insensitive)
# versus those that do not.
logos.concat.str.lower().str.contains("logo").value_counts()

False    474
True     431
Name: concat, dtype: int64

Only 431 of the 905 creative asset directories contain a file whose name includes "logo".

In [15]:
# Keep only folders with a "logo" file. .copy() makes this an independent frame
# so the later cells that add and drop columns on contain_logo do not operate
# on a slice of logos.
contain_logo = logos[logos["concat"].str.lower().str.contains("logo")].copy()

In [16]:
contain_logo.sample(5)

Unnamed: 0,game_id,all_files,concat
62,adunit-chevy-tap-and-hold-600-1200-mob,"[vidbg.png, homecharginglegal.png, logo.png, v...",vidbg.png homecharginglegal.png logo.png video...
630,f1cc75828bb3b7419b0d210e971a7cdf,"[fg-fixed.png, landingtext.png, logo.png, vide...",fg-fixed.png landingtext.png logo.png video.mp...
267,3f749b8fd9e49bca68b5fdc0ca391cd8,"[end2.png, engagement_instruction_en.png, ligh...",end2.png engagement_instruction_en.png light.p...
107,adunit-the-botanist-gin-mpu,"[endtext.png, endframe_4.png, logo.png, gamete...",endtext.png endframe_4.png logo.png gametext_7...
573,286ce285a3a28445c972ba9d09090937,"[logo.png, video.mp4, cta.png, engagement_anim...",logo.png video.mp4 cta.png engagement_animatio...


In [17]:
# Complement of contain_logo: folders with no "logo" in any file name.
not_contain_logo = logos[~logos.concat.str.lower().str.contains("logo")]
# sample(5) is called without random_state, so the displayed rows change
# between runs.
not_contain_logo.sample(5)

Unnamed: 0,game_id,all_files,concat
338,adunit-windhoek-beer-year-end-2021-bio-v2-mob,"[game_2.png, landing_3.png, game_3.png, game_4...",game_2.png landing_3.png game_3.png game_4.png...
136,adunit-ggpoker-gamification-user-choice-mpu,"[end.jpg, fold-emoji-1.png, allin-card-5-blur....",end.jpg fold-emoji-1.png allin-card-5-blur.png...
415,9155a2e9b6ccf3449b2403031ed99468,"[video.mp4, video-cta.jpg, rev-video-cta-getti...",video.mp4 video-cta.jpg rev-video-cta-getticke...
297,be3f931b8039ef187f1dcfa3d488c041,"[video.mp4, end-cta-mpu-swap-1.png, cta.png, _...",video.mp4 end-cta-mpu-swap-1.png cta.png _prev...
411,68c52769f63d072ef64e0dde04ed5ff0,"[video.mp4, cta.png, engagement_animation.png,...",video.mp4 cta.png engagement_animation.png lan...


### work with the first asset group

In [18]:
# Alternative sample pair, kept commented out:
# img_path = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/2524439faeafa1c2ca1a27cac00a97b1/_preview.png"
# logo_path = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/2524439faeafa1c2ca1a27cac00a97b1/logo.png"

# Sample background image and logo taken from the same asset folder.
# NOTE(review): neither variable is referenced by the later cells visible in
# this part of the notebook (find_logo_position uses its own local img_path).
img_path = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/79c36d2adb94900291f5ddf1f6580c43/endframe_2.png"
logo_path = "/home/amanuel_zewdu/creative_image_optimization/data/Challenge_Data/Assets/79c36d2adb94900291f5ddf1f6580c43/logo.png"

In [19]:
# For every folder keep only the file names that contain "logo".
contain_logo["file_name"] = contain_logo["all_files"].map(lambda names: filter_list(names, "logo"))

In [20]:
# Drop the helper columns by rebinding instead of mutating in place.
# errors="ignore" keeps the cell re-runnable once the columns are already gone.
contain_logo = contain_logo.drop(columns=["all_files","concat"], errors="ignore")

In [21]:
contain_logo.sample(5)

Unnamed: 0,game_id,file_name
766,adunit-lionsgate-uwomt-user-slider-sensory-vid...,[logo.png]
224,050fd94fd028e2b766dcbfa5a9586d2c,"[logo1.png, logo-swap.png, logo2.png, logo-tab..."
267,3f749b8fd9e49bca68b5fdc0ca391cd8,[endlogo.png]
812,adunit-chanel-j12-tw-white-en-2022-v1-mpu,[logo.png]
213,adunit-ge-biofuel-userchoice-mob,"[logo.png, end-logo.png]"


In [22]:
# Snapshot of contain_logo (game_id and file_name) taken before the position
# columns are added.
back_up_logo_list = contain_logo.copy()

In [108]:
# Reset contain_logo from the snapshot, presumably so the position cell below
# can be re-run from a clean frame.
contain_logo = back_up_logo_list.copy()

In [109]:

# Run find_logo_position on every row and store its three returned tuples in
# the new shape / top_left / bottom_right columns. Rows for which every attempt
# failed carry the (0, 0) placeholders returned by find_logo_position.
contain_logo[["shape","top_left","bottom_right"]] =  contain_logo.apply(lambda x: pd.Series(find_logo_position(x.game_id,x.file_name)) ,axis = 1)

In [25]:
# contain_logo.drop(columns=['shape','top_left','bottom_right'],inplace=True)

In [111]:
# contain_logo[contain_logo['shape'] == (0,0)]