In [4]:
pip install opencv-python
pip install spacy
pip install pytesseract
!python -m spacy download en_core_web_sm

Collecting opencv-python
  Obtaining dependency information for opencv-python from https://files.pythonhosted.org/packages/66/82/564168a349148298aca281e342551404ef5521f33fba17b388ead0a84dc5/opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl.metadata
  Downloading opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl.metadata (20 kB)
Downloading opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl (54.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.8/54.8 MB[0m [31m38.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: opencv-python
Successfully installed opencv-python-4.10.0.84
Note: you may need to restart the kernel to use updated packages.


In [23]:
import os
import shutil

import cv2
import numpy as np
import pandas as pd
import pytesseract
import spacy
from PIL import Image

In [2]:
# Tell pytesseract which tesseract executable to run. Prefer whatever `tesseract` is
# discoverable on PATH so the notebook is not tied to one machine; fall back to the
# Homebrew location that was originally hard-coded here.
pytesseract.pytesseract.tesseract_cmd = shutil.which('tesseract') or '/opt/homebrew/bin/tesseract'

In [3]:
# A `!export ...` line runs in a throwaway subshell, so it never changes the kernel's
# environment; update os.environ directly instead. PATH entries must be directories,
# so add Homebrew's bin directory rather than the tesseract binary itself.
HOMEBREW_BIN = '/opt/homebrew/bin'
if HOMEBREW_BIN not in os.environ.get('PATH', '').split(os.pathsep):
    # filter(None, ...) avoids a leading separator when PATH is unset or empty;
    # the membership check above keeps the cell idempotent when it is re-run.
    os.environ['PATH'] = os.pathsep.join(filter(None, [os.environ.get('PATH', ''), HOMEBREW_BIN]))

## Exploring Datasets

In [4]:
# Ad-creative metadata (one row per creative, including the speech transcript)
sample_df = pd.read_csv(filepath_or_buffer='Sample.csv')
# Survey answers collected for the ads, used as ground truth
ground_truth_df = pd.read_csv(filepath_or_buffer='ground-truth.csv')

In [5]:
# Peek at the first two rows of the creative metadata
sample_df.iloc[:2]

Unnamed: 0,creative_data_id,creative_data_title,creative_data_description,creative_data_duration,creative_data_lifetime_spend_estimated,creative_data_lifetime_airings_count,creative_data_airing_date_first_et,creative_data_airing_date_last_et,speech
0,2194673,30s Kim's Discount - 2194673,Kim is going for the State Farm Drive Safe & S...,30,29789808.73,13949,2019-04-06T22:19:06-04:00,2020-08-04T18:42:50-04:00,"So Kim, you going for a big drive safe and sav..."
1,2142915,30s New Flat - 2142915,Uncomfortable with her shabby apartment and ro...,30,5423001.7,10132,2019-03-04T06:49:02-05:00,2021-08-03T11:12:36-04:00,Check your credit scores for free and learn ho...


In [6]:
# Peek at the first two rows of the survey ground truth
ground_truth_df.iloc[:2]

Unnamed: 0,Timestamp,creative_data_id,"Is there a call to go online (e.g., shop online, visit the Web)?","Is there online contact information provided (e.g., URL, website)?","Is there a visual or verbal call to purchase (e.g., buy now, order now)?","Does the ad portray a sense of urgency to act (e.g., buy before sales ends, order before ends)?","Is there an incentive to buy (e.g., a discount, a coupon, a sale or ""limited time offer"")?","Is there offline contact information provided (e.g., phone, mail, store location)?",Is there mention of something free?,"Does the ad mention at least one specific product or service (e.g., model, type, item)?",...,Was there a famous person in this ad?,"If yes to the above, write the name of the famous person, if known.",What happened in this ad? (Answer in 2-3 sentences each),What was/were the company's goal(s) with this ad? Choose (potentially multiple) from:,How successful was the ad in achieving its goal(s)?,"How much did you like the ad? (1. Strongly dislike, 2. Dislike, 3. Neither Like or Dislike, 4. Like, 5. Strongly Like)","What was the slogan presented in the ad, if any?","After addressing the specific survey items, write a general description of the ad. You can use answers to the questions above to formulate your answer. Your description should include:\nBrand and Product Identification: \nSpecify the brand and whether a product is being advertised. (1 sentence)\nVisual Elements: Describe what is seen on the screen, including setting, characters, and any text or graphics. (max 2 sentences)\nAuditory Elements: Note what is heard, such as dialogue, voice-over, music, or sound effects. (max 2 sentences)\n",Any additional feedback or things we should be aware of?,Please enter the video identifier one more time (e.g. 123456789.mp4)
0,5/16/2024 8:00:14,1471363,No,Yes,No,No,No,No,No,Yes,...,No,,During this ad we had a man and a woman taking...,Change how consumers feel about the product/br...,3,3,,Product Identification: Mini Countryman SUV\nM...,I believe the text legibility should be improv...,1471363.mp4
1,5/23/2024 2:35:55,1471363,No,Yes,No,No,No,No,No,Yes,...,No,,We watch Mini USA new Countryman driver around...,Directly persuade consumers to purchase,2,2,,Mini USA is advertising their new Countryman c...,,1471363.mp4


### Extracting Textual Features

In [7]:
# List the column labels of the creative metadata to see which text fields are available.
sample_df.columns

Index(['creative_data_id', 'creative_data_title', 'creative_data_description',
       'creative_data_duration', 'creative_data_lifetime_spend_estimated',
       'creative_data_lifetime_airings_count',
       'creative_data_airing_date_first_et',
       'creative_data_airing_date_last_et', 'speech'],
      dtype='object')

In [8]:
# Extracting features from descriptions and transcriptions
nlp = spacy.load("en_core_web_sm")  # small English pipeline fetched in the setup cell

# Worked example: run the pipeline over the first description only
description = sample_df['creative_data_description'].iloc[0]
doc = nlp(description)

# Named entities as (text, label) pairs
entities = [(span.text, span.label_) for span in doc.ents]
# Lemmas of purely alphabetic tokens that are not stop words
keywords = [tok.lemma_ for tok in doc if not tok.is_stop and tok.is_alpha]

print(f"Entities: {entities}")
print(f"Keywords: {keywords}")

Entities: [('Kim', 'PERSON'), ('the State Farm Drive Safe & Save', 'ORG'), ('State Farm', 'ORG'), ('up to 30 percent', 'PERCENT')]
Keywords: ['Kim', 'go', 'State', 'Farm', 'Drive', 'Safe', 'Save', 'Discount', 'app', 'say', 'vigilant', 'refuse', 'speed', 'meeting', 'start', 'need', 'use', 'restroom', 'go', 'labor', 'refuse', 'let', 'anybody', 'mess', 'discount', 'State', 'Farm', 'say', 'discount', 'percent']


In [24]:
#Extracting Features from On-Screen Text - We will use Optical Character Recognition (OCR) to extract text from video frames.

# Function to extract text from a video frame
def extract_text_from_frame(frame):
    """Run Tesseract OCR on a single OpenCV video frame.

    Args:
        frame: Image array as produced by ``cv2.VideoCapture.read``. Colour
            frames are expected in OpenCV's BGR (or BGRA) channel order;
            arrays without a channel axis are passed through unchanged.

    Returns:
        The string returned by ``pytesseract.image_to_string`` for the frame.
    """
    # OpenCV decodes colour frames as BGR(A) while PIL/pytesseract assume RGB(A);
    # without this swap the red and blue channels are exchanged before OCR.
    if getattr(frame, 'ndim', 0) == 3:
        channels = frame.shape[2]
        if channels == 3:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        elif channels == 4:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGRA2RGBA)
    pil_image = Image.fromarray(frame)
    return pytesseract.image_to_string(pil_image)

# Function to extract key frames from a video
def extract_key_frames(video_path, num_frames=5):
    """Sample evenly spaced frames from a video file.

    Args:
        video_path: Path of the video to open with ``cv2.VideoCapture``.
        num_frames: Number of evenly spaced positions to sample between the
            first and the last frame (inclusive).

    Returns:
        List of the frames that could be read, in playback order. The list is
        empty when the video cannot be opened or reports no frames, and may be
        shorter than ``num_frames`` when individual reads fail.
    """
    video = cv2.VideoCapture(video_path)
    try:
        if not video.isOpened():
            return []

        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            # linspace(0, -1, ...) would otherwise ask the decoder for negative indices.
            return []

        frame_indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
        key_frames = []
        for frame_idx in frame_indices:
            # Seek to the target frame, then decode it.
            video.set(cv2.CAP_PROP_POS_FRAMES, int(frame_idx))
            ret, frame = video.read()
            if ret:
                key_frames.append(frame)
        return key_frames
    finally:
        # Always free the capture handle, even if sampling raises.
        video.release()

# Run the sampling + OCR steps on the example video
video_path = 'sample/1471363.mp4'
key_frames = extract_key_frames(video_path)
frame_texts = list(map(extract_text_from_frame, key_frames))

# Report the recognised text per sampled frame, numbered from 1
print("Extracted Text from Frames:")
for i, text in enumerate(frame_texts):
    print("Frame {}: {}".format(i + 1, text))

Extracted Text from Frames:
Frame 1: a 2 | Fr

Frame 2: ADD

Frame 3: 
Frame 4: 
Frame 5: miniusa.com/newcountryman

© 2017 MINI USA, « division of BMW of North America, LLC. The MINI name, model names, and logo are registered trademarks.




### Exploring the Video Files

In [25]:
# Function to extract video information
def get_video_info(video_path):
    """Collect basic properties of a video as reported by OpenCV.

    Args:
        video_path: Path of the video to open with ``cv2.VideoCapture``.

    Returns:
        ``None`` if the video cannot be opened; otherwise a dict with the keys
        'Frame Width', 'Frame Height', 'Frame Rate', 'Frame Count' and
        'Duration (seconds)'. The duration is ``None`` when the reported frame
        rate is not positive, because it cannot be derived in that case.
    """
    video = cv2.VideoCapture(video_path)
    try:
        if not video.isOpened():
            return None

        frame_rate = video.get(cv2.CAP_PROP_FPS)
        frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        return {
            'Frame Width': int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
            'Frame Height': int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            'Frame Rate': frame_rate,
            'Frame Count': frame_count,
            # Guard against a zero frame rate, which would raise ZeroDivisionError.
            'Duration (seconds)': frame_count / frame_rate if frame_rate > 0 else None,
        }
    finally:
        # Release the handle on every path, including the not-opened one.
        video.release()

In [26]:
# Example: inspect the properties of the sample ad video
video_path = 'sample/1471363.mp4'
video_info = get_video_info(video_path)
# Header line with the bare file name, followed by the info dict on its own line
print(
    "Information for video {}:".format(os.path.basename(video_path)),
    video_info,
    sep="\n",
)

Information for video 1471363.mp4:
{'Frame Width': 1280, 'Frame Height': 720, 'Frame Rate': 29.97002997002997, 'Frame Count': 900, 'Duration (seconds)': 30.03}
