In [1]:
import pandas as pd
import datetime
from transformers import pipeline
import json



In [2]:
# Load the ARCOM CSV export into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs as-is on a machine where this file exists at this location.
arcom_csv_path = "/Users/thibaultrolland/Projects/arcom/data/ARCOM.csv"
arcom_df = pd.read_csv(arcom_csv_path)

  arcom_df = pd.read_csv("/Users/thibaultrolland/Projects/arcom/data/ARCOM.csv")


In [3]:
def clean_themes(themes):
    """Turn a comma-separated theme string into a list of normalised labels.

    Parameters
    ----------
    themes : str or any other object
        Raw cell value. Only strings are parsed; anything else (for example
        None or a float NaN) is treated as "no themes".

    Returns
    -------
    list of str
        The labels in their original order, each stripped of surrounding
        whitespace and lower-cased. Pieces that are empty after stripping
        (empty input, doubled or trailing commas) are dropped. An empty list
        is returned when `themes` is not a string.
    """
    # isinstance (rather than an exact type comparison) also accepts str
    # subclasses.
    if not isinstance(themes, str):
        return []
    labels = (piece.strip().lower() for piece in themes.split(','))
    # Skip blank pieces so '' never shows up as a theme.
    return [label for label in labels if label]

# Derived columns used by the later cells.

# Parsed list of themes per ad (clean_themes returns [] for non-string values).
arcom_df['clean_theme'] = arcom_df['Thème'].apply(clean_themes)

# 'Date' is parsed as day/month/year text; strptime raises on any other value.
arcom_df['clean_date'] = [datetime.datetime.strptime(x, '%d/%m/%Y').date() for x in arcom_df['Date']]

# Text fed to the classifier: visual description followed by the script.
# A space is inserted between the two fields so the last word of 'Visuel' is
# not glued to the first word of 'Script'. Rows where either field is missing
# still come out as NaN.
arcom_df['visual_and_script'] = arcom_df['Visuel'] + ' ' + arcom_df['Script']

In [4]:
# Keep only the ads of the selected sector(s) that have both a script and a
# visual description.
food_sectors = ['BOISSONS']

# Vectorised membership test instead of a Python-level loop over the column.
in_food_sector = arcom_df['Secteur'].isin(food_sectors)

# Built as one chain so no column is ever assigned on a filtered slice
# (which is what triggers pandas' SettingWithCopyWarning).
food_df = (
    arcom_df[in_food_sector]
    .dropna(subset=['Script', 'Visuel'])
    # Remember each row's label in arcom_df before the index is renumbered.
    .assign(original_index=lambda frame: frame.index)
    # reset_index() (without drop) also keeps the old labels in an 'index' column.
    .reset_index()
)

In [5]:
# Build the zero-shot classification pipeline used by all scoring cells below.
zeroshot_model_name = "MoritzLaurer/deberta-v3-large-zeroshot-v2.0"
zeroshot_classifier = pipeline(task="zero-shot-classification", model=zeroshot_model_name)


In [9]:
# Take every row of food_df in shuffled order (sample size == frame length).
# The recorded run had 1076 rows.
sample_len = len(food_df)
# Fixed seed so the row order, and therefore the order of the saved results,
# is the same on every run.
sample = food_df.sample(sample_len, random_state=42)

In [6]:
# Candidate labels for the cognitive-theme scoring pass.
# The strings are passed to the classifier exactly as written (case included).
themes = [
    'Goût', 'Gourmandise',
    'santé', 'Nutrition', 'Diététique',
    'Origine', 'qualité', 'Naturel', 'Simplicité',
    'convivialité', 'amitié',
    'snacking', 'festif',
    'savoir-faire', 'sensation', 'rapidité',
]


In [None]:
# Hypothesis sentence handed to the classifier for the cognitive themes;
# '{}' is the placeholder for the candidate label.
hypothesis_template = (
    "Voici une description et un visuel pour une publicité, "
    "un de ses thèmes est : {}"
)

In [10]:
# Zero-shot scoring of every sampled ad against the cognitive `themes` labels.
# Each entry of `output_list` is the classifier result (handled as a dict) plus:
#   'error' -> False on success, True when the classifier raised
#   'id'    -> the ad's `original_index` value
output_list = []

for i, (index, row) in enumerate(sample.iterrows(), start=1):
    # '\r' keeps the progress counter on a single console line.
    print(f'computing zero shot for ad : {i} / {sample_len}', end='\r')
    original_index = row['original_index']
    try:
        output = zeroshot_classifier(row['visual_and_script'], themes, hypothesis_template=hypothesis_template, multi_label=True)
        output['error'] = False
    except KeyboardInterrupt:
        # Stop right away and keep what has been collected so far. Falling
        # through here would re-append the previous ad's result under this
        # ad's id (or hit an undefined `output` on the very first ad).
        print('Interrupted by user.')
        break
    except Exception:
        # Best effort: record the failure for this ad and carry on.
        print(f'{i} / {sample_len}; error : couldn\'t proceed id {original_index}')
        output = {'error': True}
    output['id'] = original_index
    output_list.append(output)




computing zero shot for ad : 1076 / 1076

In [11]:
# Save the cognitive-theme results as JSON in the current working directory.
cognitive_themes_json_path = '11b_zeroshot_drinks_cognitivethemes.json'
with open(cognitive_themes_json_path, 'w') as out_handle:
    json.dump(output_list, fp=out_handle)

# Emotions

In [10]:
# Candidate labels for the emotion scoring pass.
# The strings are passed to the classifier exactly as written.
emotion_themes = [
    'Joie', 'Colère', 'Confiance', 'Sérénité',
    'Humour /ironie', 'bonne humeur',
    'étonnement', 'surprise',
    'plaisir', 'Complicité',
]

# Hypothesis sentence for the emotion pass; '{}' is the placeholder for the label.
emotion_hypothesis_template = (
    "Voici une description et un visuel pour une publicité, "
    "son émotion est : {}"
)

In [11]:
# Zero-shot scoring of every sampled ad against the `emotion_themes` labels.
# Each entry of `output_list` is the classifier result (handled as a dict) plus:
#   'error' -> False on success, True when the classifier raised
#   'id'    -> the ad's `original_index` value
output_list = []

for i, (index, row) in enumerate(sample.iterrows(), start=1):
    # '\r' keeps the progress counter on a single console line.
    print(f'computing zero shot for ad : {i} / {sample_len}', end='\r')
    original_index = row['original_index']
    try:
        output = zeroshot_classifier(row['visual_and_script'], emotion_themes, hypothesis_template=emotion_hypothesis_template, multi_label=True)
        output['error'] = False
    except KeyboardInterrupt:
        # Stop right away and keep what has been collected so far. Falling
        # through here would re-append the previous ad's result under this
        # ad's id (or hit an undefined `output` on the very first ad).
        print('Interrupted by user.')
        break
    except Exception:
        # Best effort: record the failure for this ad and carry on.
        print(f'{i} / {sample_len}; error : couldn\'t proceed id {original_index}')
        output = {'error': True}
    output['id'] = original_index
    output_list.append(output)



computing zero shot for ad : 1076 / 1076

In [12]:
# Save the emotion results as JSON in the current working directory.
emotions_json_path = '11b_zeroshot_drinks_emotions.json'
with open(emotions_json_path, 'w') as out_handle:
    json.dump(output_list, fp=out_handle)

# Éléments de mise en scène

In [14]:
# Candidate labels for the staging / visual-asset scoring pass.
# The strings are passed to the classifier exactly as written.
visual_assets_themes = [
    'Mascotte', 'Famille', 'Saga', 'Danse',
    'bonne humeur', 'animaux', 'Recette', 'enfants',
    'ruralité', 'parents', 'interpellation', 'Paysage',
]

In [15]:
# Hypothesis sentence for the visual-asset pass; '{}' is the placeholder for the label.
visual_assets_hypothesis_template = (
    "Voici un visuel pour une publicité, "
    "son thème est : {}"
)

In [16]:
# Zero-shot scoring of every sampled ad's 'Visuel' text against the
# `visual_assets_themes` labels.
# Each entry of `output_list` is the classifier result (handled as a dict) plus:
#   'error' -> False on success, True when the classifier raised
#   'id'    -> the ad's `original_index` value
output_list = []

for i, (index, row) in enumerate(sample.iterrows(), start=1):
    # '\r' keeps the progress counter on a single console line.
    print(f'computing zero shot for ad : {i} / {sample_len}', end='\r')
    original_index = row['original_index']
    try:
        output = zeroshot_classifier(row['Visuel'], visual_assets_themes, hypothesis_template=visual_assets_hypothesis_template, multi_label=True)
        output['error'] = False
    except KeyboardInterrupt:
        # Stop right away and keep what has been collected so far. Falling
        # through here would re-append the previous ad's result under this
        # ad's id (or hit an undefined `output` on the very first ad).
        print('Interrupted by user.')
        break
    except Exception:
        # Best effort: record the failure for this ad and carry on.
        print(f'{i} / {sample_len}; error : couldn\'t proceed id {original_index}')
        output = {'error': True}
    output['id'] = original_index
    output_list.append(output)



computing zero shot for ad : 1076 / 1076

In [17]:
# Save the visual-asset results as JSON in the current working directory.
visual_assets_json_path = '11b_zeroshot_drinks_visualassets.json'
with open(visual_assets_json_path, 'w') as out_handle:
    json.dump(output_list, fp=out_handle)