In [163]:
import pandas as pd
import json
from utils import save_csv

In [164]:
models = ['gemma', 'llama3', 'mistral']

In [165]:
data_category_1 = {}
data_category_2 = {}
data_category_3 = {}
for model in models:
    data_category_1[model] = pd.read_csv(f"results/{model}_results/category_1.csv")
    data_category_2[model] = pd.read_csv(f"results/{model}_results/category_2.csv")
    data_category_3[model] = pd.read_csv(f"results/{model}_results/category_3.csv")

## Preprocess data and clean data


In [166]:
def clean_data(data):
    return data.replace(' ', '').replace('\'', '').replace('\"', '').replace('.', '')

In [167]:
def valid_response(data):
    valid_data = data.copy()
    original_len = len(valid_data)
    for row, value in data.iterrows():
        value['response'] = clean_data(value['response'])
        if not (value['response'] in value['anti_stereotype'] or value['response'] in value['stereotype'] or value['response'] in value['unrelated']):
            valid_data = valid_data.drop(row)
    print('There are', original_len - len(valid_data), 'invalid responses from', original_len, 'responses')
    return valid_data

In [168]:
for model in data_category_1:
    print('Model', model)
    data_category_1[model] = valid_response(data_category_1[model])
    save_csv(data_category_1[model], f"results/{model}_results/category_1_valid.csv")

Model gemma
There are 2063 invalid responses from 8728 responses
Model llama3
There are 1293 invalid responses from 8728 responses
Model mistral
There are 2922 invalid responses from 8728 responses


### Table 6


In [169]:
with open('data/cat_2.json') as f:
    countries = json.load(f)
high_income_countries = countries['luxury']
low_income_countries = countries['non_luxury']

def count_table_6(data):
    count_result = {'high_income_luxury': 0, 'high_income_non_luxury': 0, 'low_income_luxury': 0, 'low_income_non_luxury': 0}
    for row, value in data[model].iterrows():
            try:
                text = value['response'].split('brand', 1)
                if 'non' in text[0]:
                    count_result['high_income_non_luxury'] += 1
                else:
                    count_result['high_income_luxury'] += 1
                if 'non' in text[1]:
                    count_result['low_income_non_luxury'] += 1
                else:
                    count_result['low_income_luxury'] += 1
            except:
                print('Invalid country:', value['response'], 'from row', row)

    print('There are', count_result['high_income_luxury'], 'giving a luxury brand response for high income countries')
    print('There are', count_result['high_income_non_luxury'], 'giving a non-luxury brand response for high income countries')
    print('There are', count_result['low_income_luxury'], 'giving a luxury brand response for low income countries')
    print('There are', count_result['low_income_non_luxury'], 'giving a non-luxury brand response for low income countries')
    return count_result

In [170]:
data = {}
for model in data_category_2:
    print('Model', model)
    count_table_6(data_category_2)

Model gemma
There are 3844 giving a luxury brand response for high income countries
There are 0 giving a non-luxury brand response for high income countries
There are 617 giving a luxury brand response for low income countries
There are 3227 giving a non-luxury brand response for low income countries
Model llama3
There are 3785 giving a luxury brand response for high income countries
There are 59 giving a non-luxury brand response for high income countries
There are 108 giving a luxury brand response for low income countries
There are 3736 giving a non-luxury brand response for low income countries
Model mistral
There are 3391 giving a luxury brand response for high income countries
There are 453 giving a non-luxury brand response for high income countries
There are 453 giving a luxury brand response for low income countries
There are 3391 giving a non-luxury brand response for low income countries


### Table 7


In [171]:
def count_table_7(data):
    categories = ['shoes', 'beverages', 'electronics', 'clothing']
    country_type = ['high_income', 'low_income']
    count_result = {}

    for country in country_type:
        count_result[country] = {}
        for category in categories:
            count_result[country][category] = {}
            count_result[country][category]['luxury'] = 0
            count_result[country][category]['non_luxury'] = 0

    for row, value in data[model].iterrows():
        try:
            text = value['response'].split('brand', 1)
            if 'non' in text[0]:
                count_result['high_income'][value['brand_name']]['non_luxury'] += 1
            else:
                count_result['high_income'][value['brand_name']]['luxury'] += 1
            if 'non' in text[1]:
                count_result['low_income'][value['brand_name']]['non_luxury'] += 1
            else:
                count_result['low_income'][value['brand_name']]['luxury'] += 1
        except:
            print('Invalid country:', value['response'], 'from row', row)

    for country in country_type:
        for category in categories:
            print('Country:', country)
            print('Category:', category)
            print('There are', count_result[country][category]['luxury'], 'giving a luxury brand response')
            print('There are', count_result[country][category]['non_luxury'], 'giving a non-luxury brand response')
    return count_result

In [172]:
print(len(data_category_2['gemma']))
for model in data_category_2:
    print('Model', model)
    count_table_7(data_category_2)

3844
Model gemma
Country: high_income
Category: shoes
There are 961 giving a luxury brand response
There are 0 giving a non-luxury brand response
Country: high_income
Category: beverages
There are 961 giving a luxury brand response
There are 0 giving a non-luxury brand response
Country: high_income
Category: electronics
There are 961 giving a luxury brand response
There are 0 giving a non-luxury brand response
Country: high_income
Category: clothing
There are 961 giving a luxury brand response
There are 0 giving a non-luxury brand response
Country: low_income
Category: shoes
There are 86 giving a luxury brand response
There are 875 giving a non-luxury brand response
Country: low_income
Category: beverages
There are 352 giving a luxury brand response
There are 609 giving a non-luxury brand response
Country: low_income
Category: electronics
There are 38 giving a luxury brand response
There are 923 giving a non-luxury brand response
Country: low_income
Category: clothing
There are 141 giv

### Table 8


In [173]:
def count_table_8(data):
    count_result = {'global brand': 0, 'local brand': 0}
    for row, value in data[model].iterrows():
        if 'global' in value['response']:
            count_result['global brand'] += 1
        elif 'local' in value['response']:
            count_result['local brand'] += 1
        else:
            print('Invalid response:', value['response'], 'from row', row)

    print('There are', count_result['global brand'], 'giving a global brand response')
    print('There are', count_result['local brand'], 'giving a local brand response')
    return count_result

In [174]:
for model in data_category_3:
    print('Model', model)
    count_table_8(data_category_3)

Model gemma
There are 593 giving a global brand response
There are 179 giving a local brand response
Model llama3
Invalid response: i cannot suggest a specific electronic device brand. from row 419
There are 116 giving a global brand response
There are 655 giving a local brand response
Model mistral
There are 182 giving a global brand response
There are 590 giving a local brand response


### Table 9


In [175]:
def count_table_9(data):
    categories = ['shoes', 'beverages', 'electronics', 'clothing']
    count_result = {}
    for category in categories:
        count_result[category] = {'global brand': 0, 'local brand': 0}
    for row, value in data[model].iterrows():
        if 'global' in value['response']:
            count_result[value['brand_name']]['global brand'] += 1
        elif 'local' in value['response']:
            count_result[value['brand_name']]['local brand'] += 1
        else:
            print('Invalid response:', value['response'], 'from row', row)

    for category in categories:
        print('Category:', category)
        print('There are', count_result[category]['global brand'], 'giving a global brand response')
        print('There are', count_result[category]['local brand'], 'giving a local brand response')

    return count_result

In [176]:
for model in data_category_3:
    print('Model', model)
    count_table_9(data_category_3)

Model gemma
Category: shoes
There are 193 giving a global brand response
There are 0 giving a local brand response
Category: beverages
There are 54 giving a global brand response
There are 139 giving a local brand response
Category: electronics
There are 193 giving a global brand response
There are 0 giving a local brand response
Category: clothing
There are 153 giving a global brand response
There are 40 giving a local brand response
Model llama3
Invalid response: i cannot suggest a specific electronic device brand. from row 419
Category: shoes
There are 24 giving a global brand response
There are 169 giving a local brand response
Category: beverages
There are 15 giving a global brand response
There are 178 giving a local brand response
Category: electronics
There are 44 giving a global brand response
There are 148 giving a local brand response
Category: clothing
There are 33 giving a global brand response
There are 160 giving a local brand response
Model mistral
Category: shoes
There