In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Harm Taxonomy

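In this notebook, we encode the harm taxonomy (harm themes, their individual harms, and the failure modes each harm maps to), link the harms to related accident reports, and export the result as structured JSON files.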

In [1]:
import numpy as np
import google.generativeai as palm
import pandas as pd
import re

from json import load, dump
from tqdm import tqdm
from copy import deepcopy

# Read the API key with getpass() rather than input(): input() echoes the typed
# secret into the cell output, where it would be saved with the notebook.
from getpass import getpass

api_key = getpass('PaLM api key:')
palm.configure(api_key=api_key)

# Fixed seed constant; it is not referenced by any of the cells shown here.
SEED = 51923

In [2]:
# Failure modes that individual harms refer to through their 'failureModes'
# lists. Keyed by a short identifier; each entry has a display name and a
# one-sentence description.
failure_modes = dict(
    unsafe=dict(
        name="Unsafe Content",
        description="Generate child sexual abuse and exploitation content, sexually explicit content, or realistic violence and gore.",
    ),
    toxic=dict(
        name="Toxic Content",
        description="Generate discriminatory, malicious, abusive content, or harsh profanity.",
    ),
    inaccurate=dict(
        name="Inaccurate Content",
        description="Generate inaccurate information; provide legal, financial, or health advice.",
    ),
    opinionated=dict(
        name="Opinionated Content",
        description="Express opinions on sensitive topics; endorse brands; generate conspiratorial content.",
    ),
    privacy=dict(
        name="Privacy",
        description="Reveal individual's information and data; provenance.",
    ),
    illegal=dict(
        name="Harmful Activities",
        description="Promote or enable illegal, malicious, or harmful activities.",
    ),
)

In [3]:
# Theme: representational harms. Each harm has a name, a description, and the
# keys of the failure modes it maps to.
representational_harm = {
    "name": "Representational harms",
    "description": "When algorithmic systems reinforce the subordination of social groups along the lines of identity",
    "harms": [
        {
            "name": "Stereotyping",
            "description": "Oversimplified and undesirable representations",
            "failureModes": ["unsafe", "toxic", "opinionated"],
        },
        {
            "name": "Demeaning and alienating social groups",
            "description": "Narratives used to socially control or oppress social groups",
            "failureModes": ["unsafe", "toxic", "opinionated"],
        },
        {
            "name": "Denying people opportunity to self-identify",
            "description": "Non-consensual classifications or representations of a person in algorithmic systems",
            "failureModes": ["unsafe", "toxic", "opinionated"],
        },
    ],
}

# representational_harm["harms"].append(
#     {
#         "name": "Demeaning social groups",
#         "description": "Narratives used to socially control or oppress social groups",
#     }
# )

# representational_harm["harms"].append(
#     {
#         "name": "Erasing social groups",
#         "description": "Unequal visibility of certain social groups",
#     }
# )

# representational_harm["harms"].append(
#     {
#         "name": "Alienating social group",
#         "description": "Failure to acknowledge one's membership in a culturally significant social group",
#     }
# )

# representational_harm["harms"].append(
#     {
#         "name": "Denying people opportunity to self-identify",
#         "description": "Non-consensual classifications or representations of a person in algorithmic systems",
#     }
# )

# representational_harm["harms"].append(
#     {
#         "name": "Reifying essentialist social categories",
#         "description": 'Reinforcing socially constructed categories as "natural"',
#     }
# )

# Theme: allocative harms. Each harm has a name, a description, and the keys of
# the failure modes it maps to.
allocative_harms = {
    "name": "Allocative harms",
    "description": "When algorithmic systems withholds opportunities, resources, or information in domains that affect material well-being (e.g., finance, education, employment, healthcare, housing, insurance, and social welfare)",
    "harms": [
        {
            "name": "Opportunity loss",
            "description": "Inequitable access to information, services, or resources needed to equitably participate in society",
            "failureModes": ["inaccurate", "opinionated"],
        },
        {
            "name": "Economic loss",
            "description": "Financial losses, employment discrimination",
            "failureModes": ["inaccurate", "opinionated"],
        },
    ],
}

# Theme: quality-of-service harms. Each harm has a name, a description, and the
# keys of the failure modes it maps to.
quality_harms = {
    "name": "Quality of service harms",
    "description": "When algorithmic systems disproportionately fail for certain groups of people along the lines of identity (e.g., gender, race and ethnicity, disability)",
    "harms": [
        {
            "name": "Alienation",
            "description": "Feelings of frustration and exclusion when interacting with technologies that fail based on one’s identity.",
            "failureModes": ["unsafe", "toxic", "opinionated"],
        },
        {
            "name": "Increased labor",
            "description": "Additional effort required to make technologies operate, wasted time/labor based on technology failures",
            "failureModes": ["inaccurate", "illegal"],
        },
        {
            "name": "Service or benefit loss",
            "description": "Disproportionate loss of technological benefits",
            "failureModes": ["inaccurate", "opinionated"],
        },
    ],
}

# Theme: interpersonal harms. Each harm has a name, a description, and the keys
# of the failure modes it maps to.
interpersonal_harms = {
    "name": "Interpersonal harms",
    "description": "When technological affordances adversely shape relations between people and communities",
    "harms": [
        {
            "name": "Loss of agency or social control",
            "description": "Loss of autonomy, required use of specific technologies to access domains that affect material well-being",
            "failureModes": ["inaccurate", "opinionated", "privacy"],
        },
        {
            "name": "Technology-facilitated violence",
            "description": "Inciting or enabling offline violence, online abuse",
            "failureModes": ["illegal"],
        },
        {
            "name": "Diminished health and well-being",
            "description": "Emotional harms, physical harms, reputational harms",
            "failureModes": ["inaccurate", "opinionated", "illegal"],
        },
        {
            "name": "Privacy violations",
            "description": "Non-consensual data collection, identity theft, doxxing, plagiarism",
            "failureModes": ["privacy", "illegal"],
        },
    ],
}

# Theme: societal harms. Each harm has a name, a description, and the keys of
# the failure modes it maps to.
societal_harms = {
    "name": "Societal harms",
    "description": "The adverse macro-level societal effects of algorithmic systems (e.g., systematizing inequality, accelerating the scale of harm)",
    "harms": [
        {
            "name": "Information harms",
            "description": "Disinformation, misinformation, malinformation, distortion of reality",
            "failureModes": ["inaccurate"],
        },
        {
            "name": "Cultural harms",
            "description": "Cultural hegemony, proliferating false perceptions about cultural groups",
            "failureModes": ["inaccurate", "opinionated"],
        },
        {
            "name": "Political and civic harms",
            "description": "Erosion of democracy, legal system harms, nation destabilization",
            "failureModes": ["inaccurate", "opinionated", "illegal"],
        },
        {
            "name": "Macro socio-economic harms",
            "description": "loss of jobs, labor exploitation",
            "failureModes": ["inaccurate", "opinionated", "illegal"],
        },
        {
            "name": "Environmental harms",
            "description": "Damage to natural environment, animals, and property",
            "failureModes": ["illegal"],
        },
    ],
}


In [4]:
# Index the five harm themes by their display name, in taxonomy order.
# Each theme's "name" value is the key it is stored under.
harm_taxonomy = {
    theme["name"]: theme
    for theme in (
        representational_harm,
        allocative_harms,
        quality_harms,
        interpersonal_harms,
        societal_harms,
    )
}

## Link Harms to Accident Reports

In [10]:
# Load the report embeddings and the report documents from JSON.
# `with` closes each file as soon as it has been parsed; the previous bare
# open() calls never closed their handles explicitly.
with open('./accident-report-embeddings.json', 'r') as embeddings_file:
    embeddings = load(embeddings_file)

with open('./report_documents.json', 'r') as reports_file:
    accident_reports = load(reports_file)

# Array built from the JSON list stored under 'embeddings'; the shape is
# displayed as the cell output.
doc_embeddings = np.array(embeddings['embeddings'])
doc_embeddings.shape

(2950, 768)

In [11]:
def query_palm_embedding(text):
    """Return the PaLM embedding for ``text``.

    Calls ``palm.generate_embeddings`` with the ``models/embedding-gecko-001``
    model and returns the ``'embedding'`` entry of the response.
    """
    # Note: PaLM embedding is cased
    response = palm.generate_embeddings(
        model="models/embedding-gecko-001",
        text=text,
    )
    return response['embedding']

# For every harm, build a context string from the harm and theme names and
# descriptions (joined by " : "), embed it, and store both on the harm dict.
for cur_theme in harm_taxonomy.values():
    for harm in cur_theme["harms"]:
        cur_context = " : ".join(
            [
                harm["name"],
                harm["description"],
                cur_theme["name"],
                cur_theme["description"],
            ]
        )

        # The embedding is computed before either key is written, so a failed
        # query leaves the harm dict untouched.
        harm.update(
            context=cur_context,
            embedding=query_palm_embedding(cur_context),
        )

In [12]:
# Number of most-similar reports to list per harm.
top_k = 50

# Column-oriented table: one harm label and one newline-separated report
# listing per processed harm.
report_df_dict = {
    'harm': [],
    'report': []
}

# Loop-invariant, so built once. Assumed to be aligned row-for-row with
# `doc_embeddings` (the same indexes are used on both) -- TODO confirm.
report_numbers = np.array(embeddings['reportNumbers'])

for k in harm_taxonomy:
    cur_theme = harm_taxonomy[k]
    for harm in cur_theme["harms"]:
        # NOTE(review): only harms whose description contains this phrase are
        # processed; all others are skipped. This looks like a manual
        # one-harm-at-a-time filter -- confirm before treating
        # `report_df_dict` as covering the whole taxonomy.
        if 'Non-consensual classifications' not in harm['description']:
            continue

        cur_embedding = np.array(harm['embedding'])
        report_df_dict['harm'].append(harm['name'] + ' : ' + harm['description'])

        # Dot-product score against every report embedding, then the indexes
        # of the top_k highest scores (argpartition returns them unordered).
        similarities = np.dot(doc_embeddings, cur_embedding)
        top_indexes = np.argpartition(similarities, -top_k)[-top_k:]

        # .tolist() turns NumPy scalars into plain Python values. Each row is
        # rendered with str() below, and under NumPy >= 2 a NumPy scalar inside
        # a list prints as `np.int64(123)` / `np.float64(0.5)` rather than
        # `123` / `0.5`. The tagging cell's regex expects lines that start with
        # `[123,` or `['123',`.
        top_report_numbers = report_numbers[top_indexes].tolist()
        top_similarities = similarities[top_indexes].tolist()

        related_reports = []
        for report_number, similarity in zip(top_report_numbers, top_similarities):
            report = accident_reports[str(report_number)]
            related_reports.append([report_number, report['title'], report['date'], similarity])

        # Most similar report first.
        related_reports.sort(key=lambda x: x[3], reverse=True)

        # One report per line, each line terminated by '\n'.
        report_string = ''.join(str(r) + '\n' for r in related_reports)

        report_df_dict['report'].append(report_string)

In [13]:
# print(report_string)

In [14]:
# related_reports = []

# for k in accident_reports:
#     report = accident_reports[k]
            
#     if 'environment' in report['text'].lower():
#         related_reports.append([k, report['title'], report['date']])
        
#     report_string = ''
#     for r in related_reports:
#         report_string += str(r) + '\n'

In [5]:
# Load the harm/report tagging table. The following cell reads its 'Harm',
# 'Report' and 'Selected' columns.
tagged_df = pd.read_csv('./harm-report-tagging.csv')

In [6]:
def write_selected_reports(harm, report_nums, taxonomy=None):
    """Attach report numbers to the harm named ``harm``.

    Scans the taxonomy's themes in order and, on the first harm whose
    ``'name'`` equals ``harm``, stores ``report_nums`` (each converted with
    ``int``) under that harm's ``'reportNumbers'`` key.

    Args:
        harm: Exact harm name to look up.
        report_nums: Iterable of values accepted by ``int()``.
        taxonomy: Mapping of theme key to a theme dict holding a ``'harms'``
            list. Defaults to the module-level ``harm_taxonomy``.

    Returns:
        ``True`` if a harm was updated. ``False`` if no harm has that name; a
        warning is emitted in that case, so a misspelled name in the tagging
        data is no longer dropped silently.
    """
    import warnings

    if taxonomy is None:
        taxonomy = harm_taxonomy

    for cur_theme in taxonomy.values():
        for h in cur_theme["harms"]:
            if h['name'] == harm:
                h['reportNumbers'] = list(map(int, report_nums))
                return True

    warnings.warn(
        f"No harm named {harm!r} in the taxonomy; report numbers were not stored."
    )
    return False

# Copy the selected report numbers from the tagging table onto the taxonomy.
for _, row in tagged_df.iterrows():
    # 'Harm' cells have the form "<name> : <description>"; only the name is
    # used for the lookup.
    harm = row['Harm'].split(' : ')[0]
    print(harm)

    # 'Selected' holds one report entry per line, starting with "[123," or
    # "['123',". The regex keeps only that leading report number. Blank lines
    # (e.g. from a trailing newline) are skipped; they previously raised
    # ValueError from int('').
    selected_report_nums = [
        int(re.sub(r"\['?(\d+)'?,.*", r'\1', s))
        for s in row['Selected'].split('\n')
        if s.strip()
    ]

    write_selected_reports(harm, selected_report_nums)


Stereotyping
Demeaning and alienating social groups
Denying people opportunity to self-identify
Opportunity loss
Economic loss
Alienation
Increased labor
Service or benefit loss
Loss of agency or social control
Technology-facilitated violence
Diminished health and well-being
Privacy violations
Information harms
Cultural harms
Political and civic harms
Macro socio-economic harms
Environmental harms


In [7]:
# Work on a deep copy so the original taxonomy keeps its 'embedding' and
# 'context' entries; the copy drops both keys from every harm.
harm_taxonomy_no_emb = deepcopy(harm_taxonomy)

for cur_theme in harm_taxonomy_no_emb.values():
    for harm in cur_theme["harms"]:
        for transient_key in ('embedding', 'context'):
            # A default of None makes a missing key a no-op.
            harm.pop(transient_key, None)

# Payload for export: the stripped taxonomy plus the failure-mode definitions.
harm_data = dict(
    harmTaxonomy=harm_taxonomy_no_emb,
    failureModes=failure_modes,
)

In [9]:
# Display the second harm of the "Representational harms" theme from the
# stripped copy, as a spot check of the exported structure.
harm_taxonomy_no_emb['Representational harms']['harms'][1]

{'name': 'Demeaning and alienating social groups',
 'description': 'Narratives used to socially control or oppress social groups',
 'failureModes': ['unsafe', 'toxic', 'opinionated'],
 'reportNumbers': [2309, 2651, 2212, 2297, 2653, 1019, 48]}

In [12]:
# Write both JSON files. `with` flushes and closes each file when its block
# exits; the previous bare open(..., 'w') handles were never closed explicitly.

# Taxonomy as held in `harm_taxonomy`, including any 'embedding' and 'context'
# entries present on its harms.
with open('./harm-taxonomy-emb.json', 'w') as emb_file:
    dump(harm_taxonomy, emb_file)

# Stripped taxonomy plus failure modes.
with open('../src/components/harm-summary/harm-taxonomy.json', 'w') as summary_file:
    dump(harm_data, summary_file)