In [None]:
# install transformers, evaluate
!pip install transformers evaluate
#!pip install tensorflow # huggingface needs a backend but tf is already on colab

In [None]:
# Install syft, restricted to the version range below.
# SYFT_VERSION is also passed to sy.requires() in a later cell.
SYFT_VERSION = ">=0.8.1b0,<0.9"
# The requirement is wrapped in literal double quotes — presumably so the
# shell does not interpret ">" / "<" in the specifier as redirections
# (TODO confirm).
package_string = f'"syft{SYFT_VERSION}"'
# pip flags: -f adds an extra location to search for wheels, -q reduces output.
%pip install {package_string} -f https://whls.blob.core.windows.net/unstable/index.html -q

In [None]:
import syft as sy
# Check the imported syft against the range pinned in SYFT_VERSION.
# NOTE(review): what happens on a mismatch is decided by syft — not visible here.
sy.requires(SYFT_VERSION)

In [None]:
# Launch a node named "syft-domain" through sy.orchestra.
# NOTE(review): reset=True presumably discards any previous state for this
# node name — confirm against the syft documentation.
node = sy.orchestra.launch(name="syft-domain", reset=True)

In [None]:
# Log in to the node. The resulting client is the one used below to change
# settings, upload the dataset and review the auditor's request.
# NOTE(review): these look like default admin credentials — confirm, and do
# not reuse them outside a local demo.
domain_client = node.login(email="info@openmined.org", password="changethis")

In [None]:
# Turn on guest signup; the auditor account is registered in the next cell.
domain_client.settings.allow_guest_signup(enable=True)

In [None]:
# Register the auditor's account on the node.
# NOTE(review): the value returned by register() is bound to `auditor_client`
# here, but that name is rebound to the login() result in the next cell, so
# this first value is never used afterwards.
auditor_client = node.client.register(name="Peter Jones", email="pjones@aisb.org", password="password1234")

In [None]:
# Log in with the credentials registered above; from here on `auditor_client`
# is the client used for every auditor-side call.
auditor_client = node.login(email="pjones@aisb.org", password="password1234")

In [None]:
import pandas as pd

# Load the GPT-2 activity log and the mock file that accompanies it.
model_log = pd.read_csv(
    "https://github.com/OpenMined/datasets/raw/main/AuditingBlogpost/gpt2_100row.csv"
)
mock_model_log = pd.read_csv(
    "https://raw.githubusercontent.com/OpenMined/datasets/main/AuditingBlogpost/gpt2_100row_mock.csv"
)

# The same contributor is credited on both the dataset and its single asset.
main_contributor = sy.Contributor(
    name="Jeffrey Salazar",
    role="Dataset Creator",
    email="jsala@ailab.com",
)

# One asset pairing the loaded log (`data`) with its mock counterpart (`mock`).
prompt_response_asset = sy.Asset(
    name="gpt2-mar23-prompts-responses",
    description="Text prompts and corresponding model predictions from GPT-2 (March 2023)",
    contributors=[main_contributor],
    data=model_log,
    mock=mock_model_log,
)

gpt2_user_log = sy.Dataset(
    name="GPT-2 Activity Log",
    description="User interactions from GPT-2 usage in text completion.",
    contributors=[main_contributor],
    asset_list=[prompt_response_asset],
)

# Upload through the domain client; as the last expression of the cell, the
# call's return value is shown as the cell output.
domain_client.upload_dataset(gpt2_user_log)

In [None]:
# Auditor side: take the first dataset listed for the auditor's client and the
# first asset inside it. `dataset` is reused later by the audit function's
# decorator.
dataset = auditor_client.datasets[0]
asset = dataset.assets[0]
# Bare expression at the end of the cell: displayed as the cell output.
asset

In [None]:
# Fetch the `mock` attribute of that same first asset and display it.
mock = dataset.assets[0].mock
mock

In [None]:
# Define the audit project, with the auditor's client as its only listed member.
# The project is started in a later cell via audit_project.start().
audit_project = sy.Project(
    name="Model Output Audit",
    description="Auditing GPT2 model outputs for toxicity, bias, etc.",
    members=[auditor_client],
)
# Bare expression at the end of the cell: displayed as the cell output.
audit_project

In [None]:
@sy.syft_function_single_use(data=dataset.assets[0])
def model_output_analysis(data):
    """Score the toxicity of the logged model outputs.

    Loads the Hugging Face ``evaluate`` "toxicity" measurement (described by
    the notebook author as using the R4 Target Model, a pretrained hate speech
    classification model), converts every entry of ``data["result"]`` to
    ``str`` and passes those strings to the measurement as predictions.

    Only toxicity is computed. A "regard" measurement (language polarity
    towards a demographic such as gender, race or sexual orientation) is not
    run by this function.

    Args:
        data: Object indexable with ``"result"``, yielding the model outputs
            to score. Bound by the decorator to the first asset of ``dataset``.

    Returns:
        Whatever ``compute(predictions=...)`` of the toxicity measurement
        returns for the stringified outputs.
    """
    # Imported inside the body rather than at module level — presumably
    # because the function is shipped to and executed on the node (confirm).
    import evaluate

    toxicity_measurement = evaluate.load("toxicity", module_type="measurement")
    # Cast each output to str before scoring.
    output_texts = list(map(str, data["result"]))
    toxicity_scores = toxicity_measurement.compute(predictions=output_texts)
    return toxicity_scores

In [None]:
# Attach a code request for model_output_analysis to the project, passing the
# auditor's client as the second argument.
audit_project.create_code_request(model_output_analysis, auditor_client)

In [None]:
# Display the `code` attribute of the auditor's client.
auditor_client.code

In [None]:
# Start the project and keep the returned object as `project`.
# NOTE: `project` is rebound from domain_client.projects[0] in a later cell.
project = audit_project.start()
project

In [None]:
# Domain-client side: display the projects listed for domain_client.
domain_client.projects

In [None]:
# Take the first listed project; this rebinds `project` from the earlier
# audit_project.start() result.
project = domain_client.projects[0]
project

In [None]:
# Take the first request listed for domain_client and display it.
# NOTE(review): assumes index 0 is the auditor's code request — holds only if
# no other request exists on this node.
request = domain_client.requests[0]
request

In [None]:
# Rebind `asset` to the first asset of the first dataset as seen through
# domain_client (earlier it came from the auditor's client).
asset = domain_client.datasets[0].assets[0]

In [None]:
# Call the request's code directly through `unsafe_function`, passing the
# asset's `data` attribute (as obtained through domain_client).
# NOTE(review): presumably this executes the auditor's function locally on the
# real data without syft's usual checks, hence the name — confirm.
real_result = request.code.unsafe_function(data=asset.data)
real_result

In [None]:
# Accept the request by depositing the result computed above.
request.accept_by_depositing_result(real_result)

In [None]:
# Auditor side again: rebind `asset` to the first asset as seen through the
# auditor's client, call the code through auditor_client.code with that asset,
# and retrieve the result with get_from(auditor_client). As the last
# expression of the cell, the retrieved value is displayed.
asset = auditor_client.datasets[0].assets[0]
auditor_client.code.model_output_analysis(data=asset).get_from(auditor_client)