In [5]:
#import modules
import pandas as pd
import numpy as np
from transformers import BertForSequenceClassification,BertTokenizerFast,AutoModelForSequenceClassification,AutoTokenizer,Trainer, TrainingArguments
import tensorflow as tf
from tensorflow import keras
from datasets import Dataset
from sklearn.metrics import classification_report,f1_score
import re
import evaluate

In [6]:
#processed Train data
p_train_df=pd.read_csv('../../Data/data_processed_train.csv',encoding_errors='strict',encoding="utf-8")
p_train_df.head()

Unnamed: 0,target,text
0,-1,roomie call inform someone try break apartment...
1,-1,would get 16gb iphone didnt extra 100
2,-1,stay home boring day
3,1,voting eu parliament heritage rule danish mona...
4,1,ooooh sbs2 thats excite relevant medium audien...


In [7]:
#processed Test and validation data
p_test_df=pd.read_csv("../../Data/data_processed_test.csv")
p_val_df=pd.read_csv("../../Data/data_processed_validate.csv")

In [8]:
# Length of all dataframe and number of observation in each dataframe

In [9]:

len(p_train_df[p_train_df.target==1]),len(p_train_df[p_train_df.target==-1])

(4986, 5014)

In [10]:
len(p_test_df[p_test_df.target==1]),len(p_test_df[p_test_df.target==-1])

(2479, 2521)

In [11]:
len(p_val_df[p_val_df.target==1]),len(p_val_df[p_val_df.target==-1])

(988, 1012)

# Tokenizer loading

In [12]:
bert_tokenizer=BertTokenizerFast.from_pretrained("bert-large-uncased")

Downloading (…)okenizer_config.json: 100%|██████████| 28.0/28.0 [00:00<?, ?B/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 5.05MB/s]
Downloading (…)/main/tokenizer.json: 100%|██████████| 466k/466k [00:00<00:00, 6.13MB/s]
Downloading (…)lve/main/config.json: 100%|██████████| 571/571 [00:00<?, ?B/s] 


In [13]:
tokenizer=AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")


Downloading (…)okenizer_config.json: 100%|██████████| 48.0/48.0 [00:00<?, ?B/s]
Downloading (…)lve/main/config.json: 100%|██████████| 629/629 [00:00<?, ?B/s] 
Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 21.7MB/s]


In [14]:
MAX_LEN=128

In [15]:
def text_len_cal(df):
    text_len = []
    for text in df.text:
        tweet_len = len(text.split())
        text_len.append(tweet_len)
    df["text_len"]=text_len
    return df 


def token_lens_cal(df):
    token_lens = []

    for i,txt in enumerate(df.text.values):
        tokens = tokenizer.encode(txt, max_length=512, truncation=True)
        token_lens.append(len(tokens))
    df["token_lens"]=token_lens

    
    return df



def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)


def data_preprocess(df):
    df['target'].replace({-1:0},inplace=True)
    df.rename(columns={'target':'label'}, inplace=True)
    return df


In [16]:
p_train_df=text_len_cal(p_train_df)
p_val_df=text_len_cal(p_val_df)
p_test_df=text_len_cal(p_test_df)

In [17]:
p_train_df=token_lens_cal(p_train_df)
p_val_df=token_lens_cal(p_val_df)
p_test_df=token_lens_cal(p_test_df)

In [18]:

p_train_df = data_preprocess(p_train_df)
p_eval_df = data_preprocess(p_val_df)
p_test_df = data_preprocess(p_test_df)

p_train_df

Unnamed: 0,label,text,text_len,token_lens
0,0,roomie call inform someone try break apartment...,8,14
1,0,would get 16gb iphone didnt extra 100,7,11
2,0,stay home boring day,4,6
3,1,voting eu parliament heritage rule danish mona...,12,14
4,1,ooooh sbs2 thats excite relevant medium audien...,12,20
...,...,...,...,...
9995,1,man sideswith he sweetest dont fck cuz there t...,12,22
9996,0,omg weather disgust today look like im go pull...,10,15
9997,0,watch green mile want john coffey die,7,10
9998,1,one 1st 100 tweet it id really like demo 20,10,14


In [19]:
def create_dataset(df):
    dataset = Dataset.from_pandas(df)
    train_dataset = dataset.map(tokenize_function, batched=True)
    return train_dataset
    

In [20]:
p_train_dataset=create_dataset(p_train_df)
p_test_dataset=create_dataset(p_test_df)
p_eval_dataset=create_dataset(p_eval_df)

                                                                   

In [21]:
def prediction_output(df,tokenize_function,trainer):
#     df=data_preprocess(df)
    dataset = Dataset.from_pandas(df)
    train_dataset = dataset.map(tokenize_function, batched=True)
    predictions = trainer.predict(train_dataset)
    train_preds = predictions.predictions.argmax(-1)
    train_labels = pd.Series(train_preds).map(bert_model.config.id2label)
    train_scores = (np.exp(predictions[0])/np.exp(predictions[0]).sum(-1,keepdims=True)).max(1)
    train_pred_df = pd.DataFrame(list(zip(train_preds,train_labels,train_scores)), columns=['pred','label','score'])
    return train_pred_df

In [22]:
p_train_dataset

Dataset({
    features: ['label', 'text', 'text_len', 'token_lens', 'input_ids', 'attention_mask'],
    num_rows: 10000
})

**# Preporcssed data without hypertuning parameters**

In [23]:
bert_model=BertForSequenceClassification.from_pretrained("bert-large-uncased")

Downloading pytorch_model.bin: 100%|██████████| 1.34G/1.34G [00:21<00:00, 63.7MB/s]
Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weigh

In [24]:
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english",num_labels=2)

Downloading pytorch_model.bin: 100%|██████████| 268M/268M [00:03<00:00, 68.0MB/s] 


In [25]:

trainer1=Trainer(model=model,
                train_dataset=p_train_dataset,
               eval_dataset=p_eval_dataset)

In [26]:
trainer1.train()

  0%|          | 9/3750 [02:10<14:52:27, 14.31s/it]

KeyboardInterrupt: 

In [None]:
def prediction_report(trainer,test):
    predictions = trainer.predict(test)
    preds = predictions.predictions.argmax(-1)
    labels = pd.Series(preds).map({0:'negative',1:'positive'})
    scores = (np.exp(predictions[0])/np.exp(predictions[0]).sum(-1,keepdims=True)).max(1)
    df = pd.DataFrame(list(zip(preds,labels,scores)), columns=['pred','label','score'])
    
    
    y_pred = preds
    y_true = test['label']
    
    
    print(classification_report(y_true, y_pred, target_names=['negative','positive'], digits=4))
    return df

In [None]:
prediction_report(trainer1,p_test_dataset)

In [None]:
trainer1.save_model("Trainer1.h5")

In [None]:
metric = evaluate.combine(["accuracy", "f1", "precision", "recall"])


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)


In [None]:

model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english",num_labels=2)
training_args = TrainingArguments(output_dir="test_trainer", overwrite_output_dir=True, evaluation_strategy="epoch", learning_rate=0.0001, per_device_train_batch_size=8)

trainer2 = Trainer(
    model=model,
    args=training_args,
    train_dataset=p_train_dataset,
    eval_dataset=p_eval_dataset,
    compute_metrics=compute_metrics,
)
trainer2.train()



In [None]:
prediction_report(trainer2,p_test_dataset)


In [None]:
trainer2.save_model("Trainer2")

# Unprocessed data to train

In [None]:
train_df=pd.read_csv('../../Data/data_unprocessed_train.csv',encoding="utf-8")
train_df.head()

eval_df=pd.read_csv('../../Data/data_unprocessed_validate.csv',encoding="utf-8")
eval_df.head()

test_df=pd.read_csv('../../Data/data_unprocessed_test.csv',encoding="utf-8")
test_df.head()

In [None]:
train_df=text_len_cal(train_df)
eval_df=text_len_cal(eval_df)
test_df=text_len_cal(test_df)
train_df=token_lens_cal(train_df)
eval_df=token_lens_cal(eval_df)
test_df=token_lens_cal(test_df)
train_df=data_preprocess(train_df)
eval_df=data_preprocess(eval_df)
test_df=data_preprocess(test_df)

In [None]:
train=create_dataset(train_df)
evald=create_dataset(eval_df)
training_args = TrainingArguments(output_dir="test_trainer", overwrite_output_dir=True, num_train_epochs=5, learning_rate=5e-6, per_device_train_batch_size=30)

trainer3 = Trainer(
    model=model,
    args=training_args,
    train_dataset=train,
    eval_dataset=evald,
    compute_metrics=compute_metrics,
)
trainer3.train()

In [None]:
test=create_dataset(test_df)
prediction_report(trainer3,test)

In [None]:
trainer3.save_model("Trainer3")

In [None]:

training_args = TrainingArguments(output_dir="test_trainer", overwrite_output_dir=True, evaluation_strategy="epoch", per_device_train_batch_size=8)

trainer4 = Trainer(
    model=model,
    args=training_args,
    train_dataset=train,
    eval_dataset=evald,
    compute_metrics=compute_metrics,
)
trainer4.train()

In [None]:
trainer4.save_model("Trainer4")

# EEC COrpus bias

In [None]:
model=AutoModelForSequenceClassification.from_pretrained("/kaggle/working/Trainer2",local_files_only=True)

In [None]:
data=pd.read_csv("../../Data/EEC/Equity-Evaluation-Corpus.csv")

data['Sentence'].nunique()

data.isnull().sum()

data.head(10)

data.shape

data.head()
data.rename(columns={"Sentence":"text"},inplace=True)

In [None]:
data.head()

In [None]:
data.Race.unique()

In [None]:
df_test_male=data[data.Gender=="male"][["text"]].copy()


In [None]:
df_test_male

In [None]:
df_test_female=data[data.Gender=="female"][["text"]].copy()

In [None]:
df_test_female

In [None]:
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

dataset_test_male = Dataset.from_pandas(df_test_male)
dataset_test_female = Dataset.from_pandas(df_test_female)

In [None]:
test_dataset_male = dataset_test_male.map(tokenize_function, batched=True)
test_dataset_female = dataset_test_female.map(tokenize_function, batched=True)

trainer = Trainer(model=model)

In [None]:
predictions_male = trainer3.predict(test_dataset_male)
predictions_female = trainer3.predict(test_dataset_female)

preds_male = predictions_male.predictions.argmax(-1)
scores_male = (np.exp(predictions_male[0])/np.exp(predictions_male[0]).sum(-1,keepdims=True)).max(1)

preds_female = predictions_female.predictions.argmax(-1)
scores_female = (np.exp(predictions_female[0])/np.exp(predictions_female[0]).sum(-1,keepdims=True)).max(1)

df = pd.DataFrame(list(zip(preds_male, preds_female, scores_male, scores_female)), columns=['pred_male', 'pred_female' ,'score_male', 'score_female'])
df

In [None]:
df['Match'] = df['pred_male'] == df['pred_female']
df['Match'].value_counts()

In [None]:
df[df['Match'] == False]

In [None]:
 BIAS_THRESHOLD=0.05

In [None]:
df['diff'] = abs(df['score_male'] - df['score_female'])
# This is dropping the 29 records that dont have the same prediction. 
df = df[df['Match']==True]



df_sig = df[df['diff'] > BIAS_THRESHOLD]
df_nonsig = df[df['diff'] <= BIAS_THRESHOLD]

df_nonsig

In [None]:
df_sig

In [None]:
pd.options.display.float_format = '{:.6f}'.format
df_sig[['score_male', 'score_female','diff']].describe()

In [None]:
male_anger=create_dataset( data[(data.Emotion=="anger") & (data.Gender=="male")][["text"]])
female_anger=create_dataset(data[(data.Emotion=="anger") & (data.Gender=="female")][["text"]])

In [None]:
def bias_check(df,df2,key,trainer):
    predictions_male = trainer.predict(df)
    predictions_female = trainer.predict(df2)

    preds_male = predictions_male.predictions.argmax(-1)
    scores_male = (np.exp(predictions_male[0])/np.exp(predictions_male[0]).sum(-1,keepdims=True)).max(1)

    preds_female = predictions_female.predictions.argmax(-1)
    scores_female = (np.exp(predictions_female[0])/np.exp(predictions_female[0]).sum(-1,keepdims=True)).max(1)

    df = pd.DataFrame(list(zip(preds_male, preds_female, scores_male, scores_female)), columns=[f'pred_male_{key}', f'pred_female_{key}' ,f'score_male_{key}', f'score_female_{key}'])
    df['Match'] = df[f'pred_male_{key}'] == df[f'pred_female_{key}']
    print(f"number of matches {df['Match'].value_counts()}")
    print(f"number of observations {len(df)}")
    
    df[df['Match'] == False]
    
    BIAS_THRESHOLD=0.05
    
    df['diff'] = abs(df[f'score_male_{key}'] - df[f'score_female_{key}'])
    # This is dropping the 29 records that dont have the same prediction. 
    df = df[df['Match']==True]


    df_sig = df[df['diff'] > BIAS_THRESHOLD]
    df_nonsig = df[df['diff'] <= BIAS_THRESHOLD]

    pd.options.display.float_format = '{:.6f}'.format
    return df_sig[[f'score_male_{key}', f'score_female_{key}','diff']].describe()

In [None]:
bias_check(male_anger,female_anger,"anger",trainer3)

In [None]:
male_sad=create_dataset( data[(data.Emotion=="sadness") & (data.Gender=="male")][["text"]])
female_sad=create_dataset(data[(data.Emotion=="sadness") & (data.Gender=="female")][["text"]])
bias_check(male_sad,female_sad,"sad",trainer3)

In [None]:
male_fear=create_dataset( data[(data.Emotion=="fear") & (data.Gender=="male")][["text"]])
female_fear=create_dataset(data[(data.Emotion=="fear") & (data.Gender=="female")][["text"]])
bias_check(male_fear,female_fear,"fear",trainer3)

In [None]:
male_joy=create_dataset( data[(data.Emotion=="joy") & (data.Gender=="male")][["text"]])
female_joy=create_dataset(data[(data.Emotion=="joy") & (data.Gender=="female")][["text"]])
bias_check(male_joy,female_joy,"joy",trainer3)

In [None]:
data[(data.Race.isnull()) & (data.Gender=="male")]
data[(data.Race.isnull()) & (data.Gender=="female")]

In [None]:
data.Emotion.fillna("No_emotion",inplace=True)

In [None]:
data[(data.Emotion=="No_emotion") & (data.Gender=="female")]

In [None]:
male_noemotion=create_dataset(data[(data.Emotion=="No_emotion") & (data.Gender=="male")][["text"]])
female_noemotion=create_dataset(data[(data.Emotion=="No_emotion") & (data.Gender=="female")][["text"]])
bias_check(male_noemotion,female_noemotion,"no_emotion",trainer3)

In [None]:
male_noemotion=create_dataset(data[(data.Emotion=="No_emotion") & (data.Gender=="male")][["text"]])
female_noemotion=create_dataset(data[(data.Emotion=="No_emotion") & (data.Gender=="female")][["text"]])
bias_check(male_noemotion,female_noemotion,"no_emotion",trainer3)

In [None]:
race_male_no_emtion=create_dataset(data[(data.Race.isnull()) & (data.Gender=="male")])
race_female_no_emotion=create_dataset(data[(data.Race.isnull()) & (data.Gender=="female")])
bias_check(race_male_no_emtion,race_female_no_emotion,"No_emtion",trainer3)

In [None]:
c

In [None]:
data.Person.unique()

In [None]:
male_nouns = ["he",'this man', 'this boy', 'my brother', 'my son', 'my husband',
'my boyfriend', 'my father', 'my uncle', 'my dad',"him"]

female_noun = ['she', 'this woman', 'this girl', 'my sister',
'my daughter', 'my wife', 'my girlfriend', 'my mother', 'my aunt',
'my mom', 'her']

In [None]:
for i,j in zip(male_nouns,female_noun):
    male=create_dataset(data[(data.Race.isnull()) & (data.Person==i)])
    female=create_dataset(data[(data.Race.isnull()) & (data.Person==j)])
    
    print(bias_check(male,female,f"{i}_{j}",trainer3))

In [None]:
race_male_anger=create_dataset(data[(data.Race=="African-American") & (data.Gender=="male")])
race_female_anger=create_dataset(data[(data.Race=="African-American") & (data.Gender=="female")])
bias_check(race_male_anger,race_female_anger,"anger",trainer3)

In [None]:
def gender_compare(set_name, male_set, female_set):    
    predictions_male = trainer.predict(male_set)
    predictions_female = trainer.predict(female_set)

    preds_male = predictions_male.predictions.argmax(-1)
    scores_male = (np.exp(predictions_male[0])/np.exp(predictions_male[0]).sum(-1,keepdims=True)).max(1)

    preds_female = predictions_female.predictions.argmax(-1)
    scores_female = (np.exp(predictions_female[0])/np.exp(predictions_female[0]).sum(-1,keepdims=True)).max(1)

    df = pd.DataFrame(list(zip(preds_male, preds_female, scores_male, scores_female)), columns=['pred_male', 'pred_female' ,'score_male', 'score_female'])
    df['Match'] = df['pred_male'] == df['pred_female']
    value_counts = df['Match'].value_counts()
    print(f'Comparison Set - {set_name}')
    print('The model counts for predicting the same label for each gender:')
    try: 
        print(f'True: {value_counts[True]}')
    except:
        pass
    try:
        print(f'False: {value_counts[False]}')
    except:
        pass
    df = df[df['Match']==True]
    df['diff'] = abs(df['score_male'] - df['score_female'])
    df_sig = df[df['diff'] > 0]
    df_nonsig = df[df['diff'] == 0]
    print(f'The total number of records with same predicted label: {len(df.index)}')
    print(f'The model predicted the same value for this many records: {len(df_nonsig.index)}')
    print(f'The model predicted a different value for this many records: {len(df_sig.index)}')
    print(f"Male average: {df_sig.describe().loc['mean', 'score_male']}")
    print(f"Female average: {df_sig.describe().loc['mean', 'score_female']}")
    print(f"Average difference: {df_sig.describe().loc['mean', 'diff']}")
    print('-'*25)
    print()

In [None]:
def race_compare(set_name, AA_set, E_set):    
    predictions_AA = trainer.predict(AA_set)
    predictions_E = trainer.predict(E_set)

    preds_AA = predictions_AA.predictions.argmax(-1)
    scores_AA = (np.exp(predictions_AA[0])/np.exp(predictions_AA[0]).sum(-1,keepdims=True)).max(1)

    preds_E = predictions_E.predictions.argmax(-1)
    scores_E = (np.exp(predictions_E[0])/np.exp(predictions_E[0]).sum(-1,keepdims=True)).max(1)

    df = pd.DataFrame(list(zip(preds_AA, preds_E, scores_AA, scores_E)), columns=['pred_AA', 'pred_E' ,'score_AA', 'score_E'])

    df['Match'] = df['pred_AA'] == df['pred_E']
    value_counts = df['Match'].value_counts()
    print(f'Comparison Set - {set_name}')
    print('The model counts for predicting the same label for each race:')
    try: 
        print(f'True: {value_counts[True]}')
    except:
        pass
    try:
        print(f'False: {value_counts[False]}')
    except:
        pass    
    df = df[df['Match']==True]
    df['diff'] = abs(df['score_AA'] - df['score_E'])
    df_sig = df[df['diff'] > 0]
    df_nonsig = df[df['diff'] == 0]
    print(f'The total number of records with same predicted label: {len(df.index)}')
    print(f'The model predicted the same value for this many records: {len(df_nonsig.index)}')
    print(f'The model predicted a different value for this many records: {len(df_sig.index)}')
    print(f"African-American average: {df_sig.describe().loc['mean', 'score_AA']}")
    print(f"European average: {df_sig.describe().loc['mean', 'score_E']}")
    print(f"Average difference: {df_sig.describe().loc['mean', 'diff']}")
    print('-'*25)
    print()

In [None]:
data[(data.Gender=="male") & (data.Emotion=="joy") & (data.Race=="African-American")]
df_female_angry_AA =data[(data.Gender=="female") & (data.Emotion=="anger") & (data.Race=="African-American")][["text"]] 
df_female_fear_AA = data[(data.Gender=="female") & (data.Emotion=="anger") & (data.Race=="African-American")][["text"]]
df_female_joy_AA = data[(data.Gender=="female") & (data.Emotion=="joy") & (data.Race=="African-American")][["text"]]
df_female_sadness_AA = data[(data.Gender=="female") & (data.Emotion=="sadness") & (data.Race=="African-American")][["text"]]
df_male_angry_AA = data[(data.Gender=="male") & (data.Emotion=="anger") & (data.Race=="African-American")][["text"]]
df_male_fear_AA = data[(data.Gender=="male") & (data.Emotion=="fear") & (data.Race=="African-American")][["text"]]
df_male_joy_AA = data[(data.Gender=="male") & (data.Emotion=="joy") & (data.Race=="African-American")][["text"]]
df_male_sadness_AA = data[(data.Gender=="male") & (data.Emotion=="sadness") & (data.Race=="African-American")][["text"]]
df_female_angry_E = data[(data.Gender=="female") & (data.Emotion=="anger") & (data.Race=="European")][["text"]]
df_female_fear_E = data[(data.Gender=="female") & (data.Emotion=="fear") & (data.Race=="European")][["text"]]
df_female_joy_E = data[(data.Gender=="female") & (data.Emotion=="joy") & (data.Race=="European")][["text"]]
df_female_sadness_E = data[(data.Gender=="female") & (data.Emotion=="sadness") & (data.Race=="European")][["text"]]
df_male_angry_E = data[(data.Gender=="male") & (data.Emotion=="fear") & (data.Race=="European")][["text"]]
df_male_fear_E = data[(data.Gender=="male") & (data.Emotion=="fear") & (data.Race=="European")][["text"]]
df_male_joy_E = data[(data.Gender=="male") & (data.Emotion=="joy") & (data.Race=="European")][["text"]]
df_male_sadness_E = data[(data.Gender=="male") & (data.Emotion=="sadness") & (data.Race=="European")][["text"]]
df_female_AA_non_emotion = data[(data.Gender=="female") & (data.Emotion.isnull()) & (data.Race=="African-American")][["text"]]
df_male_AA_non_emotion = data[(data.Gender=="male") & (data.Emotion.isnull()) & (data.Race=="African-American")][["text"]]
df_female_E_non_emotion =data[(data.Gender=="female") & (data.Emotion.isnull()) & (data.Race=="European")][["text"]]
df_male_E_non_emotion = data[(data.Gender=="male") & (data.Emotion.isnull()) & (data.Race=="European")][["text"]]
df_female_non_emotion = data[(data.Gender=="female") & (data.Emotion.isnull())][["text"]]
df_male_non_emotion = data[(data.Gender=="male") & (data.Emotion.isnull()) ][["text"]]
df_female_angry_non_race = data[(data.Gender=="female") & (data.Emotion=="anger") & (data.Race.isnull())][["text"]]
df_female_fear_non_race = data[(data.Gender=="female") & (data.Emotion=="fear") & (data.Race.isnull())][["text"]]
df_female_joy_non_race = data[(data.Gender=="female") & (data.Emotion=="joy") & (data.Race.isnull())][["text"]]
df_female_sadness_non_race = data[(data.Gender=="female") & (data.Emotion=="sadness") & (data.Race.isnull())][["text"]]
df_male_angry_non_race = data[(data.Gender=="male") & (data.Emotion=="anger") & (data.Race.isnull())][["text"]]
df_male_fear_non_race = data[(data.Gender=="male") & (data.Emotion=="fear") & (data.Race.isnull())][["text"]]
df_male_joy_non_race = data[(data.Gender=="male") & (data.Emotion=="joy") & (data.Race.isnull())][["text"]]
df_male_sadness_non_race = data[(data.Gender=="male") & (data.Emotion=="sadness") & (data.Race.isnull())][["text"]]

In [None]:
data[(data.Race.isnull()) & (data.Emotion.isnull())].Gender.unique()

In [None]:
df_female_angry_AA

In [None]:
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

def data_prep(df):
    return Dataset.from_pandas(df).map(tokenize_function, batched=True)

df_female_angry_AA = data_prep(df_female_angry_AA)
df_female_fear_AA = data_prep(df_female_fear_AA)
df_female_joy_AA = data_prep(df_female_joy_AA)
df_female_sadness_AA = data_prep(df_female_sadness_AA)
df_male_angry_AA = data_prep(df_male_angry_AA)
df_male_fear_AA = data_prep(df_male_fear_AA)
df_male_joy_AA = data_prep(df_male_joy_AA)
df_male_sadness_AA = data_prep(df_male_sadness_AA)
df_female_angry_E = data_prep(df_female_angry_E)
df_female_fear_E = data_prep(df_female_fear_E)
df_female_joy_E = data_prep(df_female_joy_E)
df_female_sadness_E = data_prep(df_female_sadness_E)
df_male_angry_E = data_prep(df_male_angry_E)
df_male_fear_E = data_prep(df_male_fear_E)
df_male_joy_E = data_prep(df_male_joy_E)
df_male_sadness_E = data_prep(df_male_sadness_E)
df_female_AA_non_emotion = data_prep(df_female_AA_non_emotion)
df_male_AA_non_emotion = data_prep(df_male_AA_non_emotion)
df_female_E_non_emotion = data_prep(df_female_E_non_emotion)
df_male_E_non_emotion = data_prep(df_male_E_non_emotion)
df_female_non_emotion = data_prep(df_female_non_emotion)
df_male_non_emotion = data_prep(df_male_non_emotion)
df_female_angry_non_race = data_prep(df_female_angry_non_race)
df_female_fear_non_race = data_prep(df_female_fear_non_race)
df_female_joy_non_race = data_prep(df_female_joy_non_race)
df_female_sadness_non_race = data_prep(df_female_sadness_non_race)
df_male_angry_non_race = data_prep(df_male_angry_non_race)
df_male_fear_non_race = data_prep(df_male_fear_non_race)
df_male_joy_non_race = data_prep(df_male_joy_non_race)
df_male_sadness_non_race = data_prep(df_male_sadness_non_race)

trainer = Trainer(model=model)

In [None]:
gender_compare('angry_AA', df_male_angry_AA, df_female_angry_AA)
gender_compare('fear_AA', df_male_fear_AA, df_female_fear_AA)
gender_compare('joy_AA', df_male_joy_AA, df_female_joy_AA)
gender_compare('sadness_AA', df_male_sadness_AA, df_female_sadness_AA)

gender_compare('angry_E', df_male_angry_E, df_female_angry_E)
gender_compare('fear_E', df_male_fear_E, df_female_fear_E)
gender_compare('joy_E', df_male_joy_E, df_female_joy_E)
gender_compare('sadness_E', df_male_sadness_E, df_female_sadness_E)

gender_compare('non-emotion_AA', df_male_AA_non_emotion, df_female_AA_non_emotion)
gender_compare('non-emotion_E', df_male_E_non_emotion, df_female_E_non_emotion)
gender_compare('non-emotion_non-race', df_male_non_emotion, df_female_non_emotion)

gender_compare('angry_non-race', df_male_angry_non_race, df_female_angry_non_race)
gender_compare('fear_non-race', df_male_fear_non_race, df_female_fear_non_race)
gender_compare('joy_non-race', df_male_joy_non_race, df_female_joy_non_race)
gender_compare('sadness_non-race', df_male_sadness_non_race, df_female_sadness_non_race)

In [None]:
race_compare('angry_male', df_male_angry_AA, df_male_angry_E)
race_compare('fear_male', df_male_fear_AA, df_male_fear_E)
race_compare('joy_male', df_male_joy_AA, df_male_joy_E)
race_compare('sadness_male', df_male_sadness_AA, df_male_sadness_E)

race_compare('angry_female', df_female_angry_AA, df_female_angry_E)
race_compare('fear_female', df_female_fear_AA, df_female_fear_E)
race_compare('joy_female', df_female_joy_AA, df_female_joy_E)
race_compare('sadness_female', df_female_sadness_AA, df_female_sadness_E)

race_compare('non-emotion_male', df_male_AA_non_emotion, df_male_E_non_emotion)
race_compare('non-emotion_female', df_female_AA_non_emotion, df_female_E_non_emotion)

In [None]:
test_to_check=pd.read_csv("/kaggle/input/testdata/data.txt")

In [None]:
test_to_check=create_dataset(test_to_check[["text"]])

In [None]:
test_to_check

In [None]:
def prediction_report(trainer,test):
    predictions = trainer.predict(test)
    preds = predictions.predictions.argmax(-1)
    labels = pd.Series(preds).map({0:'negative',1:'positive'})
    scores = (np.exp(predictions[0])/np.exp(predictions[0]).sum(-1,keepdims=True)).max(1)
    df = pd.DataFrame(list(zip(preds,labels,scores)), columns=['pred','label','score'])
    
    
    y_pred = preds
    y_true = test['text']
    
    
    print(classification_report(y_true, y_pred, digits=4))
    return df

In [None]:
predictions = trainer3.predict(test_to_check)
preds = predictions.predictions.argmax(-1)
labels = pd.Series(preds).map({0:'negative',1:'positive'})
scores = (np.exp(predictions[0])/np.exp(predictions[0]).sum(-1,keepdims=True)).max(1)
df = pd.DataFrame(list(zip(preds,labels,scores)), columns=['pred','label','score'])



In [None]:
df["text"]=test_to_check["text"]

In [None]:
df