In [None]:
!pip3 install transformers

In [91]:
import pandas as pd
from sklearn.model_selection import train_test_split

# panda view settings
pd.set_option('display.max_columns', None)  # Display all columns
pd.set_option('display.max_rows', None)  # Display all rows
pd.set_option('display.max_colwidth', 500)  # Display full text in columns

In [71]:
df = pd.read_csv("./data/toxicbias_train.csv")
df_bias = df[df['bias'] == 'bias']
df_neutral = df[df['bias'] == 'neutral']


## Split up dataframe by category

In [122]:
# store unique categories
unique_categories = set()
for categories in df['category'].str.split(','):
    unique_categories.update(category.strip() for category in categories)

# Convert the set to a list if needed
unique_categories_list = sorted(list(unique_categories))

In [123]:
categorical_dfs = {category: pd.DataFrame(columns=df.columns) for category in unique_categories_list}

# Split categories and add rows to the corresponding dataframes in dictionary
def split_categories_and_add_rows(row):
    categories = row['category'].split(',')
    for category in categories:
        category = category.strip()  # Remove leading/trailing spaces
        if category in categorical_dfs:
            categorical_dfs[category] = categorical_dfs[category].append(row, ignore_index=True)

df_bias.apply(split_categories_and_add_rows, axis=1)

# Print the shape of each category dataframe
for category, category_df in categorical_dfs.items():
    print(f"Dataframe for {category}:")
    print(f"Shape: {category_df.shape}")
    print("\n")

Dataframe for gender:
Shape: (293, 6)


Dataframe for lgbtq:
Shape: (453, 6)


Dataframe for political:
Shape: (172, 6)


Dataframe for race:
Shape: (1768, 6)


Dataframe for religion:
Shape: (1257, 6)




## Split training and test data

In [125]:
training_data = {}
test_data = {}

# Perform the 80-20 split for each category dataframe
for category, df in categorical_dfs.items():
    train_df, test_df = train_test_split(df, test_size=0.8, random_state=42)
    
    training_data[category] = train_df
    test_data[category] = test_df

In [126]:
training_data['lgbtq']

Unnamed: 0,comment_text,identity_attack_x,bias,target,category,rationale
207,And you are a lesbian. woo hoo,0.514286,bias,lesbians,lgbtq,prejudice against lesbians
212,Yeesh what is this world coming to. Oh no someone called me a Canuck the other day my feelings are now hurt I better take time off work and sue someone. My apology would have gone like this: Im very sorry you are a bunch of queers,0.725299,bias,queers,lgbtq,prejudice against queers
295,I think its interesting that one of these two mainstream women is a transvestite Michael Obama and the other is a confirmed lesbian HilLIARy.,0.5,bias,lesbian,lgbtq,prejudice against lesbians
414,SO MUCH WINNING BY NIKKI HALEY shes going to make a great first woman president sorry Hillary but no one likes you but everyone likes Nikki Haley plus shes way better looking and not a lesbian :o,0.6,bias,lesbians,lgbtq,prejudice against lesbians
251,Who? Oh yea. The no talent obnoxious gay guy thats famous only for being gay. His 15 seconds of fame are up. Nobody care. Next. A drunk driver with the name OBrian? No surprise there.,0.742424,bias,gays,lgbtq,prejudice against gays
442,The fact that transgender people need medical treatment for their condition tells it all. Normal people come either male or female and require no medical treatment to stay that way.,0.7,bias,transgenders,lgbtq,prejudice against transgenders
415,The gays need colonoscopy.,0.685714,bias,gays,lgbtq,prejudice against gays
303,actually many would love to see a statue of Joseph McCarthy. he got rid of all those blood stained commies and queers,0.6,bias,queers,lgbtq,prejudice against queers
350,Pathetic Methodists. Lesbians gone mad. Penis envy everywhere. They are on a sinking ship and they continue to punch holes in the side. What can we say. If contraception is ok then anything goes. Romans one. Wow. Hate to be them when judgment day comes.,0.557143,bias,lesbians,lgbtq,prejudice against lesbbians
269,Overqualified cause hes black and hispanic. Guess if he was gay and trans he would really be a shoo in.,0.65,bias,"blacks,gays","race,lgbtq",prejudice against blacks and gays


# Premises

In [107]:
hypotheses = {
  'Religion' : [
    'The use of violence is encouraged or desired against a religion.',
    'A religion should be discriminated against.',
    'Members of a religion should be excluded and marginalized.',
    'Members of a religion should remain in their respective countries.',
    'Members of a religion are compared to animals.',
    'A religion or members of a religion are compared to devils or demons.',
    'The integration of a religion should be avoided in a country or society.',
    'A religion or members of a religion are stupid.',
    'A religion is inferior when compared to another religion.',
    'Religious practices of a particular faith should be restricted or banned in public spaces.',
    'Religious attire or symbols should be prohibited.',
    'A religion promotes violence or intolerance.',
    'Members of a religion are a threat.',
    'Members of a religion are not trustworthy or loyal.'
  ],
}
hypotheses


{'Religion': ['The use of violence is encouraged or desired against a religion.',
  'A religion should be discriminated against.',
  'Members of a religion should be excluded and marginalized.',
  'Members of a religion should remain in their respective countries.',
  'Members of a religion are compared to animals.',
  'A religion or members of a religion are compared to devils or demons.',
  'The integration of a religion should be avoided in a country or society.',
  'A religion or members of a religion are stupid.',
  'A religion is inferior when compared to another religion.',
  'Religious practices of a particular faith should be restricted or banned in public spaces.',
  'Religious attire or symbols should be prohibited.',
  'A religion promotes violence or intolerance.',
  'Members of a religion are a threat.',
  'Members of a religion are not trustworthy or loyal.']}