# Predicting Moral Values in Text
### This notebook predicts moral values in text using the MoralBERT weights deployed on Hugging Face.

In [1]:
# Libraries:
import copy

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from huggingface_hub import PyTorchModelHubMixin
from transformers import AutoModel, AutoTokenizer

In [2]:
# Tokenizer and BERT encoder, both taken from the same pretrained checkpoint:
BERT_CHECKPOINT = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(BERT_CHECKPOINT)
bert_model = AutoModel.from_pretrained(BERT_CHECKPOINT)

In [3]:
class MyModel(
    nn.Module,
    PyTorchModelHubMixin,
    # optionally, you can add metadata which gets pushed to the model card
    # repo_url="your-repo-url",
    pipeline_tag="text-classification",
    license="mit",
):
    """Encoder followed by a linear transform and a two-layer classification head.

    The hidden state of the first token of the encoder output is passed
    through `invariant_trans` and then through `moral_classification`, which
    produces one logit per label.
    """

    def __init__(self, bert_model, moral_label=2):
        """Build the classifier around an existing encoder.

        Args:
            bert_model: Encoder module. It is called with `input_ids`,
                `token_type_ids` and `attention_mask`, and its output must
                expose `last_hidden_state`.
            moral_label: Number of output logits (default 2).
        """
        super().__init__()
        self.bert = bert_model
        # Size the layers from the encoder's own config so they always match
        # its output width; fall back to 768 when the encoder exposes no
        # `config.hidden_size`.
        bert_dim = getattr(getattr(bert_model, "config", None), "hidden_size", 768)
        self.invariant_trans = nn.Linear(bert_dim, bert_dim)
        self.moral_classification = nn.Sequential(
            nn.Linear(bert_dim, bert_dim),
            nn.ReLU(),
            nn.Linear(bert_dim, moral_label),
        )

    def forward(self, input_ids, token_type_ids=None, attention_mask=None):
        """Return the classification logits for a batch of encoded inputs.

        Args:
            input_ids: Token ids, forwarded to the encoder.
            token_type_ids: Optional segment ids, forwarded to the encoder.
            attention_mask: Optional attention mask, forwarded to the encoder.

        Returns:
            The output of the classification head; its last dimension has
            `moral_label` entries.
        """
        encoder_output = self.bert(
            input_ids,
            token_type_ids=token_type_ids,
            attention_mask=attention_mask,
        )
        # Keep only the hidden state at sequence position 0 for each item.
        first_token_state = encoder_output.last_hidden_state[:, 0, :]
        transformed = self.invariant_trans(first_token_state)
        return self.moral_classification(transformed)

In [4]:
def preprocessing(input_text, tokenizer, max_length=150):
    '''
    Encode one text with `tokenizer.encode_plus`, padded/truncated to a fixed length.

    Args:
        input_text: The text to encode.
        tokenizer: Object exposing an `encode_plus` method
            (e.g. a Hugging Face tokenizer).
        max_length: Length every encoding is padded or truncated to
            (default 150, the value previously hard-coded here).

    Returns:
        Whatever `tokenizer.encode_plus` returns. The call requests PyTorch
        tensors (`return_tensors = 'pt'`) and asks for both the attention mask
        and the token type ids, so with a Hugging Face tokenizer the result
        carries:
        - input_ids: token ids
        - token_type_ids: token type ids
        - attention_mask: 0/1 flags marking which tokens the model should
          consider
    '''
    return tokenizer.encode_plus(
        input_text,
        add_special_tokens=True,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_token_type_ids=True,
        return_tensors='pt',
    )

In [5]:
# Load the OMC tweets and display the frame.
df = pd.read_csv('data/OMC/omc_tweets.csv')
df

Unnamed: 0,text,moralbert,roberta_mmp,roberta_mm,liwc_mfd,persp_bert,top_morals,morality_label,tweet.id,user.id,dataset.author.name,majority_polarity
0,Watching by myself #tweetdebate Not drinking ...,nonmoral,nonmoral,nonmoral,nonmoral,nonmoral,{'nonmoral': 4},nonmoral,936469851,10323542,drgilpin,neg
1,"@ahg3 @MichDot Yeah, slime was actually my sec...",nonmoral,nonmoral,nonmoral,purity,loyalty,"{'nonmoral': 3, 'purity': 1}",nonmoral,936470432,11752272,starweaver,neg
2,Preparing to have a heart attack #tweetdebate,purity,nonmoral,nonmoral,care,nonmoral,"{'purity': 1, 'nonmoral': 2, 'care': 1}",nonmoral,936472030,716543,kyeung808,neg
3,"no debate moderators under 50, sorry #tweetde...",nonmoral,nonmoral,nonmoral,nonmoral,nonmoral,{'nonmoral': 4},nonmoral,936472042,14759482,rebot,neg
4,@current Now staring at black screen on http:/...,nonmoral,nonmoral,nonmoral,nonmoral,fairness,{'nonmoral': 4},nonmoral,936472907,6035262,Karoli,neg
...,...,...,...,...,...,...,...,...,...,...,...,...
1762,@dksnyder you missed the bit where McCain talk...,nonmoral,nonmoral,nonmoral,fairness,authority,"{'nonmoral': 3, 'fairness': 1}",nonmoral,936727686,5752932,crysharris,neg
1763,@newshour has hashtagged the debate #debate08...,nonmoral,nonmoral,nonmoral,nonmoral,fairness,{'nonmoral': 4},nonmoral,936731119,812825,MandianaJones,pos
1764,#debate08 FOX says McCain won the debate. MSNB...,nonmoral,nonmoral,nonmoral,nonmoral,nonmoral,{'nonmoral': 4},nonmoral,936732650,666913,Autumm,neg
1765,"#debate08 did your favorite candidate ""beat"" t...",nonmoral,fairness,nonmoral,nonmoral,fairness,"{'nonmoral': 3, 'fairness': 1}",nonmoral,936733111,7769402,Laurie2,neg


In [6]:
# Tweets to be scored.
df = pd.read_csv('data/OMC/omc_tweets.csv')

# Moral (MFT) values; each one is scored by its own MoralBERT model.
mft_values = [
    "care", "harm",
    "fairness", "cheating",
    "loyalty", "betrayal",
    "authority", "subversion",
    "purity", "degradation",
]


# Loaded classifiers keyed by MFT value, so each checkpoint is fetched and
# deserialised once instead of once per (sentence, value) pair.
# Trade-off: every cached classifier holds its own copy of the encoder, so
# memory use grows with the number of distinct MFT values requested.
_MORAL_MODELS = {}


def _load_moral_model(mft):
    """Return the classifier for `mft`, loading it from the Hub on first use."""
    if mft not in _MORAL_MODELS:
        repo_name = f"vjosap/moralBERT-predict-{mft}-in-text"
        # Give every classifier a private copy of the encoder. MyModel keeps
        # the module it is given as `self.bert`, so handing the same
        # `bert_model` to several cached classifiers would let a later weight
        # load overwrite the encoder of the earlier ones.
        model = MyModel.from_pretrained(repo_name, bert_model=copy.deepcopy(bert_model))
        # Inference only: make sure dropout and similar layers are disabled.
        model.eval()
        _MORAL_MODELS[mft] = model
    return _MORAL_MODELS[mft]


def get_model_score(sentence, mft):
    """Score one sentence for one moral value.

    Args:
        sentence: Text to score.
        mft: Moral value name; selects the repository
            `vjosap/moralBERT-predict-{mft}-in-text`.

    Returns:
        float: the softmax probability at index 1 of the model's output for
        this sentence.
    """
    model = _load_moral_model(mft)

    # preprocessing the text
    encodeds = preprocessing(sentence, tokenizer)

    # predicting the mft score; no gradients are needed for inference
    with torch.no_grad():
        output = model(**encodeds)
    score = F.softmax(output, dim=1)

    # extracting and returning the second value from the tensor
    return score[0, 1].item()


# Columns copied unchanged from the input frame into every result record.
passthrough_columns = ("tweet.id", "user.id", "dataset.author.name", "majority_polarity")

results = []

# Score the tweets one at a time, querying every MFT model for each tweet.
for _, tweet in df.iterrows():
    text = tweet['text']

    record = {column: tweet[column] for column in passthrough_columns}
    record["text"] = text
    record.update({mft: get_model_score(text, mft) for mft in mft_values})

    results.append(record)

# One row per tweet: the passthrough columns, the text, then one score per MFT value.
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,tweet.id,user.id,dataset.author.name,majority_polarity,text,care,harm,fairness,cheating,loyalty,betrayal,authority,subversion,purity,degradation
0,936469851,10323542,drgilpin,neg,Watching by myself #tweetdebate Not drinking ...,0.013743,0.012883,0.000767,0.000959,0.002947,0.005768,0.002744,0.000942,0.001618,0.274478
1,936470432,11752272,starweaver,neg,"@ahg3 @MichDot Yeah, slime was actually my sec...",0.008040,0.018728,0.000272,0.000297,0.000916,0.000427,0.000945,0.000564,0.000635,0.008745
2,936472030,716543,kyeung808,neg,Preparing to have a heart attack #tweetdebate,0.517000,0.030205,0.001227,0.000476,0.001366,0.023553,0.002339,0.001797,0.002030,0.883706
3,936472042,14759482,rebot,neg,"no debate moderators under 50, sorry #tweetde...",0.013429,0.002282,0.000623,0.000478,0.002599,0.001442,0.002178,0.000764,0.000729,0.145683
4,936472907,6035262,Karoli,neg,@current Now staring at black screen on http:/...,0.011750,0.007145,0.000974,0.000430,0.008603,0.005854,0.020601,0.001257,0.000703,0.145242
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1762,936727686,5752932,crysharris,neg,@dksnyder you missed the bit where McCain talk...,0.008001,0.001104,0.004453,0.000682,0.002540,0.002087,0.022247,0.000565,0.000761,0.103226
1763,936731119,812825,MandianaJones,pos,@newshour has hashtagged the debate #debate08...,0.007334,0.007144,0.000718,0.001193,0.008811,0.002166,0.000747,0.001246,0.000570,0.091282
1764,936732650,666913,Autumm,neg,#debate08 FOX says McCain won the debate. MSNB...,0.010902,0.000858,0.001591,0.000810,0.003373,0.001738,0.013386,0.000657,0.000775,0.241765
1765,936733111,7769402,Laurie2,neg,"#debate08 did your favorite candidate ""beat"" t...",0.007426,0.000610,0.000255,0.000430,0.001218,0.000636,0.000553,0.000614,0.000513,0.072123


In [7]:
# Persist the scores without writing the DataFrame index.
results_df.to_csv(
    'data/OMC/omc_morality.csv',
    index=False,
)

In [8]:
# Read the saved scores back from disk and display them.
df = pd.read_csv('data/OMC/omc_morality.csv')
df

Unnamed: 0,tweet.id,user.id,dataset.author.name,majority_polarity,text,care,harm,fairness,cheating,loyalty,betrayal,authority,subversion,purity,degradation
0,936469851,10323542,drgilpin,neg,Watching by myself #tweetdebate Not drinking ...,0.013743,0.012883,0.000767,0.000959,0.002947,0.005768,0.002744,0.000942,0.001618,0.274478
1,936470432,11752272,starweaver,neg,"@ahg3 @MichDot Yeah, slime was actually my sec...",0.008040,0.018728,0.000272,0.000297,0.000916,0.000427,0.000945,0.000564,0.000635,0.008745
2,936472030,716543,kyeung808,neg,Preparing to have a heart attack #tweetdebate,0.517000,0.030205,0.001227,0.000476,0.001366,0.023553,0.002339,0.001797,0.002030,0.883706
3,936472042,14759482,rebot,neg,"no debate moderators under 50, sorry #tweetde...",0.013429,0.002282,0.000623,0.000478,0.002599,0.001442,0.002178,0.000764,0.000729,0.145683
4,936472907,6035262,Karoli,neg,@current Now staring at black screen on http:/...,0.011750,0.007145,0.000974,0.000430,0.008603,0.005854,0.020601,0.001257,0.000703,0.145242
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1762,936727686,5752932,crysharris,neg,@dksnyder you missed the bit where McCain talk...,0.008001,0.001104,0.004453,0.000682,0.002540,0.002087,0.022247,0.000565,0.000761,0.103226
1763,936731119,812825,MandianaJones,pos,@newshour has hashtagged the debate #debate08...,0.007334,0.007144,0.000718,0.001193,0.008811,0.002166,0.000747,0.001246,0.000570,0.091282
1764,936732650,666913,Autumm,neg,#debate08 FOX says McCain won the debate. MSNB...,0.010902,0.000858,0.001591,0.000810,0.003373,0.001738,0.013386,0.000657,0.000775,0.241765
1765,936733111,7769402,Laurie2,neg,"#debate08 did your favorite candidate ""beat"" t...",0.007426,0.000610,0.000255,0.000430,0.001218,0.000636,0.000553,0.000614,0.000513,0.072123
