In [None]:
!pip install datasets
!pip install peft transformers

In [2]:
from datasets import load_dataset
import pandas as pd
import re

In [62]:
# Load the QEvasion dataset
ds = load_dataset("ailsntua/QEvasion")

# Work on the train split as a DataFrame, restricted to the columns used below
kept_columns = ["question", "interview_answer", "label"]
df_train = ds["train"].to_pandas()[kept_columns]

In [45]:
# Display df_train (question, interview_answer, label) for a quick look at the rows.
df_train

Unnamed: 0,question,interview_answer,label
0,How would you respond to the accusation that t...,"\nThe President. Well, look, first of all, the...",Explicit
1,Do you think President Xi is being sincere abo...,"\nThe President. Well, look, first of all, the...",General
2,1. Q1: Do you believe the country's slowdown a...,"\nThe President. Look, I think China has a dif...",Partial/half-answer
3,2. Q2: Are you worried about the meeting betwe...,"\nThe President. Look, I think China has a dif...",Dodging
4,Is the President's engagement with Asian coun...,"\nThe President. Well, I hope I get to see Mr....",Explicit
...,...,...,...
3443,Why shouldn't Americans give Democrats a chan...,\nThe President. That's a tricky little questi...,Deflection
3444,Inquiry about the belief regarding the abilit...,"\nThe President. Mike, I believe Iraq will be ...",Explicit
3445,2. Are you resentful that some Republican cand...,"\nThe President. You know, no, I'm not resentf...",Explicit
3446,1. If you really didn't think that Republicans...,"\nThe President. You know, no, I'm not resentf...",Deflection


In [None]:
# Regex explanation:

# ^ matches the start of the string
# (\d+\. Q\d+:|\d+\.|Part \d+:|Q\d+:|-) is a capturing group that matches one of the following:
#     \d+\. Q\d+: : one or more digits, a period, a space, "Q", one or more digits, and a colon
#     \d+\. : one or more digits followed by a period
#     Part \d+: : the string "Part " followed by one or more digits and a colon
#     Q\d+: : the string "Q" followed by one or more digits and a colon
#     - : a leading "-"
# Alternatives are tried left to right, so the longest form (\d+\. Q\d+:) is listed
# before the shorter \d+\. that would otherwise match only its first part.

In [63]:
# Remove indexing from questions.
# Alternatives are tried left to right, so the longest prefix form ("1. Q1:") comes first.
index_pattern = r'^(\d+\. Q\d+:|\d+\.|Part \d+:|Q\d+:|-)'

df_train['question'] = (
    df_train['question']
    # Trim first so a prefix preceded by stray whitespace is still anchored at ^
    .str.strip()
    .str.replace(index_pattern, '', regex=True)
    # Remove quotes (literal match; explicit regex=False does not rely on the pandas default)
    .str.replace('"', '', regex=False)
    # Removing the prefix leaves the space that followed it ("1. Q1: Do you" -> " Do you")
    .str.strip()
)

In [64]:
# Display df_train again to check the question column after index/quote removal.
df_train

Unnamed: 0,question,interview_answer,label
0,How would you respond to the accusation that t...,"\nThe President. Well, look, first of all, the...",Explicit
1,Do you think President Xi is being sincere abo...,"\nThe President. Well, look, first of all, the...",General
2,Do you believe the country's slowdown and gro...,"\nThe President. Look, I think China has a dif...",Partial/half-answer
3,Are you worried about the meeting between Pre...,"\nThe President. Look, I think China has a dif...",Dodging
4,Is the President's engagement with Asian coun...,"\nThe President. Well, I hope I get to see Mr....",Explicit
...,...,...,...
3443,Why shouldn't Americans give Democrats a chan...,\nThe President. That's a tricky little questi...,Deflection
3444,Inquiry about the belief regarding the abilit...,"\nThe President. Mike, I believe Iraq will be ...",Explicit
3445,Are you resentful that some Republican candid...,"\nThe President. You know, no, I'm not resentf...",Explicit
3446,If you really didn't think that Republicans w...,"\nThe President. You know, no, I'm not resentf...",Deflection


In [65]:
# Remove first sentence from answer: everything up to and including the first period
# (e.g. the "\nThe President." speaker label), then trim the whitespace left behind.
# An answer that contains no period at all is emptied by this pattern.
# NOTE: the column is rewritten in place, so re-running this cell on an already
# cleaned frame removes one more sentence; reload df_train before re-running.
sentence_pattern = r'^[^.]+\.?'
df_train['interview_answer'] = (
    df_train['interview_answer']
    .str.replace(sentence_pattern, '', regex=True)
    .str.strip()
)

In [54]:
# Display df_train to check interview_answer after the leading sentence was removed.
df_train

Unnamed: 0,question,interview_answer,label
0,How would you respond to the accusation that t...,"Well, look, first of all, the—I am sincere ab...",Explicit
1,Do you think President Xi is being sincere abo...,"Well, look, first of all, the—I am sincere ab...",General
2,Do you believe the country's slowdown and gro...,"Look, I think China has a difficult economic ...",Partial/half-answer
3,Are you worried about the meeting between Pre...,"Look, I think China has a difficult economic ...",Dodging
4,Is the President's engagement with Asian coun...,"Well, I hope I get to see Mr. Xi sooner than ...",Explicit
...,...,...,...
3443,Why shouldn't Americans give Democrats a chan...,That's a tricky little question there. [Laugh...,Deflection
3444,Inquiry about the belief regarding the abilit...,"Mike, I believe Iraq will be able to defend, ...",Explicit
3445,Are you resentful that some Republican candid...,"You know, no, I'm not resentful, nor am I res...",Explicit
3446,If you really didn't think that Republicans w...,"You know, no, I'm not resentful, nor am I res...",Deflection


In [66]:
# Keep only the rows whose question contains a literal question mark.
# regex=False matches '?' as plain text, so no backslash escape is needed;
# na=False treats a missing question as a non-match instead of propagating NaN.
has_question_mark = df_train['question'].str.contains('?', regex=False, na=False)
filtered_quest_df = df_train[has_question_mark]
len(filtered_quest_df)

2676

In [57]:
# Count the complementary rows: questions without any question mark.
# '?' is matched as plain text (regex=False); a missing question counts as
# having none (na=False).
len(df_train[~df_train['question'].str.contains('?', regex=False, na=False)])

772

In [67]:
# Display the rows whose question contains a question mark.
filtered_quest_df

Unnamed: 0,question,interview_answer,label
0,How would you respond to the accusation that t...,"Well, look, first of all, the—I am sincere ab...",Explicit
1,Do you think President Xi is being sincere abo...,"Well, look, first of all, the—I am sincere ab...",General
2,Do you believe the country's slowdown and gro...,"Look, I think China has a difficult economic ...",Partial/half-answer
3,Are you worried about the meeting between Pre...,"Look, I think China has a difficult economic ...",Dodging
4,Is the President's engagement with Asian coun...,"Well, I hope I get to see Mr. Xi sooner than ...",Explicit
...,...,...,...
3442,What happens if that patience runs out?,"See, that's that hypothetical Keil is trying ...",Deflection
3443,Why shouldn't Americans give Democrats a chan...,That's a tricky little question there. [Laugh...,Deflection
3445,Are you resentful that some Republican candid...,"You know, no, I'm not resentful, nor am I res...",Explicit
3446,If you really didn't think that Republicans w...,"You know, no, I'm not resentful, nor am I res...",Deflection


In [69]:
# Questions shaped like "<description>: <question>": the text contains exactly one
# colon, and that colon is followed by a space.
desc_pattern = r'^[^:]+: [^:]+$'
is_described = df_train['question'].str.contains(desc_pattern)
filtered_desc_df = df_train[is_described]
filtered_desc_df

Unnamed: 0,question,interview_answer,label
142,Who do you think would be the tougher competit...,It will be fun watching them take on each oth...,Deflection
163,Taiwan defense: The question asks if the Presi...,Yes.,Explicit
177,"And to clarify, on chemical weapons: Could—if ...","It would trigger a response in kind, whether ...",Declining to answer
182,Assurance regarding the Nord Stream project: D...,The first question first. If Germany—if Russi...,Explicit
183,Definition of invasion: Did President Biden a...,The first question first. If Germany—if Russi...,Explicit
...,...,...,...
3320,Surprise: Does the increasing number of troops...,I am—what I hear from commanders is that the ...,Deflection
3321,Concern: Does the growing sentiment of troops...,I am—what I hear from commanders is that the ...,Implicit
3322,Awareness: Are you aware of the growing numbe...,I am—what I hear from commanders is that the ...,Partial/half-answer
3323,Surprise: Does the increasing number of troop...,I am—what I hear from commanders is that the ...,Dodging


In [None]:
I'm only against—what I'm against is, I'm against doing anything where the people aren't taken care of, and the people aren't being taken care of properly. We have—we want people to get money. It wasn't their fault that they got shut down. They got shut down by China. So whether it's the Post Office or whether it's the $3½ billion dollars—you know, they're asking for $3½ billion dollars just for the universal mail-in ballots, but they're not willing to make a deal. These are two points within a very big deal. The thing they want more than anything else, Kaitlan—and you know this—is bailout money for the States and for the cities that are in trouble, which, for the most part, are Democrat-run States and cities. So New York has a problem, California has a problem, Illinois has a tremendous problem, and others. They want to be able to bail out these States, and we don't want to be doing that or certainly don't want to do it to the extent. They're looking for $1 trillion; we don't want to be doing that.

In [None]:
Please, go ahead. President Jair Messias Bolsonaro of Brazil