In [2]:
import pandas as pd

In [3]:
# Read train.csv (resolved relative to the notebook's working directory) into `df`.
df = pd.read_csv('train.csv')

In [7]:
import numpy as np
import pandas as pd


def parse_bool_label(v):
    """
    Map a raw label value to True / False / None.

    Parameters
    ----------
    v : scalar
        Value to interpret. Booleans are returned unchanged. Any other value
        is stringified, stripped and lower-cased, then matched against the
        accepted truthy tokens ('true', 't', '1', '1.0', 'yes', 'y') and
        falsy tokens ('false', 'f', '0', '0.0', 'no', 'n').

    Returns
    -------
    True | False | None
        None for missing values (as detected by ``pd.isna``) and for any
        unrecognized token such as 'NOANNOTATION'.
    """
    # Missing values carry no label.
    if pd.isna(v):
        return None
    if isinstance(v, bool):
        return v
    s = str(v).strip().lower()
    # '1.0' / '0.0' cover 0/1 flags that arrive as floats (for example an
    # integer column that was upcast to float because it contains NaN);
    # without them such flags would silently fall through to None.
    if s in {'true', 't', '1', '1.0', 'yes', 'y'}:
        return True
    if s in {'false', 'f', '0', '0.0', 'no', 'n'}:
        return False
    # Anything else is treated as "no usable label".
    return None

# Parse the raw annotation columns element-wise into True / False / None
# (requires `df` to be loaded and `parse_bool_label` to be defined).
df['sender_bool'] = df['sender_labels'].map(parse_bool_label)
df['receiver_bool'] = df['receiver_labels'].map(parse_bool_label)


# ---------- mapping function based on your specified logic ----------
def deception_state_from_bools(sender_b, receiver_b):
    """
    Derive the deception label for one message from its parsed annotations.

    Parameters
    ----------
    sender_b : True | False | None
        Parsed sender annotation for the message.
    receiver_b : True | False | None
        Parsed receiver annotation. Accepted so existing callers keep
        working, but it does not influence the result: every sender/receiver
        combination maps to a label determined by ``sender_b`` alone.

    Returns
    -------
    'no_deception' | 'successful_deception' | None
        'no_deception' when ``sender_b`` is truthy, 'successful_deception'
        when it is falsy, and None when the sender annotation is missing.

    NOTE(review): 'successful_deception' is also returned when ``receiver_b``
    is False, a case the previous inline comments called "unsuccessful
    deception". The label is kept as-is for backward compatibility; confirm
    whether a separate label is wanted for that case.
    """
    # A missing sender annotation must not be mistaken for a lie: None is
    # falsy, so without this guard it would be labelled as deception.
    if sender_b is None or pd.isna(sender_b):
        return None
    return 'no_deception' if sender_b else 'successful_deception'


# Label every row. This is a row-wise Python loop, not a vectorized operation:
# iterating just the two columns the mapper needs avoids building a Series per
# row (as df.apply(axis=1) does) and also behaves sensibly on an empty frame.
df['deception_state'] = [
    deception_state_from_bools(sender_b, receiver_b)
    for sender_b, receiver_b in zip(df['sender_bool'], df['receiver_bool'])
]

# quick sanity counts (dropna=False keeps unlabeled rows visible)
print(df['deception_state'].value_counts(dropna=False))

deception_state
no_deception            12541
successful_deception      591
Name: count, dtype: int64


In [8]:
# Preview the first rows, including the derived sender_bool / receiver_bool /
# deception_state columns.
df.head()

Unnamed: 0,messages,sender_labels,receiver_labels,speakers,receivers,absolute_message_index,relative_message_index,seasons,years,game_score,game_score_delta,players,game_id,sender_bool,receiver_bool,deception_state
0,Germany!\n\nJust the person I want to speak wi...,True,True,italy,germany,74,0,Spring,1901,3,0,"italy, germany",1,True,True,no_deception
1,"You've whet my appetite, Italy. What's the sug...",True,True,germany,italy,76,1,Spring,1901,3,0,"italy, germany",1,True,True,no_deception
2,👍,True,True,italy,germany,86,2,Spring,1901,3,0,"italy, germany",1,True,True,no_deception
3,It seems like there are a lot of ways that cou...,True,True,germany,italy,87,3,Spring,1901,3,0,"italy, germany",1,True,True,no_deception
4,"Yeah, I can’t say I’ve tried it and it works, ...",True,NOANNOTATION,italy,germany,89,4,Spring,1901,3,0,"italy, germany",1,True,,no_deception


In [9]:
# Drop the raw label columns, the intermediate boolean helpers and the
# season/year columns. errors='ignore' keeps this cell idempotent: because it
# rebinds `df` to its own result, a second run would otherwise raise KeyError
# on the columns that are already gone.
df = df.drop(
    columns=['sender_labels', 'receiver_labels', 'sender_bool', 'receiver_bool', 'seasons', 'years'],
    errors='ignore',
)

In [10]:
# Preview the first rows of the frame after the column drop.
df.head()

Unnamed: 0,messages,speakers,receivers,absolute_message_index,relative_message_index,game_score,game_score_delta,players,game_id,deception_state
0,Germany!\n\nJust the person I want to speak wi...,italy,germany,74,0,3,0,"italy, germany",1,no_deception
1,"You've whet my appetite, Italy. What's the sug...",germany,italy,76,1,3,0,"italy, germany",1,no_deception
2,👍,italy,germany,86,2,3,0,"italy, germany",1,no_deception
3,It seems like there are a lot of ways that cou...,germany,italy,87,3,3,0,"italy, germany",1,no_deception
4,"Yeah, I can’t say I’ve tried it and it works, ...",italy,germany,89,4,3,0,"italy, germany",1,no_deception


In [9]:
# (number of rows, number of columns) of the current frame.
df.shape

(13132, 10)

In [10]:
# Number of rows per distinct value of the `receivers` column.
df['receivers'].value_counts()

receivers
germany    2851
italy      2376
england    2269
austria    1758
russia     1519
france     1220
turkey     1139
Name: count, dtype: int64