# Import libraries

In [None]:
!pip install gdown

In [None]:
import os
import gdown
import json
import pandas as pd
import urllib.request
from zipfile import ZipFile

# Mount drive

In [None]:
# Colab-only helper used to attach Google Drive to this runtime.
from google.colab import drive

# All dataset paths used in this notebook live under this mount point.
drive.mount(mountpoint='/content/drive')

# Create directory to store data

In [None]:
# Create the dataset folder on Drive (including missing parents);
# exist_ok=True makes re-running this cell harmless.
os.makedirs(
    '/content/drive/MyDrive/stance_detection_datasets/total_defense_memes',
    exist_ok=True,
)

# Download **TotalDefMeme**

Nirmalendu Prakash, Ming Shan Hee, and Roy Ka-Wei Lee. 2023. TotalDefMeme: A Multi-Attribute Meme dataset on Total Defence in Singapore. In Proceedings of the 14th Conference on ACM Multimedia Systems (MMSys '23). Association for Computing Machinery, New York, NY, USA, 369–375. https://doi.org/10.1145/3587819.3592545

In [None]:
# Download large file from Google Drive. via
# https://github.com/wkentaro/gdown
url = 'https://drive.google.com/u/0/uc?id=1oJIh4QQS3Idff2g6bZORstS5uBROjUUz'
output = '/content/drive/MyDrive/stance_detection_datasets/total_defense_memes/total_defense_memes.zip'
# NOTE(review): depending on the installed gdown release, a failed download
# (e.g. quota exceeded or missing permission) may be signalled by returning
# None instead of raising -- confirm against the gdown documentation.
# Stop here in that case so the unzip step does not fail on a missing or
# partial archive with a less helpful error.
if gdown.download(url, output, quiet=False) is None:
    raise RuntimeError(f'gdown could not download {url} to {output}')

In [None]:
# Unzip file. via
# https://www.geeksforgeeks.org/unzipping-files-in-python/
# Extract every member of the downloaded archive into the dataset folder;
# the context manager closes the archive once extraction is done.
with ZipFile(
    '/content/drive/MyDrive/stance_detection_datasets/total_defense_memes/total_defense_memes.zip',
    mode='r',
) as archive:
    archive.extractall(
        path='/content/drive/MyDrive/stance_detection_datasets/total_defense_memes/',
    )

In [None]:
# Delete the downloaded zip archive from the dataset folder on Drive.
os.remove(
    '/content/drive/MyDrive/stance_detection_datasets/total_defense_memes/total_defense_memes.zip'
)

In [None]:
# Fetch the annotation JSON from the dataset's GitHub repository and store it
# next to the other dataset files as total_defense_memes.json.
urllib.request.urlretrieve(
    url='https://raw.githubusercontent.com/Social-AI-Studio/Total-Defense-Memes/main/report/annotation.json',
    filename='/content/drive/MyDrive/stance_detection_datasets/total_defense_memes/total_defense_memes.json',
)

# Load **TotalDefMeme Dataset**

In [None]:
# Read the annotation file inside a context manager so the file handle is
# closed deterministically, and decode it explicitly as UTF-8 rather than
# relying on the platform default encoding.
with open('/content/drive/MyDrive/stance_detection_datasets/total_defense_memes/total_defense_memes.json',
          encoding='utf-8') as annotation_file:
    data = json.load(annotation_file)
# Build dataframe from list of dictionaries. via
# https://stackoverflow.com/a/72327549
# data['Pillar_Stances'] is iterated as a sequence of dicts; their items are
# merged into a single mapping and each key becomes one row (orient='index').
total_defense_memes = pd.DataFrame.from_dict({k: v for d in data['Pillar_Stances'] for k,v in d.items()}, orient='index')
# Give the positional columns 0, 1 and 2 readable names.
total_defense_memes = total_defense_memes.rename(columns={0: 'first', 1: 'second', 2: 'third'})
# Keep the former dict keys (the row index) as a regular 'image' column,
# then replace the index with a plain 0..n-1 range.
total_defense_memes['image'] = total_defense_memes.index
total_defense_memes = total_defense_memes.reset_index(drop=True)

# Reshape dataset from wide to long format

In [None]:
# Wide -> long: stack the 'first'/'second'/'third' columns into a single
# 'pillars_and_stances' column, discard melt's helper 'variable' column,
# drop the rows that had no entry in a given slot and renumber the index.
total_defense_memes = (
    total_defense_memes
    .melt(id_vars='image', value_vars=['first', 'second', 'third'], value_name='pillars_and_stances')
    .drop(columns='variable')
    .dropna()
    .reset_index(drop=True)
)
# Split every 'pillars_and_stances' entry into its two components.
# Building one DataFrame from the list of entries avoids constructing a
# pd.Series per row, which is what .apply(pd.Series) does.
# NOTE(review): assumes each entry is a two-item sequence
# [pillar, stance_list] -- confirm against the annotation file.
total_defense_memes[['pillar', 'stance_list']] = pd.DataFrame(
    total_defense_memes['pillars_and_stances'].tolist(),
    index=total_defense_memes.index,
)

# Subset rows where all annotators agree

In [None]:
# Filter rows where list-column contains a specific set of items. via
# https://stackoverflow.com/a/63094115
# Comma-joined 'stance_list' values that count as full agreement: two or
# three identical labels.
unanimous_votes = [
    'Neutral,Neutral',
    'Neutral,Neutral,Neutral',
    'Supportive,Supportive',
    'Supportive,Supportive,Supportive',
    'Against,Against',
    'Against,Against,Against',
]
# One membership test replaces six OR-ed masks, and no temporary column is
# needed to hold the joined strings.
is_unanimous = total_defense_memes['stance_list'].str.join(sep=',').isin(unanimous_votes)
# For the kept rows every label is the same, so the first one is the agreed
# stance; .assign appends 'stance' as the last column without relying on a
# hard-coded column position.
total_defense_memes = (
    total_defense_memes.loc[is_unanimous]
    .assign(stance=lambda frame: frame['stance_list'].str[0])
    .reset_index(drop=True)
)

# Prefix image file names with the image directory path

In [None]:
# Turn each image name into a full path by prepending the TD_Memes folder
# inside the dataset directory on Drive.
total_defense_memes['image'] = (
    '/content/drive/MyDrive/stance_detection_datasets/total_defense_memes/TD_Memes/'
    + total_defense_memes['image'].astype(str)
)

# Save dataset in CSV format

In [None]:
# Write the final table to the dataset folder on Drive; index=False leaves
# the row index out of the CSV.
total_defense_memes.to_csv(
    path_or_buf='/content/drive/MyDrive/stance_detection_datasets/total_defense_memes/total_defense_memes.csv',
    index=False,
)