In [None]:
import csv
import pandas as pd

# Sentiment name written to the "sentiment" column, keyed by integer label.
label_names = {
    0: 'positive',
    1: 'negative',
    2: 'neutral',
}
# Value written to the "ttlab_label" column; the integer label is used as the
# list position (0 -> +1, 1 -> -1, 2 -> 0).
ttlab_mapping = [1, -1, 0]

# The data files are tab-separated.
csv_delimiter = "\t"
# The model does not yet support 4 classes, so the "hostile" label (3)
# is folded into "negative" (1).
label_remap = {3: 1}

def load_data_file(csv_path, split_name, encoding="utf-8"):
    """Read a two-column, delimiter-separated file into a DataFrame.

    Every row must hold exactly two fields: the text and an integer label.
    The file is split on the module-level ``csv_delimiter``. Labels that have
    an entry in the module-level ``label_remap`` are replaced by the mapped
    value; all other labels are kept as they are.

    Args:
        csv_path: Path of the file to read.
        split_name: Value stored in the ``split`` column of every row.
        encoding: Text encoding used to open the file. It is passed
            explicitly so the result does not depend on the platform locale
            (assumes the data files are UTF-8 -- TODO confirm).

    Returns:
        A ``pandas.DataFrame`` with one row per input row and the columns
        ``text``, ``original_label``, ``ttlab_label``, ``split``, ``id`` and
        ``sentiment``. ``id`` is the zero-based row position within this
        file. An empty file yields an empty frame that still has these
        columns.

    Raises:
        ValueError: If a row does not have exactly two fields, or if its
            label is not an integer.
    """
    columns = ["text", "original_label", "ttlab_label", "split", "id", "sentiment"]
    records = []

    # newline='' is what the csv module asks for on files handed to csv.reader.
    with open(csv_path, 'r', encoding=encoding, newline='') as f:
        reader = csv.reader(f, delimiter=csv_delimiter)
        for ind, row in enumerate(reader):
            if len(row) != 2:
                raise ValueError(
                    f'Invalid row encountered. (row {ind} of {csv_path}: '
                    f'expected 2 fields, got {len(row)})'
                )
            # The text is exported raw, without any cleaning.
            text, raw_label = row
            try:
                label = int(raw_label)
            except ValueError as err:
                raise ValueError(
                    f'Invalid label {raw_label!r} in row {ind} of {csv_path}.'
                ) from err
            # If the label has an entry in the label_remap dict,
            # it is remapped accordingly. Else, the label is kept.
            label = label_remap.get(label, label)

            records.append({
                "text": text,
                # NOTE: this is the label *after* remapping, not the value
                # found in the file.
                "original_label": label,
                "ttlab_label": ttlab_mapping[label],
                "split": split_name,
                "id": ind,
                "sentiment": label_names[label]
            })

    # Naming the columns keeps the schema intact when the file has no rows.
    return pd.DataFrame(records, columns=columns)

# Both splits live in the same directory, so the location is spelled out once.
mdraw_dir = "/home/daniel/data/uni/masterarbeit-sentiment/data/datasets/mdraw"
df_train = load_data_file(f"{mdraw_dir}/train.csv", "train")
df_dev = load_data_file(f"{mdraw_dir}/validation.csv", "dev")

# NOTE: "id" is the row position inside each file, so the same id value occurs
# once per split after concatenation; a row is only identified by id together
# with the "split" column.
df = pd.concat([df_train, df_dev]).set_index("id")
# Tag every row with the name of the dataset it came from.
df["dataset"] = "german-news-sentiment-bert"
df

In [2]:
# export to db
import sqlite3
from contextlib import closing

db_dir = "/home/daniel/data/uni/masterarbeit-sentiment/data/datasets/experiments/de/3sentiment"
db_file = f"{db_dir}/datasets.db"

# closing() guarantees the connection is released even when to_sql raises.
# NOTE: if_exists='append' adds the rows to whatever the "dataset" table
# already holds, so running this cell twice stores every row twice.
with closing(sqlite3.connect(db_file)) as con:
    df.to_sql("dataset", con=con, index=True, index_label="id", if_exists='append')