In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
import string

# Function to convert a row to .dat format
def convert_row_to_dat(row, emotion_map):
    """Render one annotated review row as a block of token-per-line .dat text.

    The sentence is split on whitespace and every token becomes one line of
    the form ``"<token> <aspect_tag> <polarity_tag> <emotion_tag>"``.  Tokens
    that lie inside the character span ``[row['from'], row['to']]`` are
    tagged ``B-ASP`` (first token of the span) or ``I-ASP`` (subsequent
    tokens) and carry the row's polarity and emotion ids; every other token
    is tagged ``O -1 -1``.

    Args:
        row: Mapping-like object (e.g. a pandas row or a dict) providing the
            keys 'Review Sentence', 'polarity', 'from', 'to' and
            'Emotion Class'.
        emotion_map: Mapping from emotion class name to integer id.

    Returns:
        The token lines joined by newlines, followed by a trailing newline so
        that consecutive sentences end up separated by a blank line.

    Raises:
        KeyError: If a token falls inside the aspect span and the row's
            emotion class is not a key of ``emotion_map``.
    """
    sentence = row['Review Sentence']
    polarity = row['polarity']
    from_idx = row['from']
    to_idx = row['to']
    emotion_class = row['Emotion Class']

    # Polarity id used for aspect tokens; unknown polarity strings map to -1.
    # NOTE(review): 'positive' is 3 (2 is skipped) — confirm this matches the
    # label set expected by whatever consumes the .dat file.
    aspect_polarity_tag = {
        'negative': 0,
        'neutral': 1,
        'positive': 3
    }.get(polarity, -1)

    dat_lines = []
    current_idx = 0          # end offset of the previous raw token
    aspect_started = False   # True once the first in-span token has been tagged

    for word in sentence.split():
        # Locate the raw token first: it always exists at or after
        # current_idx, which keeps the scan aligned even for tokens that are
        # nothing but punctuation.
        raw_start_idx = sentence.find(word, current_idx)
        current_idx = raw_start_idx + len(word)

        clean_word = word.strip(string.punctuation)
        if clean_word:
            # Span of the token without its surrounding punctuation.
            leading = len(word) - len(word.lstrip(string.punctuation))
            word_start_idx = raw_start_idx + leading
            word_end_idx = word_start_idx + len(clean_word)
        else:
            # Punctuation-only token (e.g. "-" or "&"): use its real position
            # instead of a zero-width span glued to the previous word, which
            # would wrongly pull it into an aspect that ends just before it.
            word_start_idx, word_end_idx = raw_start_idx, current_idx

        if word_start_idx >= from_idx and word_end_idx <= to_idx:
            # Position-based B/I decision: only the first token inside the
            # span opens the aspect, regardless of its text.
            aspect_tag = 'I-ASP' if aspect_started else 'B-ASP'
            aspect_started = True
            polarity_tag = aspect_polarity_tag
            # Looked up lazily so rows without any in-span token never
            # require the emotion class to be known.
            emotion_tag = emotion_map[emotion_class]
        else:
            aspect_tag = 'O'
            polarity_tag = -1  # -1 marks "no polarity" for non-aspect tokens
            emotion_tag = -1   # -1 marks "no emotion" for non-aspect tokens

        dat_lines.append(f"{word} {aspect_tag} {polarity_tag} {emotion_tag}")

    dat_lines.append("")  # Add a blank line to separate sentences
    return "\n".join(dat_lines)
def convert_and_save(df, output_dat):
    """Convert every row of ``df`` to .dat text and write it to ``output_dat``.

    Each row is rendered with ``convert_row_to_dat`` using a fixed mapping of
    the six emotion class names to integer ids; the per-row blocks are joined
    with newlines and written to the target file, after which a confirmation
    message is printed.

    Args:
        df: DataFrame whose rows provide the columns read by
            ``convert_row_to_dat``.
        output_dat: Path of the .dat file to create (overwritten if present).
    """
    # Manually specify the mapping of emotion classes to integers
    emotion_map = {
        "Anger": 0,
        "Disgust": 1,
        "Fear": 2,
        "Joy": 3,
        "Sadness": 4,
        "Surprise": 5
    }

    # Iterate rows explicitly rather than df.apply(..., axis=1).str.cat():
    # on an empty frame apply() hands back a DataFrame, which has no .str
    # accessor, so the original chain failed before anything was written.
    row_blocks = [convert_row_to_dat(row, emotion_map) for _, row in df.iterrows()]
    dat_content = "\n".join(row_blocks)

    # Explicit UTF-8 so non-ASCII review text is written identically on every
    # platform instead of depending on the locale's default encoding.
    with open(output_dat, 'w', encoding='utf-8') as f:
        f.write(dat_content)
    print(f"Data successfully converted to {output_dat} format.")

In [7]:
# Load the annotated dataset from its single CSV file and display the frame
dataset_csv = 'Annotated ABSA with Emotions Dataset.csv'
df = pd.read_csv(dataset_csv)
df


Unnamed: 0,id,Review Sentence,Aspect term,polarity,from,to,Anger,Disgust,Fear,Joy,Sadness,Surprise,Emotion Class
0,3121.0,But the staff was so horrible to us.,staff,negative,8,13,4.0,1.0,1.0,0.0,3.0,1.0,Anger
1,2777.0,"To be completely fair, the only redeeming fact...",food,positive,57,61,0.0,0.0,0.0,2.0,0.0,0.0,Joy
2,1634.0,"The food is uniformly exceptional, with a very...",food,positive,4,8,0.0,0.0,0.0,4.0,0.0,0.0,Joy
3,1634.0,"The food is uniformly exceptional, with a very...",kitchen,positive,55,62,0.0,0.0,0.0,4.0,0.0,0.0,Joy
4,1634.0,"The food is uniformly exceptional, with a very...",menu,neutral,141,145,0.0,0.0,0.0,4.0,0.0,0.0,Joy
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4828,,Each table has a pot of boiling water sunken i...,pot of boiling water,neutral,17,37,2.0,2.0,0.0,1.0,0.0,0.0,Anger
4829,,Each table has a pot of boiling water sunken i...,meats,neutral,99,104,0.0,0.0,0.0,3.0,0.0,1.0,Joy
4830,,Each table has a pot of boiling water sunken i...,vegetables,neutral,114,124,0.0,0.0,0.0,2.0,0.0,1.0,Joy
4831,,Each table has a pot of boiling water sunken i...,rice,neutral,130,134,0.0,0.0,0.0,2.0,0.0,1.0,Joy


In [9]:
# Row counts for a 70/30 train/test partition
n_rows = len(df)
train_size = int(0.7 * n_rows)
test_size = n_rows - train_size

# Positional split (no shuffling): leading rows train, trailing rows test
train_df, test_df = df.iloc[:train_size], df.iloc[train_size:]

# Export each partition to its own .dat file
convert_and_save(train_df, 'Restaurants.atepc.train.dat')
convert_and_save(test_df, 'Restaurants.atepc.test.dat')

Data successfully converted to Restaurants.atepc.train.dat format.
Data successfully converted to Restaurants.atepc.test.dat format.


In [41]:
# Reference note: emotion class name -> integer id, the same pairs as the
# emotion_map dictionary defined inside convert_and_save.
"""
"Anger": 0
"Disgust": 1
"Fear": 2
"Joy": 3
"Sadness": 4
"Surprise": 5
"""