In [11]:
import pandas as pd

# Read the raw CSV export into the working DataFrame `df`.
raw_csv_path = "../datasets/raw/emotion-emotion_69k.csv"
df = pd.read_csv(raw_csv_path)

# List the column labels exactly as loaded, before any cleaning is applied.
print("Original Columns:", df.columns.tolist(), sep="\n")

Original Columns:
['Unnamed: 0', 'Situation', 'emotion', 'empathetic_dialogues', 'labels', 'Unnamed: 5', 'Unnamed: 6']


In [12]:
# Rename the first column to 'sn' when it carries no real header (its label
# contains 'Unnamed' or is blank). Rebinding `df` avoids in-place mutation.
first_col = df.columns[0]
if "Unnamed" in first_col or first_col.strip() == "":
    df = df.rename(columns={first_col: "sn"})

# Keep only the needed columns (if they exist). This explicit whitelist also
# discards every remaining 'Unnamed: N' junk column, so no separate
# pattern-based column filter is needed.
keep_cols = ["sn", "Situation", "emotion", "empathetic_dialogues", "labels"]
df = df[[col for col in keep_cols if col in df.columns]]

# Remove duplicates from the Situation column, keeping one row per situation.
# Rows are first stably ordered so that complete rows come before rows with a
# missing value: otherwise an incomplete first occurrence would win over its
# complete duplicates and the whole situation would be lost once rows with
# missing values are dropped. Among equally complete rows the first
# occurrence is still the one kept; sort_index() restores the original order.
has_missing = df.isna().any(axis=1)
complete_first = has_missing.sort_values(kind="mergesort").index
df = (
    df.loc[complete_first]
    .drop_duplicates(subset=["Situation"], keep="first")
    .sort_index()
)

# validation: count the missing values left in each column
print("\n Missing Value Check:")
missing_summary = df.isnull().sum()
print(missing_summary)



 Missing Value Check:
sn                      0
Situation               0
emotion                 1
empathetic_dialogues    0
labels                  0
dtype: int64


In [13]:
# Drop every row that has a missing value in any column and report how many
# rows were removed. `df` is rebound to the result instead of being mutated
# with `inplace=True`, which keeps the step chain-friendly and avoids
# modifying the frame object produced by the previous cell.
initial_len = len(df)
df = df.dropna()
cleaned_len = len(df)

print(f"\n cleaned dataset: {initial_len - cleaned_len} rows with missing values removed")
print(f" Final row count: {cleaned_len}")


 cleaned dataset: 1 rows with missing values removed
 Final row count: 19204


In [14]:
# Preview the first three rows of the cleaned frame.
print("\n Sample Cleaned Data:", df.head(3), sep="\n")

# Write the cleaned frame to disk; index=False leaves the DataFrame index out
# of the file.
cleaned_csv_path = "../datasets/cleaned_empathetic_dataset.csv"
df.to_csv(cleaned_csv_path, index=False)
print("\n Saved as 'cleaned_empathetic_dataset.csv'")


 Sample Cleaned Data:
    sn                                          Situation      emotion  \
0    0  I remember going to the fireworks with my best...  sentimental   
5    5                       i used to scare for darkness       afraid   
10  10  I showed a guy how to run a good bead in weldi...        proud   

                                 empathetic_dialogues  \
0   Customer :I remember going to see the firework...   
5   Customer : it feels like hitting to blank wall...   
10      Customer :Hi how are you doing today\nAgent :   

                                               labels  
0   Was this a friend you were in love with, or ju...  
5                       Oh ya? I don't really see how  
10                         doing good.. how about you  

 Saved as 'cleaned_empathetic_dataset.csv'
