In [1]:
import os

import pandas as pd

def read_tsv( path ):
    """
    Load a tab-delimited file into a DataFrame.

    The first line of the file supplies the column names.
    """
    frame = pd.read_csv( path, delimiter = "\t" )
    return frame

#Load the three splits of the reminder data
train = read_tsv( "reminder_train.tsv" )
val   = read_tsv( "reminder_eval.tsv" )
test  = read_tsv( "reminder_test.tsv" )

#Preview 10 shuffled rows; the fixed seed keeps the preview stable across re-runs
train.sample( frac = 1, random_state = 0 ).head( 10 )

Unnamed: 0,domain,utterance,semantic_parse
11222,reminder,remind me to get Milk,[IN:CREATE_REMINDER remind [SL:PERSON_REMINDED...
5709,reminder,Remind me to check the coffee in 5 minutes,[IN:CREATE_REMINDER Remind [SL:PERSON_REMINDED...
4141,reminder,Remind me to bring a white shirt for the meeti...,[IN:CREATE_REMINDER Remind [SL:PERSON_REMINDED...
4272,reminder,Create reminder to make dentist appointment fo...,[IN:CREATE_REMINDER Create reminder to [SL:TOD...
15074,reminder,"My laundry will be done in 45 minutes, please ...",[IN:CREATE_REMINDER [SL:TODO [IN:GET_TODO [SL:...
15186,reminder,Remind me to work out Thursday.,[IN:CREATE_REMINDER Remind [SL:PERSON_REMINDED...
13643,reminder,Set a reminder for my meeting with Milania tom...,[IN:CREATE_REMINDER Set a reminder for [SL:PER...
17242,reminder,Remind me to talk to Ben about Georgia,[IN:CREATE_REMINDER Remind [SL:PERSON_REMINDED...
3606,reminder,I need to buy more diapers; will you remind me...,[IN:CREATE_REMINDER I need to [SL:TODO buy mor...
10514,reminder,Delete the reminder for Teresa's doctor appoin...,[IN:DELETE_REMINDER Delete the reminder for [S...


In [4]:
#concatenate all data
#ignore_index gives the combined frame a fresh 0..n-1 index instead of
#carrying over each split's own row labels (which would repeat across splits)
combined = pd.concat( [ train, val, test ], ignore_index = True )

def process( df ):
    """
    Convert a raw frame into a frame with 'text' and 'intent' columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the 'semantic_parse' and 'domain' columns. An 'utterance'
        column, if present, is renamed to 'text'. Any other columns are passed
        through unchanged. The input frame itself is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with 'intent' appended as the last column and
        'semantic_parse' / 'domain' removed.

    Raises
    ------
    KeyError
        If 'semantic_parse' or 'domain' is missing.
    """
    #extract intent: the label right after the first 'IN:' marker,
    #i.e. "[IN:GET_REMINDER Can you find me reminders of the event ]" -> "GET_REMINDER"
    #Rows without an 'IN:<label>' marker become NaN instead of an arbitrary substring.
    raw_intent = df['semantic_parse'].str.extract( r'IN:(\w+)', expand = False )

    #Turn 'GET_REMINDER' into 'Get Reminder.'
    intent = raw_intent.str.replace( '_', ' ', regex = False ).str.title() + '.'

    #rename returns a new frame, so the caller's frame stays untouched
    df = df.rename( columns = { 'utterance': 'text' } )
    df['intent'] = intent
    df = df.drop( columns = ['semantic_parse', 'domain'] )
    return df

#Normalise the intent labels of the combined frame, then preview the first rows
combined = combined.pipe( process )
combined.head( n = 10 )

Unnamed: 0,text,intent
0,remind me to write thank you letters to invited,Create Reminder.
1,Remind me of the information in the note secti...,Create Reminder.
2,remind me to take my meds at 8am and 6pm daily,Create Reminder.
3,I need to text Nicquana tonight at 7pm. Can yo...,Create Reminder.
4,Remind me to change my flight to New York.,Create Reminder.
5,delete my reminder to do my homework tonight,Delete Reminder.
6,please remind me on Saturday at 9 am to leave ...,Create Reminder.
7,Remind me 30 min before the Fluid dynamics exa...,Create Reminder.
8,Remind me at 5PM to schedule my hair appointme...,Create Reminder.
9,REMIND MY BAND TO BRING THEIR GEAR FOR TOMORRO...,Create Reminder.


In [3]:
#Persist the combined frame; the row index is written as the first column
combined.to_csv( path_or_buf = "data.csv" )

In [5]:
#Normalise each split on its own so the train / val / test boundaries are kept.
#NOTE: re-running this cell by itself fails, because process() has already
#dropped the 'semantic_parse' column it reads from.
train = process( train )
val   = process( val )
test  = process( test )

#to_csv does not create missing folders, so make sure "data/" exists first
os.makedirs( "data", exist_ok = True )

train.to_csv("data/train.csv", index=False)
val.to_csv("data/val.csv", index=False)
test.to_csv("data/test.csv", index=False)