In [1]:
from pathlib import Path

import pandas as pd

def read_tsv( path ):
    """
    Load a tab-delimited file into a DataFrame.

    The first line of the file is used as the column header; all other
    parsing options are left at the pandas defaults.
    """
    tab = "\t"
    frame = pd.read_csv( path, sep = tab )
    return frame

# Load the three raw splits.
train = read_tsv( "reminder_train.tsv" )
val   = read_tsv( "reminder_eval.tsv" )
test  = read_tsv( "reminder_test.tsv" )

# Preview ten shuffled rows; the fixed seed keeps the displayed sample
# identical on every run instead of changing with each execution.
train.sample( frac = 1, random_state = 42 ).head( 10 )

Unnamed: 0,domain,utterance,semantic_parse
1111,reminder,Remind me to set up our 4th of July party today.,[IN:CREATE_REMINDER Remind [SL:PERSON_REMINDED...
14322,reminder,Reset all reminders to due tomorrow,[IN:UPDATE_REMINDER_DATE_TIME Reset [SL:AMOUNT...
15546,reminder,cancel all reminders for the week,[IN:DELETE_REMINDER cancel [SL:AMOUNT all ] re...
14955,reminder,what reminders to I have,[IN:GET_REMINDER what reminders to [SL:PERSON_...
4288,reminder,Set a reminder for Storm's housewarming party ...,[IN:CREATE_REMINDER Set a reminder for [SL:TOD...
8042,reminder,show meeting reminders for both jim and sally,[IN:GET_REMINDER [SL:METHOD_RETRIEVAL_REMINDER...
1597,reminder,set a daily reminder for mediation at eight th...,[IN:CREATE_REMINDER set a [SL:RECURRING_DATE_T...
2633,reminder,Set up a reminder for August 20 to have lunch ...,[IN:CREATE_REMINDER Set up a reminder [SL:DATE...
3809,reminder,What reminders do I have set for Wednesday?,[IN:GET_REMINDER What reminders do [SL:PERSON_...
14904,reminder,remind me to pay the water bill friday,[IN:CREATE_REMINDER remind [SL:PERSON_REMINDED...


In [2]:
# Concatenate all splits into one frame.
# ignore_index=True gives the result a fresh 0..n-1 index; without it the
# index labels of the individual frames are kept and therefore repeat.
combined = pd.concat( [ train, val, test ], ignore_index = True )

def process( df ):
    """
    Convert a raw split into a two-column ``text`` / ``intent`` frame.

    The top-level intent is the name following the first ``IN:`` token of each
    ``semantic_parse`` string, rewritten as a sentence-style label, e.g.
    "[IN:GET_REMINDER what reminders do I have ]" -> "Get Reminder.".

    Parameters
    ----------
    df : pd.DataFrame
        Frame holding the columns ``semantic_parse`` and ``domain``; an
        ``utterance`` column, when present, is renamed to ``text``.

    Returns
    -------
    pd.DataFrame
        A new frame (the input is left unmodified) with ``semantic_parse`` and
        ``domain`` dropped and an ``intent`` column added. Rows whose parse
        contains no ``IN:`` token get a missing ``intent`` value.

    Raises
    ------
    KeyError
        If ``semantic_parse`` or ``domain`` is not a column of ``df``.
    """
    # The intent name ends at the first whitespace or "]" after "IN:", so the
    # result does not depend on where the first space of the whole string is
    # (e.g. a leading space no longer produces an empty intent).
    intent_name = df['semantic_parse'].str.extract( r'IN:([^\s\]]+)', expand = False )

    # Turn 'GET_REMINDER' into 'Get Reminder.'
    intent_label = intent_name.str.replace( '_', ' ', regex = False ).str.title() + '.'

    return (
        df.rename( columns = { 'utterance': 'text' } )
          .assign( intent = intent_label )
          .drop( columns = ['semantic_parse', 'domain'] )
    )

# Derive readable intent labels for the combined data and preview the result
combined = combined.pipe( process )
combined.head( n = 10 )

Unnamed: 0,text,intent
0,remind me to write thank you letters to invited,Create Reminder.
1,Remind me of the information in the note secti...,Create Reminder.
2,remind me to take my meds at 8am and 6pm daily,Create Reminder.
3,I need to text Nicquana tonight at 7pm. Can yo...,Create Reminder.
4,Remind me to change my flight to New York.,Create Reminder.
5,delete my reminder to do my homework tonight,Delete Reminder.
6,please remind me on Saturday at 9 am to leave ...,Create Reminder.
7,Remind me 30 min before the Fluid dynamics exa...,Create Reminder.
8,Remind me at 5PM to schedule my hair appointme...,Create Reminder.
9,REMIND MY BAND TO BRING THEIR GEAR FOR TOMORRO...,Create Reminder.


In [3]:
# Write the combined data without the index, matching the per-split CSVs
# written with index=False; otherwise the index is emitted as an extra
# unnamed first column.
combined.to_csv( "data.csv", index = False )

In [5]:
# NOTE: the raw names are rebound to the processed frames, so this cell cannot
# be re-run on its own: process() reads the 'semantic_parse' column and drops it.
train = process( train )
val   = process( val )
test  = process( test )

# to_csv does not create missing directories, so make sure the target exists.
output_dir = Path( "data" )
output_dir.mkdir( parents = True, exist_ok = True )

train.to_csv( output_dir / "train.csv", index = False )
val.to_csv( output_dir / "val.csv", index = False )
test.to_csv( output_dir / "test.csv", index = False )

In [5]:
# Distinct intent labels present in the combined data
print( combined["intent"].unique() )
# Number of rows per intent label
print( combined["intent"].value_counts() )

['Create Reminder.' 'Delete Reminder.' 'Update Reminder.'
 'Update Reminder Todo.' 'Update Reminder Date Time.' 'Get Reminder.'
 'Get Reminder Date Time.' 'Get Reminder Location.' 'Get Reminder Amount.'
 'Help Reminder.']
intent
Create Reminder.              14837
Delete Reminder.               4401
Get Reminder.                  3773
Update Reminder Date Time.     1589
Get Reminder Date Time.         498
Update Reminder.                391
Update Reminder Todo.           240
Get Reminder Location.          174
Get Reminder Amount.            172
Help Reminder.                   58
Name: count, dtype: int64
