In [1]:
from pathlib import Path

import pandas as pd

def read_tsv( path ):
    """
    Load a tab-delimited file into a DataFrame.

    The first line of the file supplies the column names (pandas' default
    header handling); `path` is passed straight through to `pd.read_csv`.
    """
    frame = pd.read_csv( path, delimiter = "\t" )
    return frame

#Load the three dataset splits from their tab-separated files
train = read_tsv( "weather_train.tsv" )
val   = read_tsv( "weather_eval.tsv" )
test  = read_tsv( "weather_test.tsv" )

#Preview 10 random training rows. The fixed random_state keeps the preview
#identical across "Restart & Run All"; sampling n rows directly avoids
#shuffling the whole frame, and min() keeps it valid for frames under 10 rows.
train.sample( n = min( 10, len( train ) ), random_state = 0 )

Unnamed: 0,domain,utterance,semantic_parse
7530,weather,Will it rain tonight?,[IN:GET_WEATHER Will it [SL:WEATHER_ATTRIBUTE ...
9689,weather,What time is the weather forecast for rain in ...,[IN:GET_WEATHER What time is the weather forec...
12724,weather,what is the record high temperature,[IN:UNSUPPORTED_WEATHER what is the record hig...
18674,weather,Get me today's weather,[IN:GET_WEATHER Get me [SL:DATE_TIME today ] '...
7337,weather,Will it be sunny in San Francisco?,[IN:GET_WEATHER Will it be [SL:WEATHER_ATTRIBU...
17189,weather,how much rain can be expected in cm tonight?,[IN:UNSUPPORTED_WEATHER how much [SL:WEATHER_A...
19851,weather,How often is the sun out in Melbourne?,[IN:UNSUPPORTED_WEATHER How often is the sun o...
5476,weather,What time is sunset tomorrow?,[IN:GET_SUNSET What time is sunset [SL:DATE_TI...
22933,weather,let me know the temperature in turkey,[IN:GET_WEATHER let me know the temperature in...
3284,weather,Carbondale Forecast,[IN:GET_WEATHER [SL:LOCATION Carbondale ] Fore...


In [2]:
#concatenate all data
#Each split was read with its own 0-based row index, so a plain concat would
#leave duplicate index labels; ignore_index renumbers the rows 0..n-1.
combined = pd.concat( [ train, val, test ], ignore_index = True )

def process( df ):
    """Turn a raw split into a two-field (text, intent) frame.

    Expects the columns 'utterance', 'semantic_parse' and 'domain'.
    'utterance' is renamed to 'text', a human-readable 'intent' column is
    derived from 'semantic_parse', and 'semantic_parse' / 'domain' are
    dropped. Any other columns are kept. The input frame is not modified;
    a new DataFrame is returned.

    Rows whose parse contains no 'IN:<NAME>' token get a missing intent (NaN).
    """
    #extract intent
    def extract_intent( parses ):
        """Extracts the first intent name from each semantic parse.
        i.e. "[IN:GET_WEATHER Will it rain tonight ]" -> "GET_WEATHER"
        """
        #Match the identifier itself instead of slicing up to the first
        #space, so parses without a space (or without a closing bracket)
        #are not truncated.
        return parses.str.extract( r'IN:(\w+)', expand = False )

    def correct_intent( intents ):
        #Turn 'GET_WEATHER' into 'Get Weather.'
        return intents.str.replace( '_', ' ', regex = False ).str.title() + '.'

    #Vectorised string methods also work on an empty frame, where a row-wise
    #apply( axis = 1 ) would return a DataFrame instead of a Series.
    intents = correct_intent( extract_intent( df['semantic_parse'] ) )

    df = df.rename( columns = { 'utterance': 'text' } )
    df = df.assign( intent = intents )
    df = df.drop( columns = ['semantic_parse', 'domain'] )
    return df

#Normalise the intent labels of the combined frame, then preview the first rows
combined = combined.pipe( process )
combined.head( n = 10 )

Unnamed: 0,text,intent
0,Whats the weather supposed to be today,Get Weather.
1,how cold is it?,Get Weather.
2,Can I get the 10 day forecast for Phoenix?,Get Weather.
3,What is the weather news for today?,Get Weather.
4,Tell me the weather conditions,Get Weather.
5,What's the weather like Saturday?,Get Weather.
6,where is it going to rain in the area today,Get Weather.
7,Check upcoming weather reports.,Get Weather.
8,How warm is it outside right now?,Get Weather.
9,Is it cold out?,Get Weather.


In [3]:
#Export the combined frame; the row index is written as the first column
#(the per-split exports later in the notebook pass index=False instead).
combined.to_csv( "data.csv", index = True )

In [4]:
#Apply the same clean-up to every split; the names are rebound so any later
#cell sees the processed frames.
train = process( train )
val   = process( val )
test  = process( test )

#to_csv does not create missing directories, so make sure data/ exists first.
out_dir = Path( "data" )
out_dir.mkdir( parents = True, exist_ok = True )

#Write each split without its row index.
for split_name, split_df in ( ( "train", train ), ( "val", val ), ( "test", test ) ):
    split_df.to_csv( out_dir / f"{split_name}.csv", index = False )