In [50]:
import pandas as pd

# Load the three dataset splits from CSV files in the working directory.
train = pd.read_csv( "train.csv" )
val   = pd.read_csv( "validation.csv" )
test  = pd.read_csv( "test.csv" )

# Preview ten random training rows. Sampling the rows directly (instead of
# shuffling the whole frame and taking the head) with a fixed seed keeps the
# displayed output identical across "Restart Kernel -> Run All". The min()
# guard keeps the cell working on frames with fewer than ten rows.
train.sample( n = min( 10, len( train ) ), random_state = 42 )

Unnamed: 0,label,text
2562,BookRestaurant,Book an indonesian brasserie for seven people ...
11881,SearchScreeningEvent,What time is 30 Days of Night playing?
7536,RateBook,For The Curious Incident of the Dog in the Nig...
2813,BookRestaurant,Book a reservation for a restaurant serving bu...
4245,GetWeather,will it rain today in Circleville
11019,SearchCreativeWork,Show me the Caribbean Blue television show
2106,BookRestaurant,Book a pub with fisn'n chips in Timberville .
7752,RateBook,rate this novel a three
10134,SearchCreativeWork,Find a photograph called Call on Me .
5071,GetWeather,Will it be hotter at nine am in Serbia


In [51]:
#plot label distribution for train, val, and test on the same plot
import plotly.express as px

def get_label_count( df ):
    """Count the non-null entries of every column for each distinct label.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains a "label" column.

    Returns
    -------
    pandas.DataFrame
        One row per distinct label; "label" is a regular column and every
        other column holds the number of non-null values for that label.
    """
    per_label = df.groupby( "label" )
    counts = per_label.count()
    # Move "label" out of the index so it can be used as a plot axis column.
    return counts.reset_index()

#Add train, val, and test data to plot
# Compute each split's per-label counts once instead of once per axis.
train_counts = get_label_count( train )
val_counts   = get_label_count( val )
test_counts  = get_label_count( test )

fig = px.bar(
    data_frame = train_counts,
    x = "label", 
    y = "text", 
    title = "Intent Label Distribution"
)

# Without a `color` argument px.bar produces one unnamed trace that is hidden
# from the legend, so the training split would be missing from the "Dataset"
# legend. Name it explicitly; this runs before the other bars are added so it
# only touches the training trace.
fig.update_traces( name = "Train", showlegend = True )

fig.add_bar( 
    x = val_counts[ "label" ],
    y = val_counts[ "text" ], 
    name = "Validation" 
)

fig.add_bar( 
    x = test_counts[ "label" ],
    y = test_counts[ "text" ], 
    name = "Test" 
)

#Update Axis & Legend
fig.update_layout( legend_title_text = "Dataset" )
fig.update_xaxes( title_text = "Intent" )
fig.update_yaxes( title_text = "Intent Frequency" )

fig.show()

## Preprocess SNIPS Data
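Split each CamelCase intent label into space-separated words and append a trailing period (e.g. `BookRestaurant` becomes `Book Restaurant.`).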

In [52]:
# Raw CamelCase intent label -> the same label with its words separated.
label_mapping = {
    "AddToPlaylist": "Add To Playlist",
    "BookRestaurant": "Book Restaurant",
    "GetWeather": "Get Weather",
    "PlayMusic": "Play Music",
    "RateBook": "Rate Book",
    "SearchCreativeWork": "Search Creative Work",
    "SearchScreeningEvent": "Search Screening Event"
}

# The exact strings clean_label produces, used to recognise labels that were
# already cleaned (e.g. when this notebook cell is executed a second time).
_cleaned_labels = { spaced + "." for spaced in label_mapping.values() }

def clean_label( label ):
    """Return the spaced-out form of an intent label followed by a period.

    Parameters
    ----------
    label : str
        Either a raw label that is a key of ``label_mapping`` or a label
        that has already been cleaned by this function.

    Returns
    -------
    str
        The mapped label with "." appended; an already-cleaned label is
        returned unchanged, so applying the function twice is harmless.

    Raises
    ------
    KeyError
        If ``label`` is neither a key of ``label_mapping`` nor an
        already-cleaned label.
    """
    if label in _cleaned_labels:
        return label
    return label_mapping[ label ] + "."

# Rewrite the "label" column of every split with its cleaned form.
for split in ( train, val, test ):
    split[ "label" ] = split[ "label" ].apply( clean_label )

# Preview ten random training rows. Sampling the rows directly with a fixed
# seed (rather than shuffling the whole frame and taking the head) keeps the
# displayed output reproducible; min() keeps the cell working on frames with
# fewer than ten rows.
train.sample( n = min( 10, len( train ) ), random_state = 42 )

Unnamed: 0,label,text
10838,Search Creative Work.,I need to find the saga Trail of the Yukon
10076,Search Creative Work.,Is it possible to find the book Live in Europe...
7892,Rate Book.,Give my current book 4 stars .
2671,Book Restaurant.,Can you make reservations for 1 person in IL
9529,Search Creative Work.,play A Box of Birds trailer
3179,Book Restaurant.,Book reservation at The Big Chill Cafe in AR f...
10421,Search Creative Work.,find the So This Is Goodbye saga
2807,Book Restaurant.,"Book a table for four in Orchard Grass Hills , NV"
3930,Get Weather.,What's the weather forecast for Togo on April ...
8938,Rate Book.,"Out of 6 , I give the following book zero ."
