# Named Entity Recognition Using spaCy
by Robert Ayub Odhiambo (http://www.a4ayub.me)

This notebook is for illustration purposes only using sample data with no intention to deliver a model with high accuracy. This is to be used for the meetup:

https://www.eventbrite.com/e/pieai-nairobi-fundamentals-of-building-a-retrieval-based-chatbot-tickets-113045827112?aff=ebdssbonlinesearch&utm-medium=discovery&utm-campaign=social&utm-content=attendeeshare&utm-source=cp&utm-term=destsearch

In [1]:
# Import the libraries
# Standard Imports
import pandas as pd
import numpy as np
import nltk
from nltk.tokenize import TweetTokenizer
nltk.download('wordnet')

import spacy
import en_core_web_sm
spacy_nlp = en_core_web_sm.load()

import random
from spacy.util import minibatch, compounding
from pathlib import Path

# Data Cleaning Imports
from bs4 import BeautifulSoup
import re

# Modeling Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer, 
CountVectorizer, TfidfTransformer
, CountVectorizer, TfidfTransformer
from sklearn.pipeline import Pipeline
from sklearn.multiclass import OneVsRestClassifier

from sklearn.svm import LinearSVC

# Testing
from sklearn.model_selection import train_test_split


# Metrics
from sklearn.metrics import classification_report,
accuracy_score,precision_score, recall_score, f1_score, confusion_matrix

# Export as pickle file
import joblib

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\rodhiambo2\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [2]:
# Pandas Settings
pd.set_option('display.max_colwidth', -1)
pd.set_option("display.max_columns", 250)
pd.set_option("display.max_rows", 1000)

In [3]:
# Import the data
flights_df = pd.read_csv("dataset/flight-searches.csv",engine="python");

In [4]:
flights_df.sample(10)

Unnamed: 0,searchTerms,action_id,search_string,snippet,displayLink,link,queryTime,totalResults,cacheId,formattedUrl,htmlFormattedUrl,htmlSnippet,htmlTitle,kind,pagemap,cseName,count,startIndex,inputEncoding,outputEncoding,safe,cx,gl,searchTime,formattedSearchTime,formattedTotalResults,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 30,Unnamed: 31,Unnamed: 32,Unnamed: 33,Unnamed: 34,Unnamed: 35,Unnamed: 36,Unnamed: 37,Unnamed: 38,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42,Unnamed: 43,Unnamed: 44,Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48,Unnamed: 49,Unnamed: 50,Unnamed: 51,Unnamed: 52,Unnamed: 53,Unnamed: 54,Unnamed: 55,Unnamed: 56,Unnamed: 57,Unnamed: 58,Unnamed: 59,Unnamed: 60,Unnamed: 61,Unnamed: 62,Unnamed: 63,Unnamed: 64,Unnamed: 65,Unnamed: 66,Unnamed: 67,Unnamed: 68,Unnamed: 69,Unnamed: 70,Unnamed: 71,Unnamed: 72,Unnamed: 73,Unnamed: 74,Unnamed: 75,Unnamed: 76,Unnamed: 77,Unnamed: 78,Unnamed: 79,Unnamed: 80,Unnamed: 81,Unnamed: 82,Unnamed: 83,Unnamed: 84,Unnamed: 85,Unnamed: 86,Unnamed: 87,Unnamed: 88,Unnamed: 89,Unnamed: 90,Unnamed: 91,Unnamed: 92,Unnamed: 93,Unnamed: 94,Unnamed: 95,Unnamed: 96,Unnamed: 97,Unnamed: 98,Unnamed: 99,Unnamed: 100,Unnamed: 101,Unnamed: 102,Unnamed: 103,Unnamed: 104,Unnamed: 105,Unnamed: 106,Unnamed: 107,Unnamed: 108,Unnamed: 109,Unnamed: 110,Unnamed: 111,Unnamed: 112,Unnamed: 113,Unnamed: 114,Unnamed: 115,Unnamed: 116,Unnamed: 117,Unnamed: 118,Unnamed: 119
797,flights to honolulu,8,Flights to Honolulu (HNL) on Orbitz.com,"Flights to Honolulu (HNL): Search on Orbitz for cheap Honolulu flights, airlines, \nand airfares to Honolulu.",www.orbitz.com,https://www.orbitz.com/Cheap-Flights-To-Honolulu.d1488.Travel-Guide-Flights,2019-01-01 11:26:30.620552+00:00,19200000,4xs7NHZl5IoJ,https://www.orbitz.com/Cheap-Flights-To-Honolulu.d1488.Travel-Guide- Flights,https://www.orbitz.com/Cheap-<b>Flights-To-Honolulu</b>.d1488.Travel-Guide- <b>Flights</b>,"<b>Flights to Honolulu</b> (HNL): Search on Orbitz for cheap Honolulu flights, airlines, <br>\nand airfares to Honolulu.",<b>Flights to Honolulu</b> (HNL) on Orbitz.com,customsearch#result,"{'metatags': [{'oip.set': 'false', 'oip.yes': 'false', 'oip.no': 'false', 'oip.cantrack': 'true', 'viewport': 'width=device-width, initial-scale=1.0', 'ewe:page-epoch': '1546299199322', 'ewe:page': 'FLEX', 'flex:page-type': 'TRAVEL_GUIDE_FLIGHTS', 'flex:template-id': '70201-en_US-Travel-Guide-Flights-flow-mobile', 'flex:template-fm-id': '1352', 'flex:template-version': '38', 'essclientid': 'flex.flight.Travel-Guide-Flights', 'og:title': 'Flights to Honolulu (HNL) on Orbitz.com', 'og:type': 'website', 'og:description': 'Flights to Honolulu (HNL): Search on Orbitz for cheap Honolulu flights, airlines, and airfares to Honolulu', 'og:locale': 'en_US', 'og:site_name': 'Orbitz.com', 'og:url': 'https://www.orbitz.com/Cheap-Flights-To-Honolulu.d1488.Travel-Guide-Flights'}], 'listitem': [{'item': 'Orbitz.com', 'name': 'Orbitz.com', 'url': '/', 'position': '1'}, {'item': 'Flights', 'name': 'Flights', 'url': '/Flights', 'position': '2'}, {'item': 'United States of America', 'name': 'United States of America', 'url': '/Destinations-In-United-States-Of-America.d201.Flight-Destinations', 'position': '3'}, {'item': 'Hawaii', 'name': 'Hawaii', 'url': '/Destinations-In-Hawaii.d213.Flight-Destinations', 'position': '4'}, {'name': 'Flights to Honolulu', 'url': 'http://www.orbitz.com/Cheap-Flights-To-Honolulu.d1488.Travel-Guide-Flights'}], 'map': [{'url': 'https://maps.google.com/maps?ll=21.30954,-157.8616&z=11'}]}",PySearch,10,1,utf8,utf8,off,012859022920491477448:pubdbfjmmec,us,0.28267,0.28,19200000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1777,tickets to auckland,7,Cheap Flights to Auckland from $285 - Cheapflights.com.au,"The simple way to find cheap flights to Auckland. Quick and easy, cheapflights.\ncom.au finds the lowest prices on Auckland flights.",www.cheapflights.com.au,https://www.cheapflights.com.au/flights-to-Auckland/,2019-01-01 11:26:34.465956+00:00,17800000,YyXMJx0TSEMJ,https://www.cheapflights.com.au/flights-to-Auckland/,https://www.cheap<b>flights</b>.com.au/<b>flights-to-Auckland</b>/,"The simple way to find cheap <b>flights to Auckland</b>. Quick and easy, cheapflights.<br>\ncom.au finds the lowest prices on Auckland flights.",Cheap <b>Flights to Auckland</b> from $285 - Cheapflights.com.au,customsearch#result,"{'cse_thumbnail': [{'width': '150', 'height': '180', 'src': 'https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcRZ0mczUI_6_RMqABWm5cJqq0CGg6ZUctjNN-Rx4kRgSB0rV5LNcAWHWYo'}], 'metatags': [{'viewport': 'width=device-width, initial-scale=1', 'format-detection': 'telephone=no', 'r9-version': 'R446c', 'r9-built': '20181220.035044', 'r9-rendered': '20181225.001515.EST', 'og:image': 'https://www.cheapflights.com.au/rimg/provider-logos/common/socialmedia/cheapflights-logo.png?width=1200&height=630&crop=false', 'og:image:width': '1200', 'og:image:height': '630', 'og:title': 'Cheap Flights to Auckland from $285 - Cheapflights.com.au', 'og:type': 'website', 'og:description': 'The simple way to find cheap flights to Auckland. Quick and easy, cheapflights.com.au finds the lowest prices on Auckland flights.', 'og:url': 'https://www.cheapflights.com.au/flights-to-Auckland/', 'og:site_name': 'Cheapflights', 'fb:pages': '90811893045', 'twitter:card': 'summary_large_image', 'twitter:site': '@Cheapflights', 'twitter:creator': '@Cheapflights', 'twitter:title': 'Cheap Flights to Auckland from $285 - Cheapflights.com.au', 'twitter:description': 'The simple way to find cheap flights to Auckland. Quick and easy, cheapflights.com.au finds the lowest prices on Auckland flights.', 'twitter:image:src': 'https://www.cheapflights.com.au/rimg/provider-logos/common/socialmedia/cheapflights-logo.png?width=440&height=220&crop=false', 'kayak_page': 'flight,static-flight-route-cf,splits'}], 'breadcrumb': [{'url': 'Home', 'title': 'Home'}, {'url': 'Oceania', 'title': 'Oceania'}, {'url': 'New Zealand', 'title': 'New Zealand'}], 'cse_image': [{'src': 'https://www.cheapflights.com.au/rimg/dimg/16/9f/587315ec-city-43982-1644186c27a.jpg?width=150&height=180&xhint=3565&yhint=1890&crop=true'}]}",PySearch,10,1,utf8,utf8,off,012859022920491477448:pubdbfjmmec,us,0.269792,0.27,17800000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1012,tickets to bangkok,3,Cheap Flights to Bangkok from Â£266 in 2019 | Skyscanner,Flights to Bangkok. We compare all major airlines and travel agents for cheap \nBangkok flights. Search and Save now at Skyscanner.net.,www.skyscanner.net,https://www.skyscanner.net/flights-to/bkkt/cheap-flights-to-bangkok.html,2019-01-01 11:26:31.622956+00:00,57000000,F8cw4EpkRLoJ,https://www.skyscanner.net/flights-to/.../cheap-flights-to-bangkok.html,https://www.skyscanner.net/<b>flights</b>-to/.../cheap-<b>flights-to-bangkok</b>.html,<b>Flights to Bangkok</b>. We compare all major airlines and travel agents for cheap <br>\nBangkok flights. Search and Save now at Skyscanner.net.,Cheap <b>Flights to Bangkok</b> from Â£266 in 2019 | Skyscanner,customsearch#result,"{'cse_thumbnail': [{'width': '200', 'height': '200', 'src': 'https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcQwJ7rz_K1OMjPsqqDB205BJ9-b_0goeGaNJ5I77d-zJJzZxiYY--ci6LYv'}], 'product': [{'name': 'Flights to Bangkok'}], 'metatags': [{'viewport': 'width=device-width, initial-scale = 1.0, maximum-scale=1.0, user-scalable=no', 'theme-color': '#008ca8', 'msapplication-tilecolor': '#21c4d9', 'skype_toolbar': 'SKYPE_TOOLBAR_PARSER_COMPATIBLE', 'y_key': '3ccd97fca3bc5b05', 'msapplication-id': 'App', 'msapplication-packagefamilyname': 'Skyscanner.Skyscanner_623c9he0pwcym', 'msapplication-minversion': '1.2.0.0', 'og:title': 'Found the cheapest flights to Bangkok. Prices from Â£340.', 'og:url': 'https://www.skyscanner.net/flights-to/bkkt/cheap-flights-to-bangkok.html?utm_medium=social&utm_campaign=addthis&utm_source=facebook_uk', 'og:image': 'https://www.skyscanner.net/images/opengraph_v1.png', 'og:description': 'Skyscanner compares hundreds of airlines worldwide for free. It finds the cheapest flights fast: saves you time, saves you money.', 'og:type': 'product', 'place:location:latitude': '13.689689', 'place:location:longitude': '100.742483'}], 'hproduct': [{'fn': 'Flights to Bangkok'}], 'cse_image': [{'src': 'https://www.skyscanner.net/images/opengraph_v1.png'}]}",PySearch,10,1,utf8,utf8,off,012859022920491477448:pubdbfjmmec,us,0.259623,0.26,57000000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3802,tickets to artvin,1,Economy Flights to Artvin - TripAdvisor,Economy Flights to Artvin: Enter your dates once and have TripAdvisor search \nmultiple sites to find the best prices on Artvin flights.,www.tripadvisor.com,https://www.tripadvisor.com/Flights-g652371-zfc1-Artvin_Artvin_Province_Turkish_Black_Sea_Coast-Cheap_Discount_Airfares.html,2019-01-01 11:26:42.763140+00:00,92500,8yi32fkmyskJ,https://www.tripadvisor.com/Flights-g652371-zfc1-Artvin_Artvin_Province_ Turkish_Black_Sea_Coast-Cheap_Discount_Airfares.html,https://www.tripadvisor.com/<b>Flights</b>-g652371-zfc1-<b>Artvin</b>_<b>Artvin</b>_Province_ Turkish_Black_Sea_Coast-Cheap_Discount_Airfares.html,Economy <b>Flights to Artvin</b>: Enter your dates once and have TripAdvisor search <br>\nmultiple sites to find the best prices on Artvin flights.,Economy <b>Flights to Artvin</b> - TripAdvisor,customsearch#result,"{'cse_thumbnail': [{'width': '300', 'height': '168', 'src': 'https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcRklC6DssEEHHyOk_nWRmG8Ozt9QKM9VZirDqqvWTjYLF-1z_MaQJ7QMjU'}], 'metatags': [{'theme-color': '#00a680', 'format-detection': 'telephone=no', 'og:title': 'Economy Flights to Artvin - TripAdvisor', 'og:description': 'Economy Flights to Artvin: Enter your dates once and have TripAdvisor search multiple sites to find the best prices on Artvin flights.', 'og:image': 'https://media-cdn.tripadvisor.com/media/photo-s/0a/b7/b7/cf/borcka-karagol.jpg', 'og:image:width': '550', 'og:image:height': '309', 'og:type': 'website', 'og:url': 'http://www.tripadvisor.com/Flights-g652371-zfc1-Artvin_Artvin_Province_Turkish_Black_Sea_Coast-Cheap_Discount_Airfares.html', 'og:site_name': 'TripAdvisor', 'fb:admins': '100000982334629', 'fb:app_id': '162729813767876', 'fb:pages': '5863091683', 'viewport': 'width=device-width, initial-scale=1.0, user-scalable=no', 'apple-itunes-app': 'app-id=284876795'}], 'breadcrumb': [{'url': 'Europe', 'title': 'Europe'}, {'url': 'Turkey', 'title': 'Turkey'}, {'url': 'Turkish Black Sea Coast', 'title': 'Turkish Black Sea Coast'}, {'url': 'Artvin Province', 'title': 'Artvin Province'}, {'url': 'Artvin', 'title': 'Artvin'}, {'url': 'Artvin Flights', 'title': 'Artvin Flights'}, {'url': 'Artvin Economy Flights', 'title': 'Artvin Economy Flights'}], 'cse_image': [{'src': 'https://media-cdn.tripadvisor.com/media/photo-s/0a/b7/b7/cf/borcka-karagol.jpg'}]}",PySearch,10,1,utf8,utf8,off,012859022920491477448:pubdbfjmmec,uk,0.283406,0.28,92500,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1451,tickets to athens,1,Cheap Flights to Athens (ATH) from $394 - KAYAK,"Find flights to Athens on Lufthansa, JetBlue, SWISS and more. Fly round-trip from \nNew York from $394, from Chicago from $460, from Fort Lauderdale from $445Â ...",www.kayak.com,https://www.kayak.com/flight-routes/United-States-US0/Athens-Eleftherios-V--ATH,2019-01-01 11:26:33.435599+00:00,27000000,JzRAWPWF96UJ,https://www.kayak.com/flight-routes/.../Athens-Eleftherios-V--ATH,https://www.kayak.com/flight-routes/.../<b>Athens</b>-Eleftherios-V--ATH,"Find <b>flights to Athens</b> on Lufthansa, JetBlue, SWISS and more. Fly round-trip from <br>\nNew York from $394, from Chicago from $460, from Fort Lauderdale from $445&nbsp;...",Cheap <b>Flights to Athens</b> (ATH) from $394 - KAYAK,customsearch#result,"{'cse_thumbnail': [{'width': '310', 'height': '163', 'src': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ2TGuhU4BQGEAzqzd0JENgdlvVTL0_VD3l2HWQAv1P1r7ddVnTHXKx7Mga'}], 'metatags': [{'viewport': 'width=device-width, initial-scale=1', 'format-detection': 'telephone=no', 'kayak_split_flight_routes_xp': '0', 'msvalidate.01': 'F474AA342B64B17CDE53F5C5D4C07C7A', 'r9-version': 'R446c', 'r9-built': '20181220.035044', 'r9-rendered': '20181222.115038.EST', 'og:image': 'https://a1.r9cdn.net/rimg/provider-logos/common/socialmedia/kayak-logo.png?width=1200&height=630&crop=false', 'og:image:width': '1200', 'og:image:height': '630', 'og:title': 'Cheap Flights to Athens (ATH) from $394 - KAYAK', 'og:type': 'website', 'og:description': 'Find flights to Athens on Lufthansa, JetBlue, SWISS and more. Fly round-trip from New York from $394, from Chicago from $460, from Fort Lauderdale from $445, from Washington from $407, from Boston from $408. Search for Athens flights on KAYAK now to find the best deal.', 'og:url': 'https://www.kayak.com/flight-routes/United-States-US0/Athens-Eleftherios-V--ATH', 'og:site_name': 'KAYAK', 'fb:pages': '90811893045', 'twitter:card': 'summary_large_image', 'twitter:site': '@KAYAK', 'twitter:creator': '@KAYAK', 'twitter:title': 'Cheap Flights to Athens (ATH) from $394 - KAYAK', 'twitter:description': 'Find flights to Athens on Lufthansa, JetBlue, SWISS and more. Fly round-trip from New York from $394, from Chicago from $460, from Fort Lauderdale from $445, from Washington from $407, from Boston from $408. Search for Athens flights on KAYAK now to find the best deal.', 'twitter:image:src': 'https://a1.r9cdn.net/rimg/provider-logos/common/socialmedia/kayak-logo.png?width=440&height=220&crop=false', 'kayak_page': 'flight,static-flight-route,unknown'}], 'breadcrumb': [{'url': 'Flights', 'title': 'Flights'}, {'url': 'Worldwide', 'title': 'Worldwide'}, {'url': 'Europe', 'title': 'Europe'}, {'url': 'Greece', 'title': 'Greece'}], 'cse_image': [{'src': 'https://a1.r9cdn.net/rimg/provider-logos/common/socialmedia/kayak-logo.png?width=1200&height=630&crop=false'}]}",PySearch,10,1,utf8,utf8,off,012859022920491477448:pubdbfjmmec,us,0.278048,0.28,27000000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
333,flights to los angeles,4,Cheap Flights to Los Angeles from $23 | JETCOST,Cheap flights to Los Angeles (United States) âœˆ Compare airfares from airlines \nand travel agents and save on your Los Angeles flights with JETCOST (deals \nfromÂ ...,us.jetcost.com,https://us.jetcost.com/en/flights/united+states/los+angeles/,2019-01-01 11:26:28.970258+00:00,149000000,48AsM2koE90J,https://us.jetcost.com/en/flights/united+states/los+angeles/,https://us.jetcost.com/en/<b>flights</b>/united+states/<b>los</b>+<b>angeles</b>/,Cheap <b>flights to Los Angeles</b> (United States) âœˆ Compare airfares from airlines <br>\nand travel agents and save on your Los Angeles flights with JETCOST (deals <br>\nfrom&nbsp;...,Cheap <b>Flights to Los Angeles</b> from $23 | JETCOST,customsearch#result,"{'cse_thumbnail': [{'width': '225', 'height': '225', 'src': 'https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcTYDEBUTQmdls3mx2sr0l40Cy3q-sb2UMMZ2R00xCkjy2DQJqEwNBjpDqtb'}], 'metatags': [{'viewport': 'width=device-width,initial-scale=1,maximum-scale=1,user-scalable=no,minimal-ui', 'theme-color': '#12305a', 'msapplication-navbutton-color': '#12305a', 'google-play-app': 'app-id=com.jetcost.jetcost', 'apple-itunes-app': 'app-id=427791197', 'apple-mobile-web-app-status-bar-style': 'black-translucent', 'apple-mobile-web-app-capable': 'yes', 'title': 'Cheap flights to Los Angeles - Compare Los Angeles (United States) flight prices | JETCOST', 'og:title': 'Cheap flights to Los Angeles - Compare Los Angeles (United States) flight prices | JETCOST', 'og:description': 'Cheap flights to Los Angeles (United States) âœˆ Compare airfares from airlines and travel agents and save on your Los Angeles flights with JETCOST (deals from $23).', 'og:url': 'https://us.jetcost.com/en/flights/united+states/los+angeles/', 'og:type': 'company', 'og:image': 'https://a.jtcstatic.com/images/common/og.jpg', 'og:image:width': '512', 'og:image:height': '512', 'og:site_name': 'Jetcost'}], 'listitem': [{'item': 'Cheap flights', 'name': 'Cheap flights', 'position': '1'}, {'item': 'Flights by country', 'name': 'Flights by country', 'position': '2'}, {'item': 'Flights to United States', 'name': 'Flights to United States', 'position': '3'}], 'cse_image': [{'src': 'https://a.jtcstatic.com/images/common/og.jpg'}]}",PySearch,10,1,utf8,utf8,off,012859022920491477448:pubdbfjmmec,us,0.492194,0.49,149000000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3900,tickets to siem reap,9,"Cheap Flights to Siem Reap, Cambodia - Search Deals on Airfare to ...","Looking for cheap flights to Siem Reap, Cambodia from your destination? Search \nfor airfare and flight ticket deals at cheapflights.com and book your next flightÂ ...",www.cheapflights.com,https://www.cheapflights.com/flights-to-siem-reap/,2019-01-01 11:26:43.274045+00:00,3030000,EPc-BMaoKfYJ,https://www.cheapflights.com/flights-to-siem-reap/,https://www.cheap<b>flights</b>.com/<b>flights-to-siem</b>-<b>reap</b>/,"Looking for cheap <b>flights to Siem Reap</b>, Cambodia from your destination? Search <br>\nfor airfare and flight ticket deals at cheapflights.com and book your next flight&nbsp;...","Cheap <b>Flights to Siem Reap</b>, Cambodia - Search Deals on Airfare to ...",customsearch#result,"{'cse_thumbnail': [{'width': '310', 'height': '163', 'src': 'https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcTrnZaIxdGLxEJ5BzSQQJkQVHqPw20iUWamDtHzCekPCmSfYEO28XT48Zw9'}], 'metatags': [{'viewport': 'width=device-width, initial-scale=1', 'format-detection': 'telephone=no', 'r9-version': 'R446c', 'r9-built': '20181220.035044', 'r9-rendered': '20181220.073119.EST', 'og:image': 'https://www.cheapflights.com/rimg/provider-logos/common/socialmedia/cheapflights-logo.png?width=1200&height=630&crop=false', 'og:image:width': '1200', 'og:image:height': '630', 'og:title': 'Cheap Flights to Siem Reap, Cambodia - Search Deals on Airfare to Siem Reap from Cheapflights.com', 'og:type': 'website', 'og:description': 'Looking for cheap flights to Siem Reap, Cambodia from your destination? Search for airfare and flight ticket deals at cheapflights.com and book your next flight today.', 'og:url': 'https://www.cheapflights.com/flights-to-siem-reap/', 'og:site_name': 'Cheapflights', 'fb:pages': '90811893045', 'twitter:card': 'summary_large_image', 'twitter:site': '@Cheapflights', 'twitter:creator': '@Cheapflights', 'twitter:title': 'Cheap Flights to Siem Reap, Cambodia - Search Deals on Airfare to Siem Reap from Cheapflights.com', 'twitter:description': 'Looking for cheap flights to Siem Reap, Cambodia from your destination? Search for airfare and flight ticket deals at cheapflights.com and book your next flight today.', 'twitter:image:src': 'https://www.cheapflights.com/rimg/provider-logos/common/socialmedia/cheapflights-logo.png?width=440&height=220&crop=false', 'kayak_page': 'flight,static-flight-route-cf,splits'}], 'breadcrumb': [{'url': 'Home', 'title': 'Home'}, {'url': 'Flights', 'title': 'Flights'}, {'url': 'Asia', 'title': 'Asia'}, {'url': 'Cambodia', 'title': 'Cambodia'}], 'cse_image': [{'src': 'https://www.cheapflights.com/rimg/provider-logos/common/socialmedia/cheapflights-logo.png?width=1200&height=630&crop=false'}]}",PySearch,10,1,utf8,utf8,off,012859022920491477448:pubdbfjmmec,uk,0.341693,0.34,3030000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2465,flights to dublin,5,Flights to Dublin | Cheap Dublin flights | lastminute.com,Flights to Dublin. Find cheap flights to Dublin. Book your flight and save money \nwith lastminute.com.,www.lastminute.com,https://www.lastminute.com/flights/dublin,2019-01-01 11:26:36.737744+00:00,47700000,mRh_j9qTHPEJ,https://www.lastminute.com/flights/dublin,https://www.lastminute.com/<b>flights</b>/<b>dublin</b>,<b>Flights to Dublin</b>. Find cheap <b>flights to Dublin</b>. Book your flight and save money <br>\nwith lastminute.com.,<b>Flights to Dublin</b> | Cheap Dublin flights | lastminute.com,customsearch#result,"{'cse_thumbnail': [{'width': '299', 'height': '168', 'src': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTV4q7P9InXcNb2M84QynAmAUE5yUpRKMj6yiM05IaL3-NmZ9j0Z5yukDo'}], 'metatags': [{'viewport': 'width=device-width, user-scalable=no', 'og:title': 'Flights to Dublin from Â£9. Best lastminute.com offers.', 'og:description': 'Compare the best available deals to Dublin in real time: select the perfect combination from hundreds of airlines and book your next flight in just a few simple clicks!', 'og:url': 'https://www.lastminute.com/flights/dublin', 'og:site_name': 'lastminute.com', 'fb:page_id': '201882183211998', 'og:type': 'product', 'og:image': 'https://www.lastminute.com/flights/lastminute/img/dublin.jpg', 'twitter:card': 'summary', 'twitter:title': 'Flights to Dublin from Â£9 | Cheap Dublin flights | lastminute.com', 'twitter:description': 'Compare the best available deals to Dublin in real time: select the perfect combination from hundreds of airlines and book your next flight in just a few simple clicks!', 'twitter:url': 'https://www.lastminute.com/flights/dublin', 'twitter:image': 'https://www.lastminute.com/flights/lastminute/img/dublin.jpg', 'msapplication-tilecolor': '#ffffff', 'msapplication-tileimage': '/flights/static/current/images/lastminute/mstile-144x144.png', 'msapplication-config': '/flights/static/current/images/lastminute/browserconfig.xml', 'theme-color': '#EC008C'}], 'breadcrumb': [{'url': 'Flights', 'title': 'Flights'}, {'url': 'Ireland flights', 'title': 'Ireland flights'}], 'cse_image': [{'src': 'https://www.lastminute.com/flights/lastminute/img/dublin.jpg'}]}",PySearch,10,1,utf8,utf8,off,012859022920491477448:pubdbfjmmec,uk,0.316142,0.32,47700000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3160,tickets to guangzhou,10,"Cheap Flights to Guangzhou, China - Search Deals on Airfare to ...","Looking for cheap flights to Guangzhou, China from your destination? Search for \nairfare and flight ticket deals at cheapflights.com and book your next flightÂ ...",www.cheapflights.com,https://www.cheapflights.com/flights-to-guangzhou/,2019-01-01 11:26:39.839681+00:00,8150000,ZoM47ZJwu2wJ,https://www.cheapflights.com/flights-to-guangzhou/,https://www.cheap<b>flights</b>.com/<b>flights-to-guangzhou</b>/,"Looking for cheap <b>flights to Guangzhou</b>, China from your destination? Search for <br>\nairfare and flight ticket deals at cheapflights.com and book your next flight&nbsp;...","Cheap <b>Flights to Guangzhou</b>, China - Search Deals on Airfare to ...",customsearch#result,"{'cse_thumbnail': [{'width': '310', 'height': '163', 'src': 'https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcTrnZaIxdGLxEJ5BzSQQJkQVHqPw20iUWamDtHzCekPCmSfYEO28XT48Zw9'}], 'metatags': [{'viewport': 'width=device-width, initial-scale=1', 'format-detection': 'telephone=no', 'r9-version': 'R446c', 'r9-built': '20181220.035044', 'r9-rendered': '20181223.075746.EST', 'og:image': 'https://www.cheapflights.com/rimg/provider-logos/common/socialmedia/cheapflights-logo.png?width=1200&height=630&crop=false', 'og:image:width': '1200', 'og:image:height': '630', 'og:title': 'Cheap Flights to Guangzhou, China - Search Deals on Airfare to Guangzhou from Cheapflights.com', 'og:type': 'website', 'og:description': 'Looking for cheap flights to Guangzhou, China from your destination? Search for airfare and flight ticket deals at cheapflights.com and book your next flight today.', 'og:url': 'https://www.cheapflights.com/flights-to-guangzhou/', 'og:site_name': 'Cheapflights', 'fb:pages': '90811893045', 'twitter:card': 'summary_large_image', 'twitter:site': '@Cheapflights', 'twitter:creator': '@Cheapflights', 'twitter:title': 'Cheap Flights to Guangzhou, China - Search Deals on Airfare to Guangzhou from Cheapflights.com', 'twitter:description': 'Looking for cheap flights to Guangzhou, China from your destination? Search for airfare and flight ticket deals at cheapflights.com and book your next flight today.', 'twitter:image:src': 'https://www.cheapflights.com/rimg/provider-logos/common/socialmedia/cheapflights-logo.png?width=440&height=220&crop=false', 'kayak_page': 'flight,static-flight-route-cf,splits'}], 'breadcrumb': [{'url': 'Home', 'title': 'Home'}, {'url': 'Flights', 'title': 'Flights'}, {'url': 'Asia', 'title': 'Asia'}, {'url': 'China', 'title': 'China'}], 'cse_image': [{'src': 'https://www.cheapflights.com/rimg/provider-logos/common/socialmedia/cheapflights-logo.png?width=1200&height=630&crop=false'}]}",PySearch,10,1,utf8,utf8,off,012859022920491477448:pubdbfjmmec,uk,0.323187,0.32,8150000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3653,tickets to copenhagen,2,Cheap Flights to Copenhagen (CPH) from Â£13 - KAYAK,"Find flights to Copenhagen on Ryanair, easyJet and more. Fly return from \nLiverpool from Â£40, from Bristol from Â£56, from London from Â£25, from \nManchesterÂ ...",www.kayak.co.uk,https://www.kayak.co.uk/flight-routes/United-Kingdom-GB0/Kastrup-Copenhagen-CPH,2019-01-01 11:26:42.019916+00:00,18000000,qBnYwNRAGLkJ,https://www.kayak.co.uk/flight-routes/.../Kastrup-Copenhagen-CPH,https://www.kayak.co.uk/flight-routes/.../Kastrup-<b>Copenhagen</b>-CPH,"Find <b>flights to Copenhagen</b> on Ryanair, easyJet and more. Fly return from <br>\nLiverpool from Â£40, from Bristol from Â£56, from London from Â£25, from <br>\nManchester&nbsp;...",Cheap <b>Flights to Copenhagen</b> (CPH) from Â£13 - KAYAK,customsearch#result,"{'cse_thumbnail': [{'width': '310', 'height': '163', 'src': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ2TGuhU4BQGEAzqzd0JENgdlvVTL0_VD3l2HWQAv1P1r7ddVnTHXKx7Mga'}], 'metatags': [{'viewport': 'width=device-width, initial-scale=1', 'format-detection': 'telephone=no', 'msvalidate.01': 'F474AA342B64B17CDE53F5C5D4C07C7A', 'r9-version': 'R446c', 'r9-built': '20181220.035044', 'r9-rendered': '20181229.194244.EST', 'og:image': 'https://a1.r9cdn.net/rimg/provider-logos/common/socialmedia/kayak-logo.png?width=1200&height=630&crop=false', 'og:image:width': '1200', 'og:image:height': '630', 'og:title': 'Cheap Flights to Copenhagen (CPH) from Â£13 - KAYAK', 'og:type': 'website', 'og:description': 'Find flights to Copenhagen on Ryanair, easyJet and more. Fly return from Liverpool from Â£40, from Bristol from Â£56, from London from Â£25, from Manchester from Â£39, from Edinburgh from Â£13. Search for Copenhagen flights on KAYAK now to find the best deal.', 'og:url': 'https://www.kayak.co.uk/flight-routes/United-Kingdom-GB0/Kastrup-Copenhagen-CPH', 'og:site_name': 'KAYAK', 'fb:pages': '90811893045', 'twitter:card': 'summary_large_image', 'twitter:site': '@KAYAK', 'twitter:creator': '@KAYAK', 'twitter:title': 'Cheap Flights to Copenhagen (CPH) from Â£13 - KAYAK', 'twitter:description': 'Find flights to Copenhagen on Ryanair, easyJet and more. Fly return from Liverpool from Â£40, from Bristol from Â£56, from London from Â£25, from Manchester from Â£39, from Edinburgh from Â£13. Search for Copenhagen flights on KAYAK now to find the best deal.', 'twitter:image:src': 'https://a1.r9cdn.net/rimg/provider-logos/common/socialmedia/kayak-logo.png?width=440&height=220&crop=false', 'kayak_page': 'flight,static-flight-route,unknown', 'kayak_split_flight_routes_xp': '1'}], 'breadcrumb': [{'url': 'Flights', 'title': 'Flights'}, {'url': 'Worldwide', 'title': 'Worldwide'}, {'url': 'Europe', 'title': 'Europe'}, {'url': 'Denmark', 'title': 'Denmark'}], 'cse_image': [{'src': 'https://a1.r9cdn.net/rimg/provider-logos/common/socialmedia/kayak-logo.png?width=1200&height=630&crop=false'}]}",PySearch,10,1,utf8,utf8,off,012859022920491477448:pubdbfjmmec,uk,0.292458,0.29,18000000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


## Extracting Named Entities using spaCy

In [5]:
flights_df = flights_df[["search_string","action_id"]]

In [6]:
# Checkout the default pipes
spacy_nlp.pipe_names

['tagger', 'parser', 'ner']

In [7]:
title_text = flights_df["search_string"].iloc[1]
title_text

'$480 Flights to Hong Kong, China (HKG) - TripAdvisor'

In [8]:
spacy_document = spacy_nlp(title_text)
for ent in spacy_document.ents:
    print(ent.text,ent.label_)

Hong Kong GPE
China GPE


#### Updating named Entity Recognizer


In [9]:
# Getting the pipeline component
ner=spacy_nlp.get_pipe("ner")

#### Training Samples

In [10]:
# training data
TRAIN_DATA = [
              ("Cheap Flights to London, (LON) Airline Tickets", {"entities": [(17, 23, "CITY")]}),
              ("Flights to Bangkok, Thailand : Best Fare Guarantee on Plane Tickets ...", {"entities": [(11, 18, "CITY")]}),
              ("Deal Alert: Nonstop Flights to London From $369 Round-Trip", {"entities": [(31,37, "CITY")]}),
              ("I was driving a BMW", {"entities": [(16,19, "PRODUCT")]}),
              ("I ordered this from ShopClues", {"entities": [(20,29, "ORG")]}),
              ("Fridge can be ordered in Amazon ", {"entities": [(0,6, "PRODUCT")]}),
              ("I bought a new Washer", {"entities": [(16,22, "PRODUCT")]}),
              ("I bought a old table", {"entities": [(16,21, "PRODUCT")]}),
              ("I bought a fancy dress", {"entities": [(18,23, "PRODUCT")]}),
              ("I rented a camera", {"entities": [(12,18, "PRODUCT")]}),
              ("I rented a tent for our trip", {"entities": [(12,16, "PRODUCT")]}),
              ("I rented a screwdriver from our neighbour", {"entities": [(12,22, "PRODUCT")]}),
              ("I repaired my computer", {"entities": [(15,23, "PRODUCT")]}),
              ("I got my clock fixed", {"entities": [(16,21, "PRODUCT")]}),
              ("I got my truck fixed", {"entities": [(16,21, "PRODUCT")]}),
              ("Flipkart started it's journey from zero", {"entities": [(0,8, "ORG")]}),
              ("I recently ordered from Max", {"entities": [(24,27, "ORG")]}),
              ("Flipkart is recognized as leader in market",{"entities": [(0,8, "ORG")]}),
              ("I recently ordered from Swiggy", {"entities": [(24,29, "ORG")})
              ]

In [11]:
# Adding labels to the `ner`

for _, annotations in TRAIN_DATA:
    for ent in annotations.get("entities"):
        ner.add_label(ent[2])

In [12]:
# Disable pipeline components you dont need to change
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
unaffected_pipes = [pipe for pipe in spacy_nlp.pipe_names if pipe not in pipe_exceptions]

#### Training the NER model

In [13]:
# TRAINING THE MODEL
with spacy_nlp.disable_pipes(*unaffected_pipes):

  # Training for 30 iterations
  for iteration in range(30):

    # shuufling examples  before every iteration
    random.shuffle(TRAIN_DATA)
    losses = {}
    # batch up the examples using spaCy's minibatch
    batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
    for batch in batches:
        texts, annotations = zip(*batch)
        spacy_nlp.update(
                    texts,  # batch of texts
                    annotations,  # batch of annotations
                    drop=0.5,  # dropout - make it harder to memorise data
                    losses=losses,
                )
        print("Losses", losses)

Losses {'ner': 26.690871238708496}
Losses {'ner': 44.2666876912117}
Losses {'ner': 81.87548094987869}
Losses {'ner': 113.82922297716141}
Losses {'ner': 124.59676267579198}
Losses {'ner': 33.12117671966553}
Losses {'ner': 49.28369355201721}
Losses {'ner': 76.81937098503113}
Losses {'ner': 97.10603833198547}
Losses {'ner': 109.11544096469879}
Losses {'ner': 19.61870551109314}
Losses {'ner': 47.195892095565796}
Losses {'ner': 67.51750779151917}
Losses {'ner': 89.93313837051392}
Losses {'ner': 105.60835301876068}
Losses {'ner': 25.8051438331604}
Losses {'ner': 38.051573157310486}
Losses {'ner': 66.15990388393402}
Losses {'ner': 93.58843505382538}
Losses {'ner': 123.0826324224472}
Losses {'ner': 12.14603054523468}
Losses {'ner': 33.60123920440674}
Losses {'ner': 54.36260950565338}
Losses {'ner': 83.90079152584076}
Losses {'ner': 108.51874876022339}
Losses {'ner': 12.639101505279541}
Losses {'ner': 45.71678876876831}
Losses {'ner': 70.96029210090637}
Losses {'ner': 96.37977981567383}
Losses 

In [34]:
# Testing the model
doc = spacy_nlp("I wish to travel to Kisumu next week")
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])

Entities [('Kisumu', 'ORG'), ('next week', 'DATE')]


#### Save the model

In [15]:
# Save the  model to directory
output_dir = Path('./ner_models/')
spacy_nlp.to_disk(output_dir)
print("Saved model to", output_dir)

Saved model to ner_models


#### Loading the models

In [16]:
# Load the saved model and predict
print("Loading from", output_dir)
spacy_nlp_updated = spacy.load(output_dir)
doc = spacy_nlp_updated("Fridge can be ordered in FlipKart" )
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])

Loading from ner_models
Entities [('Fridge', 'PRODUCT'), ('FlipKart', 'ORG')]


### Modeling

In [17]:
# Perform Data Cleaning on the text
def cleaning(message):
    import nltk
    
    # 1. Remove HTML.
    html_text = BeautifulSoup(message,"html.parser").get_text()
    
    # 2. Remove non-letters.
    letters = re.sub("[^a-zA-Z]", " ", html_text)    
    letters = re.sub('[^a-zA-Zа-яА-Я1-9]+', ' ', html_text)
    letters = re.sub(' +',' ', html_text)
    letters = re.sub(r"\n", "", letters)    
    letters = re.sub(r"\r", "", letters) 
    #letters = re.sub(r"[0-9]", "", letters)
    letters = re.sub(r"\'", "", letters)    
    letters = re.sub(r"\"", "", letters)  
    
    # 3. Remove Any Emails that may be present
    letters = re.sub('\S*@\S*\s?', '', letters)
    
    # 4. Remove any newline characters
    letters = re.sub('\s+', ' ', letters)
    
    # 5. Remove the distracting single quotes
    letters = re.sub(r"\'", "", letters) 
        
    # 6. Convert to lower case.
    letters = letters.lower()
    
    # 7. Tokenize.
    tokens = nltk.word_tokenize(letters)
    
    # 8. Convert the stopwords list to "set" data type.
    #stops = set(nltk.corpus.stopwords.words("english"))
    
    # 9. Remove stop words. 
    #words = [w for w in tokens if not w in stops]
    
    # 10. Stemming
    #words = [nltk.stem.SnowballStemmer('english').stem(w) for w in words]
    
    # 11. Join the words back into one string separated by space, and return the result.
    return " ".join(tokens)

In [18]:
# Perform iterative data cleaning on each of the statements
flights_df['cleaned_search_string'] = flights_df['search_string'].apply(cleaning)
flights_df.sample(5)

Unnamed: 0,search_string,action_id,cleaned_search_string
3237,Cheap Pattaya flights and last minute deals - Opodo,7,cheap pattaya flights and last minute deals - opodo
465,Cheap flights to Dublin (DUB) from Â£9.78 | Ryanair.com,6,cheap flights to dublin ( dub ) from â£9.78 | ryanair.com
2558,Cheap Flights to Punta Cana from Â£375 - Cheapflights.co.uk,8,cheap flights to punta cana from â£375 - cheapflights.co.uk
2178,Cheap Flights to Makkah | Book Flights to Makkah Online - tajawal,8,cheap flights to makkah | book flights to makkah online - tajawal
1918,Cheap Flights to Rio de Janeiro â€“ C$705: Get Tickets Now | Expedia ...,8,cheap flights to rio de janeiro â€ “ c $ 705 : get tickets now | expedia ...


In [19]:
# One Hot Encode the target variable
flights_df['encoded_action_id'], encoded_action_mappings_by_id = flights_df['action_id'].factorize() 
flights_df.sample(2)

Unnamed: 0,search_string,action_id,cleaned_search_string,encoded_action_id
2130,Flights from London to Tokyo - lastminute.com,10,flights from london to tokyo - lastminute.com,9
1955,Book flights to Abu Dhabi (AUH) | Etihad Airways,5,book flights to abu dhabi ( auh ) | etihad airways,4


In [20]:
# Initialize Term Frequency and Inverse Document Frequency to be used
flights_tfidf = TfidfVectorizer(
    sublinear_tf=True, min_df=5, norm='l2', 
    encoding='latin-1', ngram_range=(1, 2), 
    stop_words='english')

In [21]:
# Extract the features from the text
derived_features = flights_tfidf.fit_transform(flights_df['cleaned_search_string'])

In [22]:
# Extract the labels
flights_labels = flights_df['encoded_action_id']

In [23]:
derived_features.shape

(4002, 1237)

In [24]:
# Initialize the algorithm
flights_lsvc_model_by_id = LinearSVC()

In [25]:
# split the dataset by category id
X_train_by_id, X_test_by_id, y_train_by_id, y_test_by_id, indices_train_by_id,indices_test_by_id = train_test_split(
    derived_features, flights_labels, flights_df.index, 
    test_size=0.33, random_state=12)

In [26]:
# Train 
flights_lsvc_model_by_id.fit(X_train_by_id, y_train_by_id)

LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
          verbose=0)

In [27]:
# Make predictions
y_pred_by_id = flights_lsvc_model_by_id.predict(X_test_by_id)

In [28]:
# View the classification 
print((classification_report(y_test_by_id, y_pred_by_id)))

              precision    recall  f1-score   support

           0       0.31      0.45      0.37       117
           1       0.25      0.23      0.24       156
           2       0.14      0.15      0.15       114
           3       0.19      0.19      0.19       125
           4       0.12      0.11      0.11       134
           5       0.11      0.11      0.11       139
           6       0.17      0.14      0.15       131
           7       0.13      0.12      0.13       139
           8       0.19      0.21      0.20       130
           9       0.24      0.21      0.22       136

    accuracy                           0.19      1321
   macro avg       0.18      0.19      0.19      1321
weighted avg       0.18      0.19      0.19      1321



In [29]:
# Checking accuracy by ID
accuracy_score(y_test_by_id, y_pred_by_id)

0.18925056775170326

In [30]:
# View the confusion matrix BY id
pd.DataFrame(confusion_matrix(y_test_by_id, y_pred_by_id))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,53,9,14,14,9,5,2,4,5,2
1,26,36,31,10,12,13,4,11,8,5
2,23,18,17,17,8,6,5,4,7,9
3,14,22,12,24,18,7,7,10,6,5
4,13,10,12,14,15,25,9,16,8,12
5,12,10,6,14,22,15,24,16,14,6
6,8,8,4,11,12,22,18,16,16,16
7,4,12,9,6,16,15,13,17,27,20
8,9,9,6,11,6,15,9,23,27,15
9,10,10,9,6,11,8,13,15,26,28


In [31]:
# Create a modeling pipeline by ID
lsvc_model_pipeline_by_id = Pipeline(
    [
        ('vectorizer', CountVectorizer(ngram_range=(1,2))),
        ('tfidf', TfidfTransformer(use_idf=True)),
        ('clf', OneVsRestClassifier(LinearSVC(class_weight='balanced')))
    ])

In [32]:
# Train the model
lsvc_model_pipeline_by_id.fit(flights_df['cleaned_search_string'], flights_df['action_id'])



Pipeline(memory=None,
         steps=[('vectorizer',
                 CountVectorizer(analyzer='word', binary=False,
                                 decode_error='strict',
                                 dtype=<class 'numpy.int64'>, encoding='utf-8',
                                 input='content', lowercase=True, max_df=1.0,
                                 max_features=None, min_df=1,
                                 ngram_range=(1, 2), preprocessor=None,
                                 stop_words=None, strip_accents=None,
                                 token_pattern='(?u)\\b\\w\\w+\\b',
                                 tokenizer=None, vocabula...)),
                ('tfidf',
                 TfidfTransformer(norm='l2', smooth_idf=True,
                                  sublinear_tf=False, use_idf=True)),
                ('clf',
                 OneVsRestClassifier(estimator=LinearSVC(C=1.0,
                                                         class_weight='balanced',
     

In [33]:
## Save the model
lsvc_model_by_id = joblib.dump(lsvc_model_pipeline_by_id,
                                            './models/flight-booking-model.pkl')

#### Load the saved model

In [35]:
loaded_lsvc_model_by_id = joblib.load('./models/flight-booking-model.pkl')

In [36]:
# Test message to predict the sentiment
test_message = ["I wish to travel to Kisumu next week!"]

In [37]:
# Perform the prediction
print("The predicted action is : {} ".format(loaded_lsvc_model_by_id.predict(test_message)[0]))

The predicted action is : 8 


# End