<a href="https://colab.research.google.com/github/Thofa90/Customer-Segmentation-for-Personalized-Rewards-using-Machine-Learning/blob/main/Master_Project_Travel_Tide_Feature_Engineering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Feature Engineering**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

**Import each Table**

In [None]:
# Mount Google Drive (Colab only) and read the four pre-filtered CSV tables.
from google.colab import drive

drive.mount('/content/drive')
path = '/content/drive/My Drive/Datasets/master_project/'

hotels = pd.read_csv(path + 'filtered_hotels_new.csv')
users = pd.read_csv(path + 'filtered_users.csv')
flights = pd.read_csv(path + 'filtered_flights.csv')
sessions = pd.read_csv(path + 'filtered_sessions.csv')

# Print (rows, columns) of every table as a quick sanity check.
for label, table in (
    ('shape_hotels', hotels),
    ('shape_users', users),
    ('shape_flights', flights),
    ('shape_sessions', sessions),
):
    print(label, table.shape)


Mounted at /content/drive
shape_hotels (13772, 11)
shape_users (5782, 11)
shape_flights (13193, 13)
shape_sessions (47436, 13)


**User-based analysis** helps discover which **perk** (free meals, free checked bags, no cancellation fees, exclusive discounts, one free hotel night with a flight) matters most to which customer segment, based on actual behavior rather than assumptions. Each customer can then be offered the perk that matches their behavior, which increases the chance that they will join the **rewards program**.

In [None]:
# List the column names available in the sessions table.
sessions.columns


Index(['session_id', 'user_id', 'trip_id', 'session_start', 'session_end',
       'flight_discount', 'hotel_discount', 'flight_discount_amount',
       'hotel_discount_amount', 'flight_booked', 'hotel_booked', 'page_clicks',
       'cancellation'],
      dtype='object')

In [None]:
# Mean number of page clicks per user, taken over all sessions of that user
# (one user can have several sessions, hence the aggregation).
page_click = sessions['page_clicks'].groupby(sessions['user_id']).mean()
page_click

Unnamed: 0_level_0,page_clicks
user_id,Unnamed: 1_level_1
94883,9.125
101486,16.375
101961,15.750
106907,30.000
118043,20.500
...,...
792549,14.250
796032,18.500
801660,14.375
811077,13.125


# **Creating new interesting columns on the tables**

In [None]:
# Total cost of a hotel booking: rooms x price per room x nights.
# NOTE(review): bookings with nights == 0 end up with a price of 0 - confirm this is intended.
hotels['price'] = hotels['rooms'].mul(hotels['hotel_per_room_usd']).mul(hotels['nights'])
# Total cost of a flight booking: seats x base fare.
flights['price'] = flights['seats'].mul(flights['base_fare_usd'])

# Parse the timestamp columns. For sessions, format='mixed' parses each value
# individually and errors='coerce' turns unparseable values into NaT.
for ts_col in ('session_start', 'session_end'):
    sessions[ts_col] = pd.to_datetime(sessions[ts_col], format='mixed', errors='coerce')
for ts_col in ('departure_time', 'return_time'):
    flights[ts_col] = pd.to_datetime(flights[ts_col])

# Duration of each session
sessions['session_duration'] = sessions['session_end'].sub(sessions['session_start'])


# **Features from Sessions**

In [None]:
# Preview the first five sessions, including the new session_duration column.
sessions.head(5)

Unnamed: 0,session_id,user_id,trip_id,session_start,session_end,flight_discount,hotel_discount,flight_discount_amount,hotel_discount_amount,flight_booked,hotel_booked,page_clicks,cancellation,session_duration
0,536782-40ea451ff53841b296036272519d4e7e,536782,,2023-01-18 19:33:00,2023-01-18 19:34:20,False,False,,,False,False,11,False,0 days 00:01:20
1,570600-1913f4dc10e445e59765e73588e68dfd,570600,,2023-04-28 21:24:00,2023-04-28 21:24:16,False,False,,,False,False,2,False,0 days 00:00:16
2,511055-89aed43aef924e97a0b4cadff654328f,511055,511055-57ed6923358d47a0a3be86eaf8128ece,2023-02-02 20:49:00,2023-02-02 20:52:39,True,False,0.1,,True,True,30,False,0 days 00:03:39
3,677209-77c593f41bd04228b89a55b3a9d34141,677209,,2023-06-01 21:20:00,2023-06-01 21:20:24,False,False,,,False,False,3,False,0 days 00:00:24
4,546867-c1b3888def70494a893b025bfa3a1b80,546867,,2023-03-04 20:38:00,2023-03-04 20:39:08,False,False,,,False,False,9,False,0 days 00:01:08


In [None]:
# Per-user aggregates over every session in the table.
sessions_by_user = sessions.groupby('user_id')

# Feature 1: number of distinct trips per user (sessions without a trip_id are not counted).
amount_of_trips = sessions_by_user[['trip_id']].nunique().rename(columns={'trip_id': 'amount_of_trips'})
# Feature 2: number of distinct sessions per user.
amount_of_sessions = sessions_by_user[['session_id']].nunique().rename(columns={'session_id': 'amount_of_sessions'})
# Feature 3: mean number of page clicks per session, per user.
avg_page_clicks = sessions_by_user[['page_clicks']].mean().rename(columns={'page_clicks': 'avg_page_clicks'})

# Value ranges of the three features as a sanity check.
print('min amount of trips', amount_of_trips['amount_of_trips'].min())
print('max amount of trips', amount_of_trips['amount_of_trips'].max())
print('min amount of sessions', amount_of_sessions['amount_of_sessions'].min())
print('max amount of sessions', amount_of_sessions['amount_of_sessions'].max())
print('avg_page_clicks', 'min:', avg_page_clicks['avg_page_clicks'].min(), 'max:', avg_page_clicks['avg_page_clicks'].max())
avg_page_clicks



min amount of trips 0
max amount of trips 8
min amount of sessions 8
max amount of sessions 12
avg_page_clicks min: 4.125 max: 109.125


Unnamed: 0_level_0,avg_page_clicks
user_id,Unnamed: 1_level_1
94883,9.125
101486,16.375
101961,15.750
106907,30.000
118043,20.500
...,...
792549,14.250
796032,18.500
801660,14.375
811077,13.125


In [None]:
# Preview the first three sessions whose `cancellation` flag is True.
sessions[sessions['cancellation']].head(3)

Unnamed: 0,session_id,user_id,trip_id,session_start,session_end,flight_discount,hotel_discount,flight_discount_amount,hotel_discount_amount,flight_booked,hotel_booked,page_clicks,cancellation,session_duration
132,581491-b003a392208a4ac7ac593d64f1925164,581491,581491-d35d5deaf3b94448985188367b7a9c11,2023-07-13 20:57:51,2023-07-13 22:57:51.000000,True,True,,,True,True,161,True,0 days 02:00:00
285,478146-b5260ecf071d4cbb8b10bc45157529c1,478146,478146-58ebe4bb5b0f4912a0c85ae5232e2935,2023-07-07 20:30:54,2023-07-07 22:30:54.000000,True,True,,,True,True,200,True,0 days 02:00:00
379,560680-ca6b428f7de14c01bdf43b3ebccef16c,560680,560680-54e6857f4e42431e8ad7ff187a0e0d24,2023-02-17 14:09:30,2023-02-17 15:37:15.755074,True,True,,,True,True,87,True,0 days 01:27:45.755074


In [None]:
# Feature 4: number of distinct cancelled trips per user.
# Users without any cancellation session do not appear in this table.
is_cancellation = sessions['cancellation']
canceled_trips_count = (
    sessions.loc[is_cancellation]
    .groupby('user_id')[['trip_id']]
    .nunique()
    .rename(columns={'trip_id': 'canceled_trips'})
)
canceled_trips_count

Unnamed: 0_level_0,canceled_trips
user_id,Unnamed: 1_level_1
106907,1
171470,2
174997,1
182191,1
204943,1
...,...
736075,1
752933,1
763129,1
770252,1


**Only valid (non-cancelled) trips are of interest for the analysis**

In [None]:
# Trip ids of all cancelled trips.
# Missing trip ids are dropped on purpose: a NaN inside this array would make
# an `isin(canceled_trip_ids)` filter also match every row that has no trip_id.
canceled_trip_ids = sessions.loc[sessions['cancellation'], 'trip_id'].dropna().unique()
print(len(canceled_trip_ids))
canceled_trip_ids[:5]  # preview only; the full array is long




589


array(['581491-d35d5deaf3b94448985188367b7a9c11',
       '478146-58ebe4bb5b0f4912a0c85ae5232e2935',
       '560680-54e6857f4e42431e8ad7ff187a0e0d24',
       '488314-616505f4f9fd40059b9f8844c6e1929c',
       '513787-15bb5ff1d8a94b538ca2e2c0d58c05fc',
       '562275-5b2550a902f14543a77ca64eccdce8f8',
       '521335-087c5132bfe640af9360f0d76147f698',
       '509421-231423b401c84e999eb56675a6995dee',
       '599649-419470087b72404a9a61f9ca92963971',
       '622343-2061dc37f60d410f9af97a51a7c8b676',
       '565827-22aa93ceeea24a01a41e5e1a50ceff9b',
       '331039-aeea9e6e87ab49bc9d6012a4162d82d2',
       '547100-c11a039eff2348da975f7f853f138d6f',
       '655166-b4985d2f61d24462a7df41e77b73d86c',
       '521491-12a59e8343ab4c98a8489573bac1070b',
       '547006-86c559087a7e47c6a2378ae9a769c223',
       '524194-fbd57fb047634eeb8b4dac9e0b07fc1b',
       '533932-c899b45d3a644d0a848bf066af4bbc7f',
       '504916-5761fe54f65241bc9c5a6b092da887c2',
       '566953-7f382e8ea4ba4476b4c16b6f610b9752',


In [None]:
# Keep only the sessions that do not belong to a cancelled trip.
belongs_to_canceled_trip = sessions['trip_id'].isin(canceled_trip_ids)
sessions_valid = sessions.loc[~belongs_to_canceled_trip]
print(len(sessions_valid))
sessions_valid

46265


Unnamed: 0,session_id,user_id,trip_id,session_start,session_end,flight_discount,hotel_discount,flight_discount_amount,hotel_discount_amount,flight_booked,hotel_booked,page_clicks,cancellation,session_duration
0,536782-40ea451ff53841b296036272519d4e7e,536782,,2023-01-18 19:33:00,2023-01-18 19:34:20,False,False,,,False,False,11,False,0 days 00:01:20
1,570600-1913f4dc10e445e59765e73588e68dfd,570600,,2023-04-28 21:24:00,2023-04-28 21:24:16,False,False,,,False,False,2,False,0 days 00:00:16
2,511055-89aed43aef924e97a0b4cadff654328f,511055,511055-57ed6923358d47a0a3be86eaf8128ece,2023-02-02 20:49:00,2023-02-02 20:52:39,True,False,0.10,,True,True,30,False,0 days 00:03:39
3,677209-77c593f41bd04228b89a55b3a9d34141,677209,,2023-06-01 21:20:00,2023-06-01 21:20:24,False,False,,,False,False,3,False,0 days 00:00:24
4,546867-c1b3888def70494a893b025bfa3a1b80,546867,,2023-03-04 20:38:00,2023-03-04 20:39:08,False,False,,,False,False,9,False,0 days 00:01:08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47430,525089-220a6a618105405c817494f914982514,525089,525089-118e10d811ad440a8690feec4bff1f2a,2023-02-13 08:15:00,2023-02-13 08:17:28,True,False,0.05,,True,True,20,False,0 days 00:02:28
47431,582801-f3c2610c200f441380a40d7bedf590f0,582801,582801-36ffbf38bbb944f29a14f0bba8b43186,2023-03-24 13:54:00,2023-03-24 14:03:51,False,False,,,True,True,79,False,0 days 00:09:51
47433,515523-a8d9bfbd2277461480f37b1126a557d6,515523,515523-5fbd163e4452409395e1f4615bd29dd9,2023-05-22 20:30:00,2023-05-22 20:33:06,False,True,,0.05,True,True,25,False,0 days 00:03:06
47434,468666-70a4257758814acb944d9393207241ef,468666,468666-03b9b2050f5a4b239f20febab410c2fd,2023-03-04 21:24:00,2023-03-04 21:27:53,True,False,0.25,,True,True,31,False,0 days 00:03:53


In [None]:
# Feature 5: mean session duration per user, computed over the sessions
# that do not belong to a cancelled trip.
valid_session_duration = (
    sessions_valid['session_duration']
    .groupby(sessions_valid['user_id'])
    .mean()
)
valid_session_duration

Unnamed: 0_level_0,session_duration
user_id,Unnamed: 1_level_1
94883,0 days 00:01:07.750000
101486,0 days 00:02:02.250000
101961,0 days 00:01:57.750000
106907,0 days 00:02:49.500000
118043,0 days 00:02:33.125000
...,...
792549,0 days 00:01:46.875000
796032,0 days 00:01:25.666666666
801660,0 days 00:01:46
811077,0 days 00:01:39.125000


# **Features from Hotels**

In [None]:
# Display the hotels table (now including the derived price column).
hotels

Unnamed: 0,trip_id,hotel_name,nights,rooms,check_in_time,check_out_time,hotel_per_room_usd,check_in_dayofyear,check_in_hour,check_out_dayofyear,check_out_hour,price
0,582979-5475b9940ee240bf9328a0cc0bc9900c,NH Hotel - ottawa,7,1,2023-03-31 13:32:21.030,2023-04-07 11:00:00,110,90,13,97,11,770
1,514785-863ac3b4eb984f1eb226d67955623a5d,Choice Hotels - montreal,4,1,2023-06-21 20:06:24.300,2023-06-25 11:00:00,87,172,20,176,11,348
2,588386-233db0f627cf4aaaaeaa9a632daa5ec4,Accor - philadelphia,1,1,2023-04-04 10:43:44.175,2023-04-05 11:00:00,126,94,10,95,11,126
3,474120-a87f749cf54b4f4ca9262d07080a06b2,Accor - san francisco,5,2,2023-02-02 14:18:11.205,2023-02-07 11:00:00,346,33,14,38,11,3460
4,600395-79e934e3dcf2462587a0beffbebd6783,Marriott - memphis,2,2,2023-04-21 15:58:28.200,2023-04-23 11:00:00,153,111,15,113,11,612
...,...,...,...,...,...,...,...,...,...,...,...,...
13767,583623-70097bf1d1334cfb891ac2bd09f448ae,Shangri-La - montreal,4,1,2023-02-23 17:25:07.185,2023-02-27 11:00:00,142,54,17,58,11,568
13768,512377-acf089357a6b497d91eda34bb532578d,Marriott - washington,6,1,2023-04-02 12:25:51.465,2023-04-08 11:00:00,446,92,12,98,11,2676
13769,598533-40848e5db09e46aa8f56c0c3ccdefb2d,Hyatt - macau,12,1,2024-01-24 09:54:31.635,2024-02-05 11:00:00,114,24,9,36,11,1368
13770,497042-fa163de72e234eaf9c9bc8b477fbf66a,Hyatt - san antonio,2,1,2023-04-13 11:40:58.350,2023-04-15 11:00:00,103,103,11,105,11,206


In [None]:
# The hotels table has no user_id, so per-user hotel features need a lookup.
# Build the distinct (user_id, trip_id) pairs from the non-cancelled sessions;
# the same lookup is used for hotels and flights.
user_trip_id = sessions_valid.drop_duplicates(subset=['user_id', 'trip_id'])[['user_id', 'trip_id']]
user_trip_id

Unnamed: 0,user_id,trip_id
0,536782,
1,570600,
2,511055,511055-57ed6923358d47a0a3be86eaf8128ece
3,677209,
4,546867,
...,...,...
47429,560497,560497-deba8e593f6a4fa8b94b75ee4a8f4ecd
47430,525089,525089-118e10d811ad440a8690feec4bff1f2a
47431,582801,582801-36ffbf38bbb944f29a14f0bba8b43186
47433,515523,515523-5fbd163e4452409395e1f4615bd29dd9


In [None]:
# Attach user_id to every hotel booking via its trip_id.
# - A user_id column left over from a previous run is dropped first, so the
#   cell can be re-run safely (otherwise a second run would create
#   user_id_x / user_id_y and break the later group-bys on user_id).
# - Lookup rows without a trip_id are excluded so that missing keys can never
#   be matched against each other.
# Bookings whose trip_id is not in the lookup get user_id = NaN.
hotels = pd.merge(
    hotels.drop(columns='user_id', errors='ignore'),
    user_trip_id.dropna(subset=['trip_id']),
    on='trip_id',
    how='left',
)
hotels


Unnamed: 0,trip_id,hotel_name,nights,rooms,check_in_time,check_out_time,hotel_per_room_usd,check_in_dayofyear,check_in_hour,check_out_dayofyear,check_out_hour,price,user_id
0,582979-5475b9940ee240bf9328a0cc0bc9900c,NH Hotel - ottawa,7,1,2023-03-31 13:32:21.030,2023-04-07 11:00:00,110,90,13,97,11,770,582979.0
1,514785-863ac3b4eb984f1eb226d67955623a5d,Choice Hotels - montreal,4,1,2023-06-21 20:06:24.300,2023-06-25 11:00:00,87,172,20,176,11,348,514785.0
2,588386-233db0f627cf4aaaaeaa9a632daa5ec4,Accor - philadelphia,1,1,2023-04-04 10:43:44.175,2023-04-05 11:00:00,126,94,10,95,11,126,588386.0
3,474120-a87f749cf54b4f4ca9262d07080a06b2,Accor - san francisco,5,2,2023-02-02 14:18:11.205,2023-02-07 11:00:00,346,33,14,38,11,3460,474120.0
4,600395-79e934e3dcf2462587a0beffbebd6783,Marriott - memphis,2,2,2023-04-21 15:58:28.200,2023-04-23 11:00:00,153,111,15,113,11,612,600395.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13767,583623-70097bf1d1334cfb891ac2bd09f448ae,Shangri-La - montreal,4,1,2023-02-23 17:25:07.185,2023-02-27 11:00:00,142,54,17,58,11,568,583623.0
13768,512377-acf089357a6b497d91eda34bb532578d,Marriott - washington,6,1,2023-04-02 12:25:51.465,2023-04-08 11:00:00,446,92,12,98,11,2676,
13769,598533-40848e5db09e46aa8f56c0c3ccdefb2d,Hyatt - macau,12,1,2024-01-24 09:54:31.635,2024-02-05 11:00:00,114,24,9,36,11,1368,
13770,497042-fa163de72e234eaf9c9bc8b477fbf66a,Hyatt - san antonio,2,1,2023-04-13 11:40:58.350,2023-04-15 11:00:00,103,103,11,105,11,206,497042.0


In [None]:
# Hotel bookings whose trip was not cancelled, as an independent copy.
hotel_trip_canceled = hotels['trip_id'].isin(canceled_trip_ids)
hotels_valid = hotels.loc[~hotel_trip_canceled].copy()
hotels_valid

Unnamed: 0,trip_id,hotel_name,nights,rooms,check_in_time,check_out_time,hotel_per_room_usd,check_in_dayofyear,check_in_hour,check_out_dayofyear,check_out_hour,price,user_id
0,582979-5475b9940ee240bf9328a0cc0bc9900c,NH Hotel - ottawa,7,1,2023-03-31 13:32:21.030,2023-04-07 11:00:00,110,90,13,97,11,770,582979.0
1,514785-863ac3b4eb984f1eb226d67955623a5d,Choice Hotels - montreal,4,1,2023-06-21 20:06:24.300,2023-06-25 11:00:00,87,172,20,176,11,348,514785.0
2,588386-233db0f627cf4aaaaeaa9a632daa5ec4,Accor - philadelphia,1,1,2023-04-04 10:43:44.175,2023-04-05 11:00:00,126,94,10,95,11,126,588386.0
3,474120-a87f749cf54b4f4ca9262d07080a06b2,Accor - san francisco,5,2,2023-02-02 14:18:11.205,2023-02-07 11:00:00,346,33,14,38,11,3460,474120.0
4,600395-79e934e3dcf2462587a0beffbebd6783,Marriott - memphis,2,2,2023-04-21 15:58:28.200,2023-04-23 11:00:00,153,111,15,113,11,612,600395.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13765,489785-3536fb4b92214634ba0d79ff10d884df,Choice Hotels - new york,4,1,2023-03-07 18:31:38.010,2023-03-11 11:00:00,224,66,18,70,11,896,489785.0
13766,525053-971b75befda441c7b1af7500e4652ddf,Hilton - milwaukee,2,1,2023-03-31 20:42:13.815,2023-04-02 11:00:00,200,90,20,92,11,400,525053.0
13767,583623-70097bf1d1334cfb891ac2bd09f448ae,Shangri-La - montreal,4,1,2023-02-23 17:25:07.185,2023-02-27 11:00:00,142,54,17,58,11,568,583623.0
13770,497042-fa163de72e234eaf9c9bc8b477fbf66a,Hyatt - san antonio,2,1,2023-04-13 11:40:58.350,2023-04-15 11:00:00,103,103,11,105,11,206,497042.0


In [None]:
# Feature 6: mean number of rooms per hotel booking, per user.
user_rooms = hotels_valid['rooms'].groupby(hotels_valid['user_id']).mean()
user_rooms


Unnamed: 0_level_0,rooms
user_id,Unnamed: 1_level_1
94883.0,1.500000
101486.0,1.500000
101961.0,1.000000
118043.0,1.250000
125845.0,1.333333
...,...
785186.0,1.000000
792549.0,1.000000
796032.0,1.000000
801660.0,1.000000


In [None]:
# Feature 7: mean number of nights per hotel booking, per user.
user_nights = hotels_valid['nights'].groupby(hotels_valid['user_id']).mean()
user_nights

Unnamed: 0_level_0,nights
user_id,Unnamed: 1_level_1
94883.0,1.000000
101486.0,4.500000
101961.0,4.400000
118043.0,6.000000
125845.0,2.333333
...,...
785186.0,1.500000
792549.0,5.000000
796032.0,4.500000
801660.0,2.333333


In [None]:
# Feature 8: mean total price per hotel booking, per user.
user_price = hotels_valid.groupby('user_id')['price'].agg('mean')
# Same values under the name used as column in the final feature table.
user_hotel_price = user_price.rename('hotel_price')
user_hotel_price

Unnamed: 0_level_0,hotel_price
user_id,Unnamed: 1_level_1
94883.0,115.000000
101486.0,1226.000000
101961.0,559.600000
118043.0,1659.500000
125845.0,288.000000
...,...
785186.0,237.500000
792549.0,180.000000
796032.0,827.500000
801660.0,360.333333


In [None]:
# Feature 9: number of distinct trips with a hotel booking, per user.
user_hotels = (
    hotels_valid
    .groupby('user_id')['trip_id']
    .nunique()
    .rename('hotel_count')
)
user_hotels


Unnamed: 0_level_0,hotel_count
user_id,Unnamed: 1_level_1
94883.0,2
101486.0,2
101961.0,5
118043.0,4
125845.0,3
...,...
785186.0,2
792549.0,1
796032.0,2
801660.0,3


# **Features from Flights**

In [None]:
# Preview the first five flight bookings.
flights.head(5)

Unnamed: 0,trip_id,origin_airport,destination,destination_airport,seats,return_flight_booked,departure_time,return_time,checked_bags,trip_airline,destination_airport_lat,destination_airport_lon,base_fare_usd,price
0,582979-5475b9940ee240bf9328a0cc0bc9900c,RNO,ottawa,YOW,1,True,2023-03-31 07:00:00,2023-04-07 07:00:00,1,Air Canada,45.323,-75.669,657.64,657.64
1,514785-863ac3b4eb984f1eb226d67955623a5d,FTW,montreal,YHU,1,False,2023-06-21 15:00:00,NaT,0,Delta Air Lines,45.517,-73.417,221.83,221.83
2,588386-233db0f627cf4aaaaeaa9a632daa5ec4,CLE,philadelphia,PHL,1,True,2023-04-04 08:00:00,2023-04-06 08:00:00,2,American Airlines,39.872,-75.241,106.0,106.0
3,474120-a87f749cf54b4f4ca9262d07080a06b2,YQB,san francisco,SFO,2,True,2023-02-02 07:00:00,2023-02-07 07:00:00,0,Alaska Airlines,37.619,-122.375,1507.83,3015.66
4,600395-79e934e3dcf2462587a0beffbebd6783,HOU,memphis,MEM,2,True,2023-04-21 13:00:00,2023-04-24 13:00:00,0,Delta Air Lines,35.042,-89.977,320.19,640.38


In [None]:
# Attach user_id to every flight booking via its trip_id.
# A user_id column left over from a previous run is dropped first so the cell
# can be re-run safely, and lookup rows without a trip_id are excluded so that
# missing keys can never be matched against each other.
flights = pd.merge(
    flights.drop(columns='user_id', errors='ignore'),
    user_trip_id.dropna(subset=['trip_id']),
    on='trip_id',
    how='left',
)
# Flight bookings whose trip was not cancelled, as an independent copy.
flights_valid = flights[~flights['trip_id'].isin(canceled_trip_ids)].copy()

flights_valid


Unnamed: 0,trip_id,origin_airport,destination,destination_airport,seats,return_flight_booked,departure_time,return_time,checked_bags,trip_airline,destination_airport_lat,destination_airport_lon,base_fare_usd,price,user_id
0,582979-5475b9940ee240bf9328a0cc0bc9900c,RNO,ottawa,YOW,1,True,2023-03-31 07:00:00,2023-04-07 07:00:00,1,Air Canada,45.323,-75.669,657.64,657.64,582979.0
1,514785-863ac3b4eb984f1eb226d67955623a5d,FTW,montreal,YHU,1,False,2023-06-21 15:00:00,NaT,0,Delta Air Lines,45.517,-73.417,221.83,221.83,514785.0
2,588386-233db0f627cf4aaaaeaa9a632daa5ec4,CLE,philadelphia,PHL,1,True,2023-04-04 08:00:00,2023-04-06 08:00:00,2,American Airlines,39.872,-75.241,106.00,106.00,588386.0
3,474120-a87f749cf54b4f4ca9262d07080a06b2,YQB,san francisco,SFO,2,True,2023-02-02 07:00:00,2023-02-07 07:00:00,0,Alaska Airlines,37.619,-122.375,1507.83,3015.66,474120.0
4,600395-79e934e3dcf2462587a0beffbebd6783,HOU,memphis,MEM,2,True,2023-04-21 13:00:00,2023-04-24 13:00:00,0,Delta Air Lines,35.042,-89.977,320.19,640.38,600395.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13186,489785-3536fb4b92214634ba0d79ff10d884df,ATL,new york,JFK,1,True,2023-03-07 15:00:00,2023-03-12 15:00:00,1,Lufthansa,40.640,-73.779,218.01,218.01,489785.0
13187,525053-971b75befda441c7b1af7500e4652ddf,YYC,milwaukee,MKE,1,True,2023-03-31 16:00:00,2023-04-03 16:00:00,0,AirTran Airways,42.947,-87.896,398.92,398.92,525053.0
13188,583623-70097bf1d1334cfb891ac2bd09f448ae,TUS,montreal,YHU,1,True,2023-02-23 11:00:00,2023-02-27 11:00:00,0,American Airlines,45.517,-73.417,633.95,633.95,583623.0
13191,497042-fa163de72e234eaf9c9bc8b477fbf66a,FAT,san antonio,RND,1,True,2023-04-13 07:00:00,2023-04-16 07:00:00,0,United Airlines,29.529,-98.279,369.10,369.10,497042.0


In [None]:
# All flight features are per-user aggregates over the non-cancelled flights.
flights_by_user = flights_valid.groupby('user_id')

# Feature 10: mean number of seats per flight booking.
user_seats = flights_by_user['seats'].mean()
# Feature 11: mean total price per flight booking.
user_flight_price = flights_by_user['price'].mean().rename('flight_price')
# Feature 12: number of distinct trips with a flight.
user_flights = flights_by_user['trip_id'].nunique().rename('flight_count')
# Feature 13: mean (not total) number of checked bags per flight booking.
user_checked_bags = flights_by_user['checked_bags'].mean()

# **Features from Users**

In [None]:
# Show the table dimensions and the first five user records.
print(users.shape)
users.head(5)


(5782, 11)


Unnamed: 0,user_id,birthdate,gender,married,has_children,home_country,home_city,home_airport,home_airport_lat,home_airport_lon,sign_up_date
0,531931,1984-05-26,F,False,False,usa,san jose,SJC,37.362,-121.929,2023-01-15
1,611065,1981-08-20,F,False,False,usa,los angeles,LAX,33.942,-118.408,2023-02-22
2,229330,1976-11-08,F,True,False,usa,colorado springs,COS,38.806,-104.7,2022-06-27
3,586762,1987-09-26,F,False,False,canada,montreal,YUL,45.468,-73.741,2023-02-11
4,536035,1995-11-09,M,False,False,usa,new york,LGA,40.777,-73.872,2023-01-17


In [None]:
# Feature 14: average distance (km) flown by each user.

# Add each user's home-airport coordinates to the non-cancelled flights.
home_coords = users[['user_id', 'home_airport_lat', 'home_airport_lon']]
flights_with_user_info = flights_valid.merge(home_coords, on='user_id', how='left')

# Haversine formula, used for the distance from home airport to destination airport.
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the first point(s), in decimal degrees.
    lat2, lon2 : float or array-like
        Latitude and longitude of the second point(s), in decimal degrees.

    Returns
    -------
    float or array-like
        Distance(s) in kilometres on a sphere of radius 6371 km. The inputs
        are combined element-wise; a NaN coordinate yields a NaN distance.
    """
    R = 6371  # Earth's radius in kilometers
    phi1 = np.radians(lat1)
    phi2 = np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)

    a = np.sin(delta_phi / 2.0) ** 2 + \
        np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2.0) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1]
    # (e.g. for near-antipodal points); sqrt(1 - a) would then be NaN.
    # Clamp it; NaN values pass through unchanged.
    a = np.minimum(np.maximum(a, 0.0), 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c

# Distance from each user's home airport to the destination airport of the flight.
home_lat = flights_with_user_info['home_airport_lat']
home_lon = flights_with_user_info['home_airport_lon']
dest_lat = flights_with_user_info['destination_airport_lat']
dest_lon = flights_with_user_info['destination_airport_lon']
flights_with_user_info['distance_km'] = haversine(home_lat, home_lon, dest_lat, dest_lon)

# Average distance per user as a two-column frame (user_id, avg_km_flown).
avg_distance_flown_km = (
    flights_with_user_info
    .groupby('user_id')['distance_km']
    .mean()
    .rename('avg_km_flown')
    .reset_index()
)

# Optional: Display result
print(avg_distance_flown_km.head())
print(avg_distance_flown_km.shape)


    user_id  avg_km_flown
0   94883.0   1453.666128
1  101486.0    965.340568
2  101961.0   1321.684183
3  118043.0   2455.024509
4  125845.0   2700.781458
(4855, 2)


In [None]:
users['birthdate'] = pd.to_datetime(users['birthdate'])
# NOTE(review): age and tenure are measured against the run date, so both
# features change between runs - consider a fixed reference date.
today = pd.Timestamp.today()

# Feature 15: age in completed years.
# Calendar fields are compared instead of dividing a day count by 365: that
# division ignores leap days and overstates the age of users whose birthday
# is only a few days away.
birthdate = users['birthdate']
birthday_pending = (birthdate.dt.month > today.month) | (
    (birthdate.dt.month == today.month) & (birthdate.dt.day > today.day)
)
users['age'] = today.year - birthdate.dt.year - birthday_pending.astype('int64')

# Feature 16: tenure in months since sign-up, using 30.41 days per month.
users['tenure_months'] = ((today - pd.to_datetime(users['sign_up_date'])).dt.days // 30.41).astype('int64')
users


Unnamed: 0,user_id,birthdate,gender,married,has_children,home_country,home_city,home_airport,home_airport_lat,home_airport_lon,sign_up_date,age,tenure_months
0,531931,1984-05-26,F,False,False,usa,san jose,SJC,37.362,-121.929,2023-01-15,40,26
1,611065,1981-08-20,F,False,False,usa,los angeles,LAX,33.942,-118.408,2023-02-22,43,25
2,229330,1976-11-08,F,True,False,usa,colorado springs,COS,38.806,-104.700,2022-06-27,48,33
3,586762,1987-09-26,F,False,False,canada,montreal,YUL,45.468,-73.741,2023-02-11,37,25
4,536035,1995-11-09,M,False,False,usa,new york,LGA,40.777,-73.872,2023-01-17,29,26
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5777,521235,1989-06-21,F,False,False,usa,seattle,SEA,47.449,-122.309,2023-01-10,35,26
5778,591155,1982-12-26,M,True,False,canada,windsor,YQG,42.276,-82.956,2023-02-13,42,25
5779,600351,1976-07-15,F,True,False,usa,portland,PDX,45.589,-122.597,2023-02-17,48,25
5780,577044,1969-07-26,F,True,True,usa,chicago,UGN,42.422,-87.868,2023-02-06,55,26


16 features have been created.


# **Join Features**

In [None]:
# features (15 - 21)
user_features = users[['user_id','gender','married','home_country', 'has_children','age','tenure_months' ]].copy()
user_features


Unnamed: 0,user_id,gender,married,home_country,has_children,age,tenure_months
0,531931,F,False,usa,False,40,26
1,611065,F,False,usa,False,43,25
2,229330,F,True,usa,False,48,33
3,586762,F,False,canada,False,37,25
4,536035,M,False,usa,False,29,26
...,...,...,...,...,...,...,...
5777,521235,F,False,usa,False,35,26
5778,591155,M,True,canada,False,42,25
5779,600351,F,True,usa,False,48,25
5780,577044,F,True,usa,True,55,26


In [None]:
# Left-join every per-user feature table onto the user attributes, in the
# order in which the features were created. Users missing from a table keep
# NaN in that table's columns.
# NOTE: re-running this cell without re-running the previous one merges the
# same columns again and produces _x / _y duplicates.
feature_tables = [
    amount_of_trips,
    amount_of_sessions,
    avg_page_clicks,
    canceled_trips_count,
    valid_session_duration,
    user_rooms,
    user_nights,
    user_hotel_price,
    user_hotels,
    user_seats,
    user_flight_price,
    user_flights,
    user_checked_bags,
    avg_distance_flown_km,
]
for feature_table in feature_tables:
    user_features = user_features.merge(feature_table, on='user_id', how='left')
user_features



Unnamed: 0,user_id,gender,married,home_country,has_children,age,tenure_months,amount_of_trips,amount_of_sessions,avg_page_clicks,...,session_duration,rooms,nights,hotel_price,hotel_count,seats,flight_price,flight_count,checked_bags,avg_km_flown
0,531931,F,False,usa,False,40,26,2,8,9.375000,...,0 days 00:01:10.875000,1.500000,3.500000,714.000000,2.0,1.500000,1676.630000,2.0,0.500000,3238.749749
1,611065,F,False,usa,False,43,25,4,8,19.875000,...,0 days 00:02:26.750000,1.500000,2.250000,677.750000,4.0,1.500000,1790.872500,4.0,0.750000,3342.929238
2,229330,F,True,usa,False,48,33,4,8,16.125000,...,0 days 00:01:59.625000,1.750000,2.000000,1028.500000,4.0,1.750000,720.802500,4.0,0.250000,1201.119453
3,586762,F,False,canada,False,37,25,3,8,15.750000,...,0 days 00:01:56.375000,1.333333,7.333333,2140.666667,3.0,1.333333,762.496667,3.0,0.333333,2818.710430
4,536035,M,False,usa,False,29,26,2,8,13.875000,...,0 days 00:01:42.750000,1.500000,1.500000,291.000000,2.0,1.500000,931.700000,2.0,0.500000,1484.887202
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5777,521235,F,False,usa,False,35,26,5,8,24.250000,...,0 days 00:02:59.875000,1.600000,5.400000,918.200000,5.0,1.800000,1541.054000,5.0,1.200000,2816.228255
5778,591155,M,True,canada,False,42,25,2,8,15.500000,...,0 days 00:01:55.125000,1.000000,4.000000,541.500000,2.0,1.000000,546.350000,1.0,1.000000,3163.054440
5779,600351,F,True,usa,False,48,25,2,8,15.000000,...,0 days 00:01:52.500000,1.000000,5.000000,1300.000000,1.0,1.000000,523.340000,2.0,0.500000,3121.115690
5780,577044,F,True,usa,True,55,26,3,9,20.333333,...,0 days 00:02:30.111111111,1.000000,1.000000,270.500000,2.0,1.333333,449.743333,3.0,0.333333,1193.525629


In [None]:
# First five rows of the merged feature table.
user_features.head(5)

Unnamed: 0,user_id,gender,married,home_country,has_children,age,tenure_months,amount_of_trips,amount_of_sessions,avg_page_clicks,...,session_duration,rooms,nights,hotel_price,hotel_count,seats,flight_price,flight_count,checked_bags,avg_km_flown
0,531931,F,False,usa,False,40,26,2,8,9.375,...,0 days 00:01:10.875000,1.5,3.5,714.0,2.0,1.5,1676.63,2.0,0.5,3238.749749
1,611065,F,False,usa,False,43,25,4,8,19.875,...,0 days 00:02:26.750000,1.5,2.25,677.75,4.0,1.5,1790.8725,4.0,0.75,3342.929238
2,229330,F,True,usa,False,48,33,4,8,16.125,...,0 days 00:01:59.625000,1.75,2.0,1028.5,4.0,1.75,720.8025,4.0,0.25,1201.119453
3,586762,F,False,canada,False,37,25,3,8,15.75,...,0 days 00:01:56.375000,1.333333,7.333333,2140.666667,3.0,1.333333,762.496667,3.0,0.333333,2818.71043
4,536035,M,False,usa,False,29,26,2,8,13.875,...,0 days 00:01:42.750000,1.5,1.5,291.0,2.0,1.5,931.7,2.0,0.5,1484.887202


In [None]:
# Distribution of the cancelled-trip count. value_counts() leaves out NaN by
# default, so users without a cancellation record are not shown here.
user_features['canceled_trips'].value_counts()

Unnamed: 0_level_0,count
canceled_trips,Unnamed: 1_level_1
1.0,559
2.0,15


In [None]:
# Rows of user_features that contain at least one missing value.
has_missing_value = user_features.isna().any(axis=1)
user_features_nan = user_features.loc[has_missing_value]

# Display the rows with NaN values
user_features_nan


Unnamed: 0,user_id,gender,married,home_country,has_children,age,tenure_months,amount_of_trips,amount_of_sessions,avg_page_clicks,...,session_duration,rooms,nights,hotel_price,hotel_count,seats,flight_price,flight_count,checked_bags,avg_km_flown
0,531931,F,False,usa,False,40,26,2,8,9.375000,...,0 days 00:01:10.875000,1.500000,3.500000,714.000000,2.0,1.500000,1676.630000,2.0,0.500000,3238.749749
1,611065,F,False,usa,False,43,25,4,8,19.875000,...,0 days 00:02:26.750000,1.500000,2.250000,677.750000,4.0,1.500000,1790.872500,4.0,0.750000,3342.929238
2,229330,F,True,usa,False,48,33,4,8,16.125000,...,0 days 00:01:59.625000,1.750000,2.000000,1028.500000,4.0,1.750000,720.802500,4.0,0.250000,1201.119453
3,586762,F,False,canada,False,37,25,3,8,15.750000,...,0 days 00:01:56.375000,1.333333,7.333333,2140.666667,3.0,1.333333,762.496667,3.0,0.333333,2818.710430
4,536035,M,False,usa,False,29,26,2,8,13.875000,...,0 days 00:01:42.750000,1.500000,1.500000,291.000000,2.0,1.500000,931.700000,2.0,0.500000,1484.887202
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5777,521235,F,False,usa,False,35,26,5,8,24.250000,...,0 days 00:02:59.875000,1.600000,5.400000,918.200000,5.0,1.800000,1541.054000,5.0,1.200000,2816.228255
5778,591155,M,True,canada,False,42,25,2,8,15.500000,...,0 days 00:01:55.125000,1.000000,4.000000,541.500000,2.0,1.000000,546.350000,1.0,1.000000,3163.054440
5779,600351,F,True,usa,False,48,25,2,8,15.000000,...,0 days 00:01:52.500000,1.000000,5.000000,1300.000000,1.0,1.000000,523.340000,2.0,0.500000,3121.115690
5780,577044,F,True,usa,True,55,26,3,9,20.333333,...,0 days 00:02:30.111111111,1.000000,1.000000,270.500000,2.0,1.333333,449.743333,3.0,0.333333,1193.525629


In [None]:
# Number of users per gender category.
user_features['gender'].value_counts()

Unnamed: 0_level_0,count
gender,Unnamed: 1_level_1
F,5118
M,653
O,11


In [None]:
# Summary statistics of the feature table; the `count` row shows how many
# non-missing values each column has.
user_features.describe()

Unnamed: 0,user_id,age,tenure_months,amount_of_trips,amount_of_sessions,avg_page_clicks,canceled_trips,session_duration,rooms,nights,hotel_price,hotel_count,seats,flight_price,flight_count,checked_bags,avg_km_flown
count,5782.0,5782.0,5782.0,5782.0,5782.0,5782.0,574.0,5782,5140.0,5140.0,5140.0,5140.0,4855.0,4855.0,4855.0,4855.0,4855.0
mean,547670.236077,42.642338,25.995503,2.677966,8.204082,17.596754,1.026132,0 days 00:01:55.272506561,1.200513,4.29767,928.610853,2.600973,1.185156,677.675536,2.605973,0.579303,2174.561799
std,64035.39454,12.047737,1.154542,1.520225,0.459986,8.787925,0.159668,0 days 00:00:35.760349934,0.354829,2.74033,948.106056,1.277174,0.36767,1356.791234,1.284694,0.457929,1249.356241
min,94883.0,18.0,22.0,0.0,8.0,4.125,1.0,0 days 00:00:30.666666666,1.0,0.0,0.0,1.0,1.0,8.17,1.0,0.0,47.744509
25%,519413.75,36.0,26.0,2.0,8.0,12.444444,1.0,0 days 00:01:30.555555555,1.0,2.5,413.5,2.0,1.0,287.74,2.0,0.25,1455.235111
50%,542279.5,43.0,26.0,3.0,8.0,15.625,1.0,0 days 00:01:51.500000,1.0,3.666667,667.0,2.0,1.0,415.833333,2.0,0.5,2009.265445
75%,576215.5,50.0,26.0,4.0,8.0,19.625,1.0,0 days 00:02:16,1.333333,5.25,1094.0,3.0,1.25,647.8,3.0,1.0,2654.288276
max,844489.0,89.0,37.0,8.0,12.0,109.125,2.0,0 days 00:07:11.333333333,4.0,30.0,14057.0,8.0,6.0,41666.05,8.0,5.0,15813.840702


We can set `canceled_trips` to 0 where it is NaN, because a missing value there means the user never cancelled a trip. For `flight_price`, however, we cannot replace NaN with 0, because that would distort the mean.

In [None]:
# Column dtypes and non-null counts of the feature table.
user_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5782 entries, 0 to 5781
Data columns (total 21 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   user_id             5782 non-null   int64          
 1   gender              5782 non-null   object         
 2   married             5782 non-null   bool           
 3   home_country        5782 non-null   object         
 4   has_children        5782 non-null   bool           
 5   age                 5782 non-null   int64          
 6   tenure_months       5782 non-null   int64          
 7   amount_of_trips     5782 non-null   int64          
 8   amount_of_sessions  5782 non-null   int64          
 9   avg_page_clicks     5782 non-null   float64        
 10  canceled_trips      574 non-null    float64        
 11  session_duration    5782 non-null   timedelta64[ns]
 12  rooms               5140 non-null   float64        
 13  nights              5140 non-null

In [None]:
# Peek at the last five rows of the per-user feature table
user_features.tail(5)

Unnamed: 0,user_id,gender,married,home_country,has_children,age,tenure_months,amount_of_trips,amount_of_sessions,avg_page_clicks,...,session_duration,rooms,nights,hotel_price,hotel_count,seats,flight_price,flight_count,checked_bags,avg_km_flown
5777,521235,F,False,usa,False,35,26,5,8,24.25,...,0 days 00:02:59.875000,1.6,5.4,918.2,5.0,1.8,1541.054,5.0,1.2,2816.228255
5778,591155,M,True,canada,False,42,25,2,8,15.5,...,0 days 00:01:55.125000,1.0,4.0,541.5,2.0,1.0,546.35,1.0,1.0,3163.05444
5779,600351,F,True,usa,False,48,25,2,8,15.0,...,0 days 00:01:52.500000,1.0,5.0,1300.0,1.0,1.0,523.34,2.0,0.5,3121.11569
5780,577044,F,True,usa,True,55,26,3,9,20.333333,...,0 days 00:02:30.111111111,1.0,1.0,270.5,2.0,1.333333,449.743333,3.0,0.333333,1193.525629
5781,597300,F,True,usa,True,37,25,2,8,11.375,...,0 days 00:01:23.750000,1.5,8.0,2365.0,2.0,1.5,486.595,2.0,0.0,1494.297709


In [None]:
# Treat a missing cancellation count as zero canceled trips
missing_cancellations = user_features['canceled_trips'].isna()
user_features.loc[missing_cancellations, 'canceled_trips'] = 0

# Check the resulting distribution of cancellation counts
user_features['canceled_trips'].value_counts()

Unnamed: 0_level_0,count
canceled_trips,Unnamed: 1_level_1
0.0,5208
1.0,559
2.0,15


In [None]:
# Booking-related columns where a missing value is replaced by 0.
# The price columns are deliberately not in this list, so they keep their NaN.
zero_fill_columns = [
    'flight_count',
    'hotel_count',
    'rooms',
    'nights',
    'seats',
    'avg_km_flown',
]
user_features[zero_fill_columns] = user_features[zero_fill_columns].fillna(0)

# Show how many users end up with each flight / hotel count (0 included)
for count_column in ('flight_count', 'hotel_count'):
    print(user_features[count_column].value_counts())


flight_count
2.0    1369
3.0    1232
1.0    1105
0.0     927
4.0     753
5.0     291
6.0      90
7.0      14
8.0       1
Name: count, dtype: int64
hotel_count
2.0    1430
3.0    1328
1.0    1179
4.0     780
0.0     642
5.0     325
6.0      86
7.0      11
8.0       1
Name: count, dtype: int64


In [None]:
# Append an `_avg` suffix to these columns
# (presumably because they hold per-user means — confirm against the aggregation cells).
avg_column_names = {
    'session_duration': 'session_duration_avg',
    'rooms': 'rooms_avg',
    'nights': 'nights_avg',
    'hotel_price': 'hotel_price_avg',
    'seats': 'seats_avg',
    'flight_price': 'flight_price_avg',
    'checked_bags': 'checked_bags_avg',
}
# Renamed in place so the existing `user_features` object keeps being used.
user_features.rename(columns=avg_column_names, inplace=True)

In [None]:
# Convert the timedelta column into a plain float number of seconds
user_features['session_duration_avg_sec'] = (
    user_features['session_duration_avg'].dt.total_seconds()
)

# The timedelta version is no longer needed once the seconds column exists
user_features.drop(columns=['session_duration_avg'], inplace=True)

user_features['session_duration_avg_sec']

Unnamed: 0,session_duration_avg_sec
0,70.875000
1,146.750000
2,119.625000
3,116.375000
4,102.750000
...,...
5777,179.875000
5778,115.125000
5779,112.500000
5780,150.111111


In [None]:
# Encode gender as an integer code: F -> 1, M -> 0, O -> 2.
# Labels are normalised first (surrounding whitespace removed, upper-cased)
# so that variants such as ' f' or 'm' receive the same code.
gender_codes = {'F': 1, 'M': 0, 'O': 2}
gender_labels = user_features['gender'].str.strip().str.upper()

# Series.map is used instead of Series.replace: swapping strings for ints via
# replace() relies on pandas silently downcasting the object column, which is
# deprecated and emits a FutureWarning. map() builds the integer column directly.
# Any label that is not a key of `gender_codes` becomes NaN.
user_features['gender'] = gender_labels.map(gender_codes)

# dropna=False so that unmapped labels (NaN) would be visible in the counts
user_features['gender'].value_counts(dropna=False)

  user_features['gender'] = user_features['gender'].replace({'F': 1, 'M': 0, 'O': 2})


Unnamed: 0_level_0,count
gender,Unnamed: 1_level_1
1,5118
0,653
2,11


In [None]:
# Display the feature table after the fill, rename and encoding steps
# (pandas truncates the middle rows and columns in the rendered output)
user_features

Unnamed: 0,user_id,gender,married,home_country,has_children,age,tenure_months,amount_of_trips,amount_of_sessions,avg_page_clicks,...,rooms_avg,nights_avg,hotel_price_avg,hotel_count,seats_avg,flight_price_avg,flight_count,checked_bags_avg,avg_km_flown,session_duration_avg_sec
0,531931,1,False,usa,False,40,26,2,8,9.375000,...,1.500000,3.500000,714.000000,2.0,1.500000,1676.630000,2.0,0.500000,3238.749749,70.875000
1,611065,1,False,usa,False,43,25,4,8,19.875000,...,1.500000,2.250000,677.750000,4.0,1.500000,1790.872500,4.0,0.750000,3342.929238,146.750000
2,229330,1,True,usa,False,48,33,4,8,16.125000,...,1.750000,2.000000,1028.500000,4.0,1.750000,720.802500,4.0,0.250000,1201.119453,119.625000
3,586762,1,False,canada,False,37,25,3,8,15.750000,...,1.333333,7.333333,2140.666667,3.0,1.333333,762.496667,3.0,0.333333,2818.710430,116.375000
4,536035,0,False,usa,False,29,26,2,8,13.875000,...,1.500000,1.500000,291.000000,2.0,1.500000,931.700000,2.0,0.500000,1484.887202,102.750000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5777,521235,1,False,usa,False,35,26,5,8,24.250000,...,1.600000,5.400000,918.200000,5.0,1.800000,1541.054000,5.0,1.200000,2816.228255,179.875000
5778,591155,0,True,canada,False,42,25,2,8,15.500000,...,1.000000,4.000000,541.500000,2.0,1.000000,546.350000,1.0,1.000000,3163.054440,115.125000
5779,600351,1,True,usa,False,48,25,2,8,15.000000,...,1.000000,5.000000,1300.000000,1.0,1.000000,523.340000,2.0,0.500000,3121.115690,112.500000
5780,577044,1,True,usa,True,55,26,3,9,20.333333,...,1.000000,1.000000,270.500000,2.0,1.333333,449.743333,3.0,0.333333,1193.525629,150.111111


In [None]:
# Collect every row of user_features that still has at least one missing value

# Boolean mask: True where any column of the row is NaN
row_has_missing = user_features.isna().any(axis='columns')
user_features_nan = user_features.loc[row_has_missing]

# Show the affected rows
user_features_nan


Unnamed: 0,user_id,gender,married,home_country,has_children,age,tenure_months,amount_of_trips,amount_of_sessions,avg_page_clicks,...,rooms_avg,nights_avg,hotel_price_avg,hotel_count,seats_avg,flight_price_avg,flight_count,checked_bags_avg,avg_km_flown,session_duration_avg_sec
17,562032,1,False,usa,False,23,26,2,8,15.125000,...,1.5,7.5,2180.0,2.0,0.0,,0.0,,0.000000,110.750
20,517324,1,True,usa,False,70,26,1,8,20.500000,...,0.0,0.0,,0.0,1.0,472.44,1.0,1.0,2548.005699,152.500
22,577944,1,True,usa,False,69,26,1,8,10.750000,...,2.0,6.0,876.0,1.0,0.0,,0.0,,0.000000,80.875
23,565601,0,True,usa,True,51,26,0,8,9.750000,...,0.0,0.0,,0.0,0.0,,0.0,,0.000000,73.375
31,548123,0,True,canada,True,48,26,0,8,8.000000,...,0.0,0.0,,0.0,0.0,,0.0,,0.000000,59.500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5738,629747,1,False,usa,False,20,25,0,8,12.375000,...,0.0,0.0,,0.0,0.0,,0.0,,0.000000,92.125
5740,520561,1,False,usa,True,32,26,0,8,14.875000,...,0.0,0.0,,0.0,0.0,,0.0,,0.000000,111.500
5744,511552,0,False,usa,True,19,27,1,8,19.375000,...,0.0,0.0,,0.0,0.0,,0.0,,0.000000,143.000
5767,481781,1,False,usa,False,76,27,0,9,8.666667,...,0.0,0.0,,0.0,0.0,,0.0,,0.000000,65.000


`amount_of_trips = 0` / `canceled_trips = 0` means that the user only had sessions (with a session duration) but did not book a flight or a hotel. This is why NaN is replaced with 0 in the hotel and flight counts, and likewise in the rooms, nights and seats columns. The NaN values in the average flight price and average hotel price (as well as in `checked_bags_avg`) are left as they are.

In [None]:
# Write the user_features table to a CSV file and download it to the local machine

from google.colab import files

output_csv = 'user_features.csv'

# 'utf-8-sig' writes UTF-8 with a byte-order mark.
# NOTE(review): `index` is left at its default, so the row index is written as
# an unnamed first column; pass index=False if downstream readers should not see it.
user_features.to_csv(output_csv, encoding='utf-8-sig')

# Trigger the browser download of the file from the Colab runtime
files.download(output_csv)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>