In [59]:
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [60]:
# Load the pipe-delimited session export, parsing the timestamp columns at read time.
# NOTE(review): 'next_ds' is not in the parse list, so it stays a plain object column
# while 'ds' is parsed as a datetime — confirm whether that is intentional.
file_name = "airbnb_session_data.txt"
timestamp_columns = ['ds', 'ts_max', 'ts_min', 'next_ts_max', 'next_ts_min']
df = pd.read_csv(file_name, sep='|', parse_dates=timestamp_columns)

In [61]:
# Flag columns that should be stored as plain booleans.
boolean_columns = [
    'did_search',
    'sent_message',
    'sent_booking_request',
    'next_did_search',
    'next_sent_message',
    'next_sent_booking_request'
]
# A bare astype(bool) maps missing values to True, because NaN is truthy.
# The next_* columns presumably have gaps where a session has no successor
# (next_dim_session_number is float64 — verify), so a missing flag is
# treated as False instead of silently becoming True.
raw_flags = df[boolean_columns]
df[boolean_columns] = raw_flags.notna() & raw_flags.astype(bool)

In [62]:
# Session lengths in minutes: last timestamp minus first timestamp,
# for the current session and for the following one.
one_minute = np.timedelta64(1, 'm')
current_span = df['ts_max'] - df['ts_min']
following_span = df['next_ts_max'] - df['next_ts_min']
df['session_duration'] = current_span / one_minute
df['next_session_duration'] = following_span / one_minute

In [63]:
# Show the dtype of every column in df.
df.dtypes

id_visitor                           object
id_session                           object
dim_session_number                    int64
dim_user_agent                       object
dim_device_app_combo                 object
ds                           datetime64[ns]
ts_min                       datetime64[ns]
ts_max                       datetime64[ns]
did_search                             bool
sent_message                           bool
sent_booking_request                   bool
next_id_session                      object
next_dim_session_number             float64
next_dim_user_agent                  object
next_dim_device_app_combo            object
next_ds                              object
next_ts_min                  datetime64[ns]
next_ts_max                  datetime64[ns]
next_did_search                        bool
next_sent_message                      bool
next_sent_booking_request              bool
session_duration                    float64
next_session_duration               float64

In [64]:
# Count distinct visitors that have at least one session with no search,
# no message and no booking request.
# NOTE(review): the filter is per session, so a visitor who acted in a
# different session is still counted here.
took_action = df['did_search'] | df['sent_message'] | df['sent_booking_request']
len(np.unique(df.loc[~took_action, 'id_visitor']))

525

In [65]:
# Distinct visitors with at least one session in which a message was sent.
visitors_sent_message = np.unique(df.loc[df['sent_message'], 'id_visitor'])
visitors_sent_message.size

89

In [66]:
# Distinct visitors with at least one session that included a search.
visitors_did_search = np.unique(df.loc[df['did_search'], 'id_visitor'])
visitors_did_search.size

261

In [67]:
# Distinct visitors with at least one session in which a booking request was sent.
visitors_sent_booking_request = np.unique(df.loc[df['sent_booking_request'], 'id_visitor'])
visitors_sent_booking_request.size

50

In [68]:
# Number of visitors who both sent a booking request and sent a message.
booked_and_messaged = np.intersect1d(visitors_sent_booking_request, visitors_sent_message)
len(booked_and_messaged)

49

In [69]:
# Number of visitors who both sent a booking request and ran a search.
booked_and_searched = np.intersect1d(visitors_sent_booking_request, visitors_did_search)
len(booked_and_searched)

49

In [70]:
# Number of visitors who both sent a message and ran a search.
messaged_and_searched = np.intersect1d(visitors_sent_message, visitors_did_search)
len(messaged_and_searched)

84