In [1]:
import featuretools as ft

# Load the bundled mock-customer demo data; the cells below index the result by
# table name ('transactions', 'sessions', 'customers', 'products').
data = ft.demo.load_mock_customer()

In [2]:
# Flatten transactions, sessions and customers into one wide frame.
# The join keys are spelled out so the merges do not silently depend on
# whichever column names the tables happen to share.
transactions_df = (
    data['transactions']
    .merge(data['sessions'], on='session_id')
    .merge(data['customers'], on='customer_id')
)
# Fixed seed so Restart & Run All previews the same five rows every time.
transactions_df.sample(5, random_state=0)

Unnamed: 0,transaction_id,session_id,transaction_time,product_id,amount,customer_id,device,session_start,zip_code,join_date,date_of_birth
264,380,21,2014-01-01 05:14:10,5,57.09,4,desktop,2014-01-01 05:02:15,60091,2011-04-08 20:08:14,2006-08-15
19,244,10,2014-01-01 02:34:55,2,116.95,2,tablet,2014-01-01 02:31:40,13244,2012-04-15 23:31:04,1986-08-18
314,299,6,2014-01-01 01:32:05,4,64.99,1,tablet,2014-01-01 01:23:25,60091,2011-04-17 10:48:33,1994-07-18
290,78,4,2014-01-01 00:54:10,1,37.5,1,mobile,2014-01-01 00:44:25,60091,2011-04-17 10:48:33,1994-07-18
379,457,27,2014-01-01 06:37:35,1,19.16,1,mobile,2014-01-01 06:34:20,60091,2011-04-17 10:48:33,1994-07-18


In [3]:
# Second input table: the products lookup (product_id, brand), shown in full
# because it only has a handful of rows.
products_df = data['products']
products_df

Unnamed: 0,product_id,brand
0,1,B
1,2,B
2,3,B
3,4,B
4,5,A


In [4]:
# Start from an empty entity set; entities and relationships are added in the cells below.
es = ft.EntitySet(id='customer_data')

In [5]:
# Register the flattened transactions frame as the first entity.
# transaction_id is the index and transaction_time the time index; product_id
# and zip_code get explicit variable types rather than being left to inference.
transaction_variable_types = {
    'product_id': ft.variable_types.Categorical,
    'zip_code': ft.variable_types.ZIPCode,
}
es = es.entity_from_dataframe(
    entity_id='transactions',
    dataframe=transactions_df,
    index='transaction_id',
    time_index='transaction_time',
    variable_types=transaction_variable_types,
)

In [6]:
# Show the entity set summary: entities registered so far and their relationships.
es

Entityset: customer_data
  Entities:
    transactions [Rows: 500, Columns: 11]
  Relationships:
    No relationships

In [7]:
# List the variables of the transactions entity together with their variable types.
es['transactions'].variables

[<Variable: transaction_id (dtype = index)>,
 <Variable: session_id (dtype = numeric)>,
 <Variable: transaction_time (dtype: datetime_time_index, format: None)>,
 <Variable: amount (dtype = numeric)>,
 <Variable: customer_id (dtype = numeric)>,
 <Variable: device (dtype = categorical)>,
 <Variable: session_start (dtype: datetime, format: None)>,
 <Variable: join_date (dtype: datetime, format: None)>,
 <Variable: date_of_birth (dtype: datetime, format: None)>,
 <Variable: product_id (dtype = categorical)>,
 <Variable: zip_code (dtype = zipcode)>]

In [8]:
# Register the products table as a second entity, indexed by product_id.
es = es.entity_from_dataframe(
    entity_id='products',
    dataframe=products_df,
    index='product_id',
)

In [9]:
# Show the entity set summary again now that products has been added.
es

Entityset: customer_data
  Entities:
    transactions [Rows: 500, Columns: 11]
    products [Rows: 5, Columns: 2]
  Relationships:
    No relationships

In [10]:
# Link the two entities on product_id: products is the parent (one) side,
# transactions the child (many) side.
parent_variable = es['products']['product_id']
child_variable = es['transactions']['product_id']
new_relationship = ft.Relationship(parent_variable, child_variable)
es = es.add_relationship(new_relationship)

In [11]:
# Split the session-level columns out of transactions into a new 'sessions'
# entity keyed by session_id, with session_start as its time index. This also
# adds the transactions -> sessions relationship.
session_columns = ['device', 'customer_id', 'zip_code', 'session_start', 'join_date']
es = es.normalize_entity(
    base_entity_id='transactions',
    new_entity_id='sessions',
    index='session_id',
    make_time_index='session_start',
    additional_variables=session_columns,
)
# NOTE(review): date_of_birth is not in the list above, so it stays on
# transactions - confirm that is intended.
es

Entityset: customer_data
  Entities:
    transactions [Rows: 500, Columns: 6]
    products [Rows: 5, Columns: 2]
    sessions [Rows: 35, Columns: 6]
  Relationships:
    transactions.product_id -> products.product_id
    transactions.session_id -> sessions.session_id

In [12]:
# Re-list the transactions variables to see which columns remain on this entity.
es['transactions'].variables

[<Variable: transaction_id (dtype = index)>,
 <Variable: session_id (dtype = id)>,
 <Variable: transaction_time (dtype: datetime_time_index, format: None)>,
 <Variable: amount (dtype = numeric)>,
 <Variable: date_of_birth (dtype: datetime, format: None)>,
 <Variable: product_id (dtype = id)>]

In [13]:
# List the variables held by the sessions entity together with their variable types.
es['sessions'].variables

[<Variable: session_id (dtype = index)>,
 <Variable: device (dtype = categorical)>,
 <Variable: customer_id (dtype = numeric)>,
 <Variable: zip_code (dtype = zipcode)>,
 <Variable: session_start (dtype: datetime_time_index, format: None)>,
 <Variable: join_date (dtype: datetime, format: None)>]

In [14]:
# Preview the first five rows of the DataFrame backing the sessions entity.
es['sessions'].df.head(5)

Unnamed: 0,session_id,device,customer_id,zip_code,session_start,join_date
1,1,desktop,2,13244,2014-01-01 00:00:00,2012-04-15 23:31:04
2,2,mobile,5,60091,2014-01-01 00:17:20,2010-07-17 05:27:50
3,3,mobile,4,60091,2014-01-01 00:28:10,2011-04-08 20:08:14
4,4,mobile,1,60091,2014-01-01 00:44:25,2011-04-17 10:48:33
5,5,mobile,4,60091,2014-01-01 01:11:30,2011-04-08 20:08:14


In [15]:
# Preview the first five rows of the DataFrame backing the transactions entity.
es['transactions'].df.head(5)

Unnamed: 0,transaction_id,session_id,transaction_time,amount,date_of_birth,product_id
298,298,1,2014-01-01 00:00:00,127.64,1986-08-18,5
2,2,1,2014-01-01 00:01:05,109.48,1986-08-18,2
308,308,1,2014-01-01 00:02:10,95.06,1986-08-18,3
116,116,1,2014-01-01 00:03:15,78.92,1986-08-18,4
371,371,1,2014-01-01 00:04:20,31.54,1986-08-18,3


In [16]:
# Split the customer-level columns out of sessions into a 'customers' entity
# keyed by customer_id, with join_date as its time index.
customer_columns = ['zip_code', 'join_date']
es = es.normalize_entity(
    base_entity_id='sessions',
    new_entity_id='customers',
    index='customer_id',
    make_time_index='join_date',
    additional_variables=customer_columns,
)
es

Entityset: customer_data
  Entities:
    transactions [Rows: 500, Columns: 6]
    products [Rows: 5, Columns: 2]
    sessions [Rows: 35, Columns: 4]
    customers [Rows: 5, Columns: 3]
  Relationships:
    transactions.product_id -> products.product_id
    transactions.session_id -> sessions.session_id
    sessions.customer_id -> customers.customer_id

In [17]:
# Run Deep Feature Synthesis over the entity set, building features for the
# 'products' entity. Returns the feature matrix and the feature definitions.
feature_matrix, feature_defs = ft.dfs(target_entity='products', entityset=es)

In [18]:
# Display the generated feature matrix (indexed by product_id).
feature_matrix

Unnamed: 0_level_0,brand,SUM(transactions.amount),STD(transactions.amount),MAX(transactions.amount),SKEW(transactions.amount),MIN(transactions.amount),MEAN(transactions.amount),COUNT(transactions),NUM_UNIQUE(transactions.session_id),MODE(transactions.session_id),...,MODE(transactions.sessions.device),MODE(transactions.YEAR(transaction_time)),MODE(transactions.WEEKDAY(transaction_time)),MODE(transactions.MONTH(date_of_birth)),MODE(transactions.DAY(transaction_time)),MODE(transactions.DAY(date_of_birth)),MODE(transactions.YEAR(date_of_birth)),MODE(transactions.MONTH(transaction_time)),MODE(transactions.WEEKDAY(date_of_birth)),MODE(transactions.sessions.customer_id)
product_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,B,7489.79,42.479989,149.56,0.125525,6.84,73.429314,102,34,3,...,desktop,2014,2,7,1,18,1994,1,0,1
2,B,7021.43,46.336308,149.95,0.151934,5.73,76.319891,92,34,28,...,desktop,2014,2,8,1,18,2006,1,0,4
3,B,7008.12,38.871405,148.31,0.223938,5.89,73.00125,96,35,1,...,desktop,2014,2,8,1,18,2006,1,0,4
4,B,8088.97,42.492501,146.46,-0.132077,5.81,76.311038,106,34,29,...,desktop,2014,2,7,1,18,1994,1,0,1
5,A,7931.55,42.131902,149.02,0.098248,5.91,76.264904,104,34,4,...,mobile,2014,2,7,1,18,1994,1,0,1


In [19]:
# (rows, columns) of the feature matrix.
feature_matrix.shape

(5, 30)