Deep Feature Synthesis (DFS) is an automated method for performing feature engineering on relational and temporal data.

In [1]:
import featuretools as ft

# Load Featuretools' mock-customer demo data, requesting it as an EntitySet
# (return_entityset=True) rather than as separate tables.
es = ft.demo.load_mock_customer(return_entityset=True)

In [2]:
# Show the EntitySet summary: its entities (with row/column counts) and the
# relationships that link them.
es

Entityset: transactions
  Entities:
    transactions [Rows: 500, Columns: 5]
    products [Rows: 5, Columns: 2]
    sessions [Rows: 35, Columns: 4]
    customers [Rows: 5, Columns: 4]
  Relationships:
    transactions.product_id -> products.product_id
    transactions.session_id -> sessions.session_id
    sessions.customer_id -> customers.customer_id

In [3]:
# First DFS run: one row per customer, using a single aggregation primitive
# ('count') and a single transform primitive ('month'), limited to depth 1.
feature_matrix, feature_defs = ft.dfs(
    entityset=es,
    target_entity='customers',
    agg_primitives=['count'],
    trans_primitives=['month'],
    max_depth=1,
)

In [4]:
# Display the feature matrix produced by the DFS call above (indexed by customer_id).
feature_matrix

Unnamed: 0_level_0,zip_code,COUNT(sessions),MONTH(join_date),MONTH(date_of_birth)
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
5,60091,6,7,7
4,60091,8,4,8
1,60091,8,4,7
3,13244,6,8,11
2,13244,7,4,8


In [5]:
# Second DFS run on customers: a wider set of primitives and max_depth=2, which
# allows primitives to be applied on top of other primitives' outputs
# (e.g. MEAN(sessions.SUM(transactions.amount))).
# NOTE: this rebinds feature_matrix / feature_defs from the earlier run.
feature_matrix, feature_defs = ft.dfs(
    entityset=es,
    target_entity='customers',
    agg_primitives=['mean', 'sum', 'mode'],
    trans_primitives=['month', 'hour'],
    max_depth=2,
)

In [7]:
# (rows, columns) of the current feature matrix: rows are target-entity
# instances, columns are the generated features.
feature_matrix.shape

(5, 17)

In [8]:
# Inspect one stacked feature: for each customer, the mean over their sessions
# of the per-session sum of transaction amounts. The double brackets select a
# one-column DataFrame rather than a Series.
feature_matrix[['MEAN(sessions.SUM(transactions.amount))']]

Unnamed: 0_level_0,MEAN(sessions.SUM(transactions.amount))
customer_id,Unnamed: 1_level_1
5,1058.276667
4,1090.96
1,1128.2025
3,1039.436667
2,1028.611429


In [9]:
# Switch the target entity from 'customers' to 'sessions': the primitives and
# depth are unchanged, but features are now built with one row per session.
# NOTE: this rebinds feature_matrix / feature_defs again.
feature_matrix, feature_defs = ft.dfs(
    entityset=es,
    target_entity='sessions',
    agg_primitives=['mean', 'sum', 'mode'],
    trans_primitives=['month', 'hour'],
    max_depth=2,
)

In [10]:
# Preview only the first rows of the session-level feature matrix instead of
# rendering the whole frame.
feature_matrix.head()

Unnamed: 0_level_0,customer_id,device,MEAN(transactions.amount),SUM(transactions.amount),MODE(transactions.product_id),MONTH(session_start),HOUR(session_start),customers.zip_code,MODE(transactions.HOUR(transaction_time)),MODE(transactions.MONTH(transaction_time)),MODE(transactions.products.brand),customers.MODE(sessions.device),customers.MEAN(transactions.amount),customers.SUM(transactions.amount),customers.MODE(transactions.product_id),customers.MONTH(join_date),customers.MONTH(date_of_birth),customers.HOUR(join_date),customers.HOUR(date_of_birth)
session_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,2,desktop,76.813125,1229.01,3,1,0,13244,0,1,B,desktop,77.422366,7200.28,4,4,8,23,0
2,5,mobile,74.696,746.96,5,1,0,60091,0,1,A,mobile,80.375443,6349.66,5,7,7,5,0
3,4,mobile,88.6,1329.0,1,1,0,60091,0,1,B,mobile,80.070459,8727.68,2,4,8,20,0
4,1,mobile,64.5572,1613.93,5,1,0,60091,0,1,B,mobile,71.631905,9025.62,4,4,7,10,0
5,4,mobile,70.638182,777.02,5,1,1,60091,1,1,B,mobile,80.070459,8727.68,2,4,8,20,0
