### Featuretools Installation 

Before running this notebook, you need to install featuretools on your system.

**Use the following command:**

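$ python -m pip install "featuretools<1.0"

**Note:** this notebook uses the pre-1.0 featuretools API (`entity_from_dataframe`, `target_entity`). Featuretools 1.0 replaced these with `add_dataframe` and `target_dataframe_name`, so install a release older than 1.0 to run the cells below unchanged.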

In [1]:
#Import required libraries
import pandas as pd
import featuretools as ft

In [2]:
# Load the BigMart sales data from the CSV file (path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer='bigmart.csv')

In [3]:
# Dimensions of the DataFrame as (number of rows, number of columns)
df.shape

(8523, 12)

In [4]:
# Preview the first 5 rows of the DataFrame
df.head(n=5)

Unnamed: 0,Item_Identifier,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Identifier,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,Outlet_Type,Item_Outlet_Sales
0,FDA15,9.3,Low Fat,0.016047,Dairy,249.8092,OUT049,1999,Medium,Tier 1,Supermarket Type1,3735.138
1,DRC01,5.92,Regular,0.019278,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,Supermarket Type2,443.4228
2,FDN15,17.5,Low Fat,0.01676,Meat,141.618,OUT049,1999,Medium,Tier 1,Supermarket Type1,2097.27
3,FDX07,19.2,Regular,0.0,Fruits and Vegetables,182.095,OUT010,1998,,Tier 3,Grocery Store,732.38
4,NCD19,8.93,Low Fat,0.0,Household,53.8614,OUT013,1987,High,Tier 3,Supermarket Type1,994.7052


In [5]:
# Separate the dependent variable from the independent variables:
# y holds the target column, features holds every other column.
y = df['Item_Outlet_Sales']
features = df.drop(columns=['Item_Outlet_Sales'])

### Make an entityset and add the entity

In [6]:
# Create an EntitySet with the id 'bigmart'; the trailing `es` displays its summary
es = ft.EntitySet(id='bigmart')
es

Entityset: bigmart
  Entities:
  Relationships:
    No relationships

### Add the data to the newly created entityset

In [7]:
# Register the predictor DataFrame in the entityset as entity 'data1'.
# make_index=True asks featuretools to create a new index column named 'index'.
es.entity_from_dataframe(
    entity_id='data1',
    dataframe=features,
    index='index',
    make_index=True,
)

Entityset: bigmart
  Entities:
    data1 [Rows: 8523, Columns: 12]
  Relationships:
    No relationships

### Feature Engineering

In [8]:
# Run Deep Feature Synthesis on entity 'data1' with two transform primitives.
# With max_depth=1, each generated feature is the sum ('add_numeric') or the
# product ('multiply_numeric') of a pair of existing numeric columns.
feature_matrix, feature_defs = ft.dfs(
    entityset=es,
    target_entity='data1',
    trans_primitives=['add_numeric', 'multiply_numeric'],
    max_depth=1,
)
feature_matrix.head(n=5)

Unnamed: 0_level_0,Item_Identifier,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Identifier,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,...,Item_MRP + Outlet_Establishment_Year,Item_Visibility + Item_Weight,Item_Visibility + Outlet_Establishment_Year,Item_Weight + Outlet_Establishment_Year,Item_MRP * Item_Visibility,Item_MRP * Item_Weight,Item_MRP * Outlet_Establishment_Year,Item_Visibility * Item_Weight,Item_Visibility * Outlet_Establishment_Year,Item_Weight * Outlet_Establishment_Year
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,FDA15,9.3,Low Fat,0.016047,Dairy,249.8092,OUT049,1999,Medium,Tier 1,...,2248.8092,9.316047,1999.016047,2008.3,4.008763,2323.22556,499368.5908,0.14924,32.078555,18590.7
1,DRC01,5.92,Regular,0.019278,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,...,2057.2692,5.939278,2009.019278,2014.92,0.930544,285.753664,96972.8228,0.114127,38.729936,11893.28
2,FDN15,17.5,Low Fat,0.01676,Meat,141.618,OUT049,1999,Medium,Tier 1,...,2140.618,17.51676,1999.01676,2016.5,2.373528,2478.315,283094.382,0.293301,33.50339,34982.5
3,FDX07,19.2,Regular,0.0,Fruits and Vegetables,182.095,OUT010,1998,,Tier 3,...,2180.095,19.2,1998.0,2017.2,0.0,3496.224,363825.81,0.0,0.0,38361.6
4,NCD19,8.93,Low Fat,0.0,Household,53.8614,OUT013,1987,High,Tier 3,...,2040.8614,8.93,1987.0,1995.93,0.0,480.982302,107022.6018,0.0,0.0,17743.91


In [9]:
# List the feature definitions returned by DFS: the original columns plus the generated sums and products
feature_defs

[<Feature: Item_Identifier>,
 <Feature: Item_Weight>,
 <Feature: Item_Fat_Content>,
 <Feature: Item_Visibility>,
 <Feature: Item_Type>,
 <Feature: Item_MRP>,
 <Feature: Outlet_Identifier>,
 <Feature: Outlet_Establishment_Year>,
 <Feature: Outlet_Size>,
 <Feature: Outlet_Location_Type>,
 <Feature: Outlet_Type>,
 <Feature: Item_MRP + Item_Visibility>,
 <Feature: Item_MRP + Item_Weight>,
 <Feature: Item_MRP + Outlet_Establishment_Year>,
 <Feature: Item_Visibility + Item_Weight>,
 <Feature: Item_Visibility + Outlet_Establishment_Year>,
 <Feature: Item_Weight + Outlet_Establishment_Year>,
 <Feature: Item_MRP * Item_Visibility>,
 <Feature: Item_MRP * Item_Weight>,
 <Feature: Item_MRP * Outlet_Establishment_Year>,
 <Feature: Item_Visibility * Item_Weight>,
 <Feature: Item_Visibility * Outlet_Establishment_Year>,
 <Feature: Item_Weight * Outlet_Establishment_Year>]

In [10]:
# Dimensions of the generated feature matrix as (number of rows, number of columns)
feature_matrix.shape

(8523, 23)