# Interaction Transformer

This notebook shows the functionality of the `InteractionTransformer` class. This transformer applies the `pd.DataFrame.product` method to the input `X`. <br>
Using this transformer, interaction columns between the selected columns are generated, and the degree of interaction can be specifically selected.



In [1]:
import os
import sys

# Location of the local tubular checkout. The original hard-coded path is kept
# as the default; set the TUBULAR_PATH environment variable to point at a
# different checkout without editing the notebook.
TUBULAR_PATH = os.environ.get("TUBULAR_PATH") or "M:\\PycharmProjects\\AI-delivery\\tubular"

# Keep the checkout at the front of sys.path so it takes precedence, but do not
# stack duplicate entries when this cell is re-run.
if sys.path[:1] != [TUBULAR_PATH]:
    sys.path.insert(0, TUBULAR_PATH)

In [2]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing

In [3]:
import tubular
from tubular.numeric import InteractionTransformer

In [4]:
# Display the version of the tubular package imported above.
tubular.__version__


'0.3.1'

## Load California housing dataset from sklearn

import os, ssl

# NOTE(review): this replaces ssl's default HTTPS context factory with the
# *unverified* one for the whole process. It only applies when the
# PYTHONHTTPSVERIFY environment variable is unset/empty and the ssl module
# exposes `_create_unverified_context`. Treat it as a local workaround and
# prefer fixing the certificate store instead.
https_verify_flag = os.environ.get('PYTHONHTTPSVERIFY', '')
unverified_context_factory = getattr(ssl, '_create_unverified_context', None)
if unverified_context_factory and not https_verify_flag:
    ssl._create_default_https_context = ssl._create_unverified_context

In [5]:
# Fetch the California housing dataset through scikit-learn and wrap the
# feature matrix in a DataFrame whose columns carry the feature names.
cali = fetch_california_housing()
cali_df = pd.DataFrame(data=cali.data, columns=cali.feature_names)

# Print the (rows, columns) shape, then preview the first rows as the cell output.
print(cali_df.shape)
cali_df.head()

URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1108)>

## Simple usage    
### Initialising InteractionTransformer

The user can specify the following: <br>
- `columns` the columns in the `DataFrame` passed to the `transform` method to be transformed <br>
- `min_degree` the minimum degree of expected interaction (default value is 2) <br>
- `max_degree` the maximum degree of expected interaction (default value is 2) <br>


In [7]:
# Request interaction terms from degree 2 up to degree 3 between the three
# listed columns.
interaction_transformer = InteractionTransformer(
    columns=["HouseAge", "Population", "MedInc"],
    min_degree=2,
    max_degree=3,
)

### InteractionTransformer fit
There is no fit method for the InteractionTransformer as the methods that it can run do not 'learn' anything from the data.

### InteractionTransformer transform
When running transform with this configuration, new interaction columns are added to the input `X`; each new column is the product of a combination of the selected columns.

In [8]:
# Apply the degree 2-3 transformer to the housing frame and preview the first rows.
# NOTE(review): the output recorded below this cell lists CRIM/ZN/INDUS columns,
# not the HouseAge/Population/MedInc columns configured here. It looks stale,
# so re-run the notebook to refresh it.
cali_df_2 = interaction_transformer.transform(cali_df)
cali_df_2.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,...,B,LSTAT,target,ZN_cat,CHAS_cat,RAD_cat,CRIMxZN,CRIMxINDUS,ZNxINDUS,CRIMxZNxINDUS
0,0.00632,18.0,2.31,0.0,0.538,6.575,,4.09,,296.0,...,396.9,4.98,24.0,18.0,0.0,,0.00632,0.014599,2.31,0.014599
1,0.02731,,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,...,396.9,9.14,21.6,,0.0,2.0,0.02731,0.193082,7.07,0.193082
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,,...,392.83,4.03,34.7,0.0,0.0,2.0,0.02729,0.19294,7.07,0.19294
3,,,2.18,0.0,0.458,,45.8,6.0622,3.0,222.0,...,,,33.4,,0.0,3.0,,,2.18,
4,0.06905,0.0,2.18,0.0,0.458,,,6.0622,3.0,222.0,...,396.9,5.33,36.2,0.0,0.0,3.0,0.06905,0.150529,2.18,0.150529


## Automatically generated columns

In [9]:
# Only `columns` is supplied, so min_degree and max_degree are left at their
# defaults (both 2, per the parameter list earlier in this notebook).
auto_generated_name_transformer = InteractionTransformer(
    columns=["HouseAge", "Population", "MedInc"],
)

In [10]:
# Apply the default-degree transformer to the housing frame and preview the first rows.
# NOTE(review): the output recorded below this cell lists CRIM/ZN/INDUS columns
# rather than the columns configured for this transformer. Re-run to refresh.
cali_df_3 = auto_generated_name_transformer.transform(cali_df)
cali_df_3.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target,ZN_cat,CHAS_cat,RAD_cat,CRIM ZN,CRIM INDUS,ZN INDUS
0,0.00632,18.0,2.31,0.0,0.538,6.575,,4.09,,296.0,15.3,396.9,4.98,24.0,18.0,0.0,,0.00632,0.014599,2.31
1,0.02731,,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6,,0.0,2.0,0.02731,0.193082,7.07
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,,17.8,392.83,4.03,34.7,0.0,0.0,2.0,0.02729,0.19294,7.07
3,,,2.18,0.0,0.458,,45.8,6.0622,3.0,222.0,18.7,,,33.4,,0.0,3.0,,,2.18
4,0.06905,0.0,2.18,0.0,0.458,,,6.0622,3.0,222.0,18.7,396.9,5.33,36.2,0.0,0.0,3.0,0.06905,0.150529,2.18


## Select degree of interaction

Only available with scikit-learn version 1.0 or later.

In [11]:
# min_degree and max_degree are both 3, so only degree-3 interaction terms
# between the listed columns are requested.
interaction_deg_3_only_transformer = InteractionTransformer(
    columns=["HouseAge", "Population", "MedInc"],
    min_degree=3,
    max_degree=3,
)

In [12]:
# Apply the degree-3-only transformer to the housing frame and preview the first rows.
# NOTE(review): the output recorded below this cell lists CRIM/ZN/INDUS columns
# rather than the columns configured for this transformer. Re-run to refresh.
cali_df_4 = interaction_deg_3_only_transformer.transform(cali_df)
cali_df_4.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target,ZN_cat,CHAS_cat,RAD_cat,CRIM ZN INDUS
0,0.00632,18.0,2.31,0.0,0.538,6.575,,4.09,,296.0,15.3,396.9,4.98,24.0,18.0,0.0,,0.014599
1,0.02731,,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6,,0.0,2.0,0.193082
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,,17.8,392.83,4.03,34.7,0.0,0.0,2.0,0.19294
3,,,2.18,0.0,0.458,,45.8,6.0622,3.0,222.0,18.7,,,33.4,,0.0,3.0,
4,0.06905,0.0,2.18,0.0,0.458,,,6.0622,3.0,222.0,18.7,396.9,5.33,36.2,0.0,0.0,3.0,0.150529
