# Filter literature feature sets.

Candidate feature sets inspired by a literature review are filtered based on their entropy. Further details are in this notebook's parent notebook "UNSEEN filter feature sets.ipynb".

## Imports

In [1]:
import itertools
import sklearn.metrics

## Load literature feature-set array

Here, we run the notebook that creates the literature feature-set array. We will then save the feature-set array as "my_featureSet_array", so that the remaining syntax in this notebook is common for all feature-set sources.


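It is assumed that the caseness variables (`caseness_array`) have already been created in the parent notebook. The `featuresetmi` function is not defined in this notebook either, so it must also be in scope before the cells below are run; otherwise they fail with a `NameError`.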

In [2]:
%%capture
# Run the notebook that creates the literature feature sets; the %%capture
# cell magic on the first line suppresses that notebook's output.
%run ./"UNSEEN create literature feature sets.ipynb"
# Alias the result under a source-agnostic name so the cells below are the
# same for every feature-set source.
# NOTE(review): `fs_literature` is presumably defined by the notebook run above - confirm.
my_featureSet_array = fs_literature

## Filter feature sets.

### 1. Mutual information of individual feature sets and the caseness variables.

In [3]:
# Order of the feature-set composite used by every `featuresetmi` call in
# this section: 1 = individual, 2 = pair, 3 = triplet.
m = 1

#### 1.1. Multinomial caseness

##### 1.1.1. ALL representation.

In [4]:
# Individual feature sets vs. multinomial caseness (`CMHD`), "all" representation.
# `m` is expected to be 1 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD']],
    m=m,
    representation="all",
)

NameError: name 'featuresetmi' is not defined

##### 1.1.2. MULTI representation.

In [None]:
# Individual feature sets vs. multinomial caseness (`CMHD`), "multi" representation.
# `m` is expected to be 1 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD']],
    m=m,
    representation="multi",
)

#### 1.2. Definitive caseness

##### 1.2.1. ALL representation.

In [None]:
# Individual feature sets vs. definitive caseness (`CMHD_dx_and_rx`), "all" representation.
# `m` is expected to be 1 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_dx_and_rx']],
    m=m,
    representation="all",
)

##### 1.2.2. MULTI representation.

In [None]:
# Individual feature sets vs. definitive caseness (`CMHD_dx_and_rx`), "multi" representation.
# `m` is expected to be 1 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_dx_and_rx']],
    m=m,
    representation="multi",
)

#### 1.3. Possible caseness

##### 1.3.1. ALL representation.

In [None]:
# Individual feature sets vs. possible caseness (`CMHD_rx_not_dx`), "all" representation.
# `m` is expected to be 1 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_rx_not_dx']],
    m=m,
    representation="all",
)

##### 1.3.2. MULTI representation.

In [None]:
# Individual feature sets vs. possible caseness (`CMHD_rx_not_dx`), "multi" representation.
# `m` is expected to be 1 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_rx_not_dx']],
    m=m,
    representation="multi",
)

#### 1.4. No caseness (i.e. control group)

##### 1.4.1. ALL representation.

In [None]:
# Individual feature sets vs. no caseness / control group (`CMHD_control`), "all" representation.
# `m` is expected to be 1 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_control']],
    m=m,
    representation="all",
)

##### 1.4.2. MULTI representation.

In [None]:
# Individual feature sets vs. no caseness / control group (`CMHD_control`), "multi" representation.
# `m` is expected to be 1 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_control']],
    m=m,
    representation="multi",
)

### 2. Mutual information of pair-composite feature sets and the caseness variables.

In [None]:
# Order of the feature-set composite used by every `featuresetmi` call in
# this section: 1 = individual, 2 = pair, 3 = triplet.
m = 2

#### 2.1. Multinomial caseness

##### 2.1.1. ALL representation.

In [None]:
# Pair-composite feature sets vs. multinomial caseness (`CMHD`), "all" representation.
# `m` is expected to be 2 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD']],
    m=m,
    representation="all",
)

##### 2.1.2. MULTI representation.

In [None]:
# Pair-composite feature sets vs. multinomial caseness (`CMHD`), "multi" representation.
# `m` is expected to be 2 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD']],
    m=m,
    representation="multi",
)

#### 2.2. Definitive caseness

##### 2.2.1. ALL representation.

In [None]:
# Pair-composite feature sets vs. definitive caseness (`CMHD_dx_and_rx`), "all" representation.
# `m` is expected to be 2 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_dx_and_rx']],
    m=m,
    representation="all",
)

##### 2.2.2. MULTI representation.

In [None]:
# Pair-composite feature sets vs. definitive caseness (`CMHD_dx_and_rx`), "multi" representation.
# `m` is expected to be 2 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_dx_and_rx']],
    m=m,
    representation="multi",
)

#### 2.3. Possible caseness

##### 2.3.1. ALL representation.

In [None]:
# Pair-composite feature sets vs. possible caseness (`CMHD_rx_not_dx`), "all" representation.
# `m` is expected to be 2 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_rx_not_dx']],
    m=m,
    representation="all",
)

##### 2.3.2. MULTI representation.

In [None]:
# Pair-composite feature sets vs. possible caseness (`CMHD_rx_not_dx`), "multi" representation.
# `m` is expected to be 2 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_rx_not_dx']],
    m=m,
    representation="multi",
)

#### 2.4. No caseness (i.e. control group)

##### 2.4.1. ALL representation.

In [None]:
# Pair-composite feature sets vs. no caseness / control group (`CMHD_control`), "all" representation.
# `m` is expected to be 2 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_control']],
    m=m,
    representation="all",
)

##### 2.4.2. MULTI representation.

In [None]:
# Pair-composite feature sets vs. no caseness / control group (`CMHD_control`), "multi" representation.
# `m` is expected to be 2 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_control']],
    m=m,
    representation="multi",
)

### 3. Mutual information of triplet-composite feature sets and the caseness variables.

In [None]:
# Order of the feature-set composite used by every `featuresetmi` call in
# this section: 1 = individual, 2 = pair, 3 = triplet.
m = 3

#### 3.1. Multinomial caseness

##### 3.1.1. ALL representation.

In [None]:
# Triplet-composite feature sets vs. multinomial caseness (`CMHD`), "all" representation.
# `m` is expected to be 3 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD']],
    m=m,
    representation="all",
)

##### 3.1.2. MULTI representation.

In [None]:
# Triplet-composite feature sets vs. multinomial caseness (`CMHD`), "multi" representation.
# `m` is expected to be 3 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD']],
    m=m,
    representation="multi",
)

#### 3.2. Definitive caseness

##### 3.2.1. ALL representation.

In [None]:
# Triplet-composite feature sets vs. definitive caseness (`CMHD_dx_and_rx`), "all" representation.
# `m` is expected to be 3 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_dx_and_rx']],
    m=m,
    representation="all",
)

##### 3.2.2. MULTI representation.

In [None]:
# Triplet-composite feature sets vs. definitive caseness (`CMHD_dx_and_rx`), "multi" representation.
# `m` is expected to be 3 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_dx_and_rx']],
    m=m,
    representation="multi",
)

#### 3.3. Possible caseness

##### 3.3.1. ALL representation.

In [None]:
# Triplet-composite feature sets vs. possible caseness (`CMHD_rx_not_dx`), "all" representation.
# `m` is expected to be 3 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_rx_not_dx']],
    m=m,
    representation="all",
)

##### 3.3.2. MULTI representation.

In [None]:
# Triplet-composite feature sets vs. possible caseness (`CMHD_rx_not_dx`), "multi" representation.
# `m` is expected to be 3 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_rx_not_dx']],
    m=m,
    representation="multi",
)

#### 3.4. No caseness (i.e. control group)

##### 3.4.1. ALL representation.

In [None]:
# Triplet-composite feature sets vs. no caseness / control group (`CMHD_control`), "all" representation.
# `m` is expected to be 3 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_control']],
    m=m,
    representation="all",
)

##### 3.4.2. MULTI representation.

In [None]:
# Triplet-composite feature sets vs. no caseness / control group (`CMHD_control`), "multi" representation.
# `m` is expected to be 3 here (set in the cell that opens this section).
featuresetmi(
    featureSet_array=my_featureSet_array,
    casenessVector=caseness_array[['person_id', 'CMHD_control']],
    m=m,
    representation="multi",
)