# Filtering feature sets.
Candidate feature sets are filtered based on their normalised mutual information with the various caseness variables.

## Imports

In [13]:
%run 'UNSEEN_helper_functions.ipynb'
%store -r

## Set parameters

In [14]:
# Representations of a feature set to evaluate (passed to `featuresetmi` as `representation`).
ls_representation = ["all", "multi"]
# Orders of feature set to evaluate: 1 = individual features, 2 = pairs, 3 = triplets.
ls_m = [1, 2, 3]

## Load the caseness variables

In [15]:
%%capture
# Run the caseness-variable notebook only when `caseness_array` is not already
# in the kernel namespace (it may have been restored by the earlier `%store -r`
# -- TODO confirm). `%%capture` hides whatever the child notebook prints.
if 'caseness_array' not in globals():
    %run ./"UNSEEN_create_caseness_variables.ipynb"

## Load feature-set arrays

In [18]:
%%capture
# Build the clinician feature-set arrays only if they are not already in the
# kernel namespace; `%%capture` hides the child notebook's output.
# NOTE(review): only the 'Possible' array is checked here, but the
# `fs_clinician_filteredDefinite`, `..._filteredMulti` and `..._filteredPossDef`
# arrays are also needed further down -- presumably the same notebook creates
# all four; confirm.
if 'fs_clinician_filteredPossible' not in globals():
    %run ./"UNSEEN_create_clinician_feature_sets.ipynb"

In [19]:
# Progress marker, printed from its own cell because the loading cell runs under %%capture.
print("\nClinician feature set arrays loaded.")


Clinician feature set arrays loaded.


In [20]:
%%capture
# Build the literature feature-set arrays only if they are not already in the
# kernel namespace; `%%capture` hides the child notebook's output.
# NOTE(review): only the 'Possible' array is checked here, but the
# `fs_literature_filteredDefinite`, `..._filteredMulti` and `..._filteredPossDef`
# arrays are also needed further down -- presumably the same notebook creates
# all four; confirm.
if 'fs_literature_filteredPossible' not in globals():
    %run ./"UNSEEN_create_literature_feature_sets.ipynb"

In [21]:
# Progress marker, printed from its own cell because the loading cell runs under %%capture.
print("\nLiterature feature set arrays loaded.")


Literature feture set arrays loaded.


In [28]:
#%%capture
#if 'fs_interview_possibleCaseness' not in globals():
#    %run ./"UNSEEN_create_interview_feature_sets.ipynb"

In [29]:
#print("\nInterview feature set arrays loaded.")

In [31]:
%%capture
# Build the database feature-set arrays only if they are not already in the
# kernel namespace; `%%capture` hides the child notebook's output.
# NOTE(review): only the 'Possible' array is checked here, but the
# `fs_database_filteredDefinite`, `..._filteredMulti` and `..._filteredPossDef`
# arrays are also needed further down -- presumably the same notebook creates
# all four; confirm.
if 'fs_database_filteredPossible' not in globals():
    %run ./"UNSEEN_create_database_feature_sets.ipynb"

In [25]:
# Progress marker, printed from its own cell because the loading cell runs under %%capture.
print("\nDatabase feature set arrays loaded.")


Database feature set arrays loaded.


In [26]:
#%%capture
#if 'fs_PPI_possibleCaseness' not in globals():
#    %run ./"UNSEEN_filter_database_feature_sets.ipynb"

In [27]:
#    print("\nfPPI feature set arrays loaded.")

## Combine feature-set arrays from all sources

#### 'Possible caseness'

In [32]:
# Join the per-source 'possible caseness' feature-set arrays on `person_id`,
# folding the list left to right (pandas.merge defaults to an inner join).
# The interview source is currently excluded.
dfs = [
    fs_database_filteredPossible,
    fs_clinician_filteredPossible,
    fs_literature_filteredPossible,
    # fs_interview_filteredPossible,
]
fs_possibleCaseness = functools.reduce(
    lambda merged, incoming: pandas.merge(merged, incoming, on='person_id'),
    dfs,
)

NameError: name 'fs_database_possibleCaseness' is not defined

#### 'Definite caseness'

In [None]:
# Join the per-source 'definite caseness' feature-set arrays on `person_id`,
# folding the list left to right (pandas.merge defaults to an inner join).
# The interview source is currently excluded.
dfs = [
    fs_database_filteredDefinite,
    fs_clinician_filteredDefinite,
    fs_literature_filteredDefinite,
    # fs_interview_filteredDefinite,
]
fs_definiteCaseness = functools.reduce(
    lambda merged, incoming: pandas.merge(merged, incoming, on='person_id'),
    dfs,
)

#### 'Multinomial caseness'

In [None]:
# Join the per-source 'multinomial caseness' feature-set arrays on `person_id`,
# folding the list left to right (pandas.merge defaults to an inner join).
# The interview source is currently excluded.
dfs = [
    fs_database_filteredMulti,
    fs_clinician_filteredMulti,
    fs_literature_filteredMulti,
    # fs_interview_filteredMulti,
]
fs_multiCaseness = functools.reduce(
    lambda merged, incoming: pandas.merge(merged, incoming, on='person_id'),
    dfs,
)

#### 'Possible-vs-Definite caseness'

In [None]:
# Join the per-source 'possible-vs-definite caseness' feature-set arrays on
# `person_id`, folding the list left to right (pandas.merge defaults to an
# inner join). The interview source is currently excluded.
dfs = [
    fs_database_filteredPossDef,
    fs_clinician_filteredPossDef,
    fs_literature_filteredPossDef,
    # fs_interview_filteredPossDef,
]
fs_possdefCaseness = functools.reduce(
    lambda merged, incoming: pandas.merge(merged, incoming, on='person_id'),
    dfs,
)

## Select all feature sets

### 'Possible caseness'

In [None]:
# Call `featuresetmi` once for every (order, representation) combination of the
# combined 'possible caseness' feature sets, against the `CMHD_rx_not_dx`
# caseness variable.
for i_m in ls_m:
    for i_representation in ls_representation:
        featuresetmi(featureSet_array = fs_possibleCaseness,
                     casenessVector = caseness_array[['person_id','CMHD_rx_not_dx']],
                     # Use the loop variable: `m = m` ignored `i_m` and relied on an
                     # undefined or stale `m` in the kernel namespace.
                     m = i_m,
                     representation = i_representation,
                     source = 'combined')

### 'Definite caseness'

In [None]:
# Call `featuresetmi` once for every (order, representation) combination of the
# combined 'definite caseness' feature sets, against the `CMHD_dx_and_rx`
# caseness variable.
for i_m in ls_m:
    for i_representation in ls_representation:
        featuresetmi(featureSet_array = fs_definiteCaseness,
                     casenessVector = caseness_array[['person_id','CMHD_dx_and_rx']],
                     # Use the loop variable: `m = m` ignored `i_m` and relied on an
                     # undefined or stale `m` in the kernel namespace.
                     m = i_m,
                     representation = i_representation,
                     source = 'combined')

### 'Multinomial caseness'

In [None]:
# Call `featuresetmi` once for every (order, representation) combination of the
# combined 'multinomial caseness' feature sets, against the `CMHD` caseness
# variable.
for i_m in ls_m:
    for i_representation in ls_representation:
        featuresetmi(featureSet_array = fs_multiCaseness,
                     casenessVector = caseness_array[['person_id','CMHD']],
                     # Use the loop variable: `m = m` ignored `i_m` and relied on an
                     # undefined or stale `m` in the kernel namespace.
                     m = i_m,
                     representation = i_representation,
                     source = 'combined')

### 'Possible-vs-Definite caseness'

In [None]:
# Call `featuresetmi` once for every (order, representation) combination of the
# combined 'possible-vs-definite caseness' feature sets, against the
# `CMHD_possdef` caseness variable.
for i_m in ls_m:
    for i_representation in ls_representation:
        featuresetmi(featureSet_array = fs_possdefCaseness,
                     casenessVector = caseness_array[['person_id','CMHD_possdef']],
                     # Use the loop variable: `m = m` ignored `i_m` and relied on an
                     # undefined or stale `m` in the kernel namespace.
                     m = i_m,
                     representation = i_representation,
                     source = 'combined')