# Setup

In [None]:
%%bash
pip install -r requirements.txt

## Code setup

In [None]:
import os
import pandas as pd

List of runs that can be analyzed. Each run name is a clustering method followed by the number of clusters or other relevant parameters.

In [None]:
# Resolve the results folder relative to the directory the notebook runs from.
cwd = os.getcwd()
results_directory = os.path.join(cwd, "../results")

# Print one available run (sub-directory) per line. os.listdir returns names
# in arbitrary order, so sort them to keep the listing stable between runs.
for f in sorted(os.listdir(results_directory)):
    path = os.path.join(results_directory, f)
    if os.path.isdir(path):
        print(f)

## Choose one of the clustering methods listed above

In [None]:
# Name of the run to analyse. It is joined onto `results_directory` to locate
# the run, so it has to be the name of one of that folder's sub-directories.
name_analysis = 'kmeans10'

In [None]:
# Folder of the selected run, and the "analysis" folder nested inside it.
# Both are built from the same two components so each line reads on its own.
result_path = os.path.join(results_directory, name_analysis)
analysis_path = os.path.join(results_directory, name_analysis, "analysis")

# Clusters

In [None]:
# Read the results.csv file stored in the selected run's folder.
pth = os.path.join(result_path, 'results.csv')
clustering_results = pd.read_csv(pth)
# Trailing expression: the notebook displays the first rows as a preview.
clustering_results.head()

To query for a specific drug pair, provide the names of both drugs in the cell below:

In [None]:
# Drugs to look up; edit these two names to query another pair.
name1 = 'imipramine'
name2 = 'bupropion'

# Match the pair in either column order, so a row stored as (name2, name1) is
# found as well instead of silently producing an empty result.
# NOTE(review): assumes a pair may appear in either order in results.csv — confirm.
clustering_results[
    ((clustering_results['name1'] == name1) & (clustering_results['name2'] == name2))
    | ((clustering_results['name1'] == name2) & (clustering_results['name2'] == name1))
]

# Side effects analysis

## Choose a level of side effects

In [None]:
# Side-effect level to analyse; it is inserted into the name of the scores
# file ('scores_<level>_term.csv') that the next cell reads.
analysis_level = 'soc'

In [None]:
# Load the scores file of the chosen side-effect level from the analysis folder.
pth = os.path.join(analysis_path, f'scores_{analysis_level}_term.csv')
sideeffect_results = pd.read_csv(pth)
# Trailing expression: the notebook displays the whole table.
sideeffect_results

## Cluster numbers

In [None]:
# Distinct values of the "cluster" column, in order of first appearance.
sideeffect_results["cluster"].unique()

In [None]:
# Count the distinct cluster labels; dropna=False keeps a missing label in the
# count, exactly as counting the de-duplicated column would.
num_clusters = sideeffect_results["cluster"].nunique(dropna=False)

## Choose a specific cluster to inspect

In [None]:
# Cluster whose side-effect rows are displayed by the next cell.
cluster_no = 0

In [None]:
# Keep only the rows whose "cluster" value equals the selected cluster.
sideeffect_results.loc[sideeffect_results['cluster'] == cluster_no]

In [None]:
# Presumably the significance level; it is kept as a string because it is
# concatenated into the name of the significance file loaded further down.
alpha = '0.005'

# Significance Analysis

## Choose a level of side effects

In [None]:
# Side-effect level to analyse; together with `alpha` it selects the
# 'significant_<level>_ranks_<alpha>.csv' file that the next cell reads.
analysis_level = 'soc'

In [None]:
# Load the significance file that matches the chosen level and alpha.
pth = os.path.join(analysis_path, f'significant_{analysis_level}_ranks_{alpha}.csv')
statistical_results = pd.read_csv(pth)
# Trailing expression: the notebook displays the whole table.
statistical_results

Look at the significant results summary

In [None]:
# Read significant_summary.csv from the run's analysis folder.
pth = os.path.join(analysis_path, 'significant_summary.csv')
summary_results = pd.read_csv(pth)
# Trailing expression: the notebook displays the whole table.
summary_results

Query for specific drugs

In [None]:
# Drugs to look up; edit these two names to query another pair.
name1 = 'imipramine'
name2 = 'bupropion'

# Match the pair in either column order, so a row stored as (name2, name1) is
# found as well instead of silently producing an empty result.
# NOTE(review): assumes a pair may appear in either order in the summary — confirm.
summary_results[
    ((summary_results['name1'] == name1) & (summary_results['name2'] == name2))
    | ((summary_results['name1'] == name2) & (summary_results['name2'] == name1))
]

# Target distribution for significant clusters

In [None]:
from experiment.interactive_analysis import InteractiveAnalyzer

## Choose the level, the number of clusters to see, and the number of targets per cluster to show

In [None]:
# Side-effect level handed to the analyzer.
analysis_level = 'soc'
# Number of clusters to request; set to the number of distinct clusters
# counted from the side-effect scores in an earlier cell.
cluster_number = num_clusters
# Number of targets to request for each cluster.
targets_per_cluster = 5

This may take some time, because it needs to load TWOSIDES for further analysis.

In [None]:
# Build the analyzer for the selected run's folder.
analyzer = InteractiveAnalyzer(result_path)

In [None]:
# Query the analyzer with the parameters chosen above; the call returns two
# values, unpacked here as the significant clusters and the important targets.
# NOTE(review): their exact types are defined by InteractiveAnalyzer — confirm.
significant_clusters, important_targets = analyzer.get_important_data(analysis_level, cluster_number, targets_per_cluster)

In [None]:
# Display the significant clusters returned by the analyzer.
significant_clusters

## Choose a specific cluster to inspect from the ones in the table above

In [None]:
# Cluster to inspect; used as the key into `important_targets` in the cells below.
cluster_no = 2

In [None]:
# Summary statistics of the important targets of the selected cluster.
# NOTE(review): presumably important_targets[cluster_no] is a pandas DataFrame — confirm.
important_targets[cluster_no].describe()

## Choose a target to visualize

In [None]:
# Target to plot; used as a key into important_targets[cluster_no] in the next cell.
target = 'Potassium Channel'

In [None]:
# Histogram of the chosen target's values within the selected cluster.
important_targets[cluster_no][target].plot.hist()