# Introduction

# Load data

In [1]:
import pandas as pd

# Location of the tab-separated input table.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
data_path = '/Users/wangjingran/Desktop/Bone_Muscle_Interaction/data/data_ml.txt'

# Parse the file into the DataFrame `df` that the later cells work on.
df = pd.read_csv(data_path, sep='\t')

## Clean Data

In [2]:
from BMINet.utils import Fill_NA
# Replace `df` with the output of Fill_NA (NA threshold 0.2, 5 KNN neighbors).
# Per the notes accompanying this cell: columns above the threshold are dropped
# and the other NAs are filled by KNN.
# NOTE(review): `df` is overwritten with its own processed version, so re-running
# this cell alone feeds already-processed data back in; reload `df` first for a clean re-run.
df = Fill_NA(df, na_threshold=0.2, knn_neighbors=5)

> If you use Clean Data, columns above the NA threshold (`na_threshold`) will be dropped.

> The remaining NAs will be filled by KNN imputation.

# Formula based on Lasso

In [4]:
from BMINet.Model import Lasso_Formula
# Build the Lasso-based formula from `df` for the disease pair ("A", "B") and print it.
# NOTE(review): presumably "A" and "B" are group labels present in `df` — confirm against the data.
formula = Lasso_Formula(df, disease_pairs=("A", "B"))
print(formula)

Disease = 114.3001 + 0.0079*L1 + -0.1026*L3 + -0.1874*L4 + -0.1275*L5 + 0.0480*L4-L5_4 + 0.0362*L5-S1_4


# Interaction Detection

## Select features

In [None]:
# Load FeatureSelector
from BMINet.Interaction import FeatureSelector
# NOTE(review): core_name="LightGBM" presumably picks the learner used for selection — confirm.
selector = FeatureSelector(core_name="LightGBM")

# Conduct feature selection from df.
# `selected_features` is reused by the stacking-model, SHAP and network cells.
selected_features = selector.select(df)

: 

## Stacking Model

In [None]:
from BMINet.Model import StackingModel
# Load stacking model framework
Model = StackingModel()
# Run the stacking-model search on `df` with the previously selected features.
# `best_scores` is consumed by the plotting cells under "Machine Learning Plots".
# NOTE(review): `save_dir` is an absolute, machine-specific path — adjust before running elsewhere.
best_scores = Model.stacking_model_search(df, selected_features,save_dir='/Users/wangjingran/Desktop/BMINet/Save_1')

: 

### Binary-Class Prediction

In [None]:
# NOTE(review): `[...]` is presumably a placeholder for one individual's feature
# values (and `[[...], [...]]` for several individuals) — replace before running.
# NOTE(review): "A vs C" / "A vs B" appear to name the group pair being compared — confirm.
single_prediction_score = Model.single_predict("A vs C", [...], use_our_model=True)
multiple_prediction_score = Model.multiple_predict("A vs B", [[...], [...]], use_our_model=True)

: 

In [None]:
# Fresh StackingModel instance; no search/training call is made on it in this cell.
Model_our = StackingModel()
# Predict from single individual
single_prediction_score = Model_our.single_predict("A vs C", [...], use_our_model=True)
# Predict for several individuals at once (a list with one inner list per individual)
multiple_prediction_score = Model_our.multiple_predict("A vs B", [[...], [...]], use_our_model=True)

: 

> NA values in the input list should be given as `None`.

### Multi-Class Prediction

In [None]:
Model_our = StackingModel()
# Multi-class prediction for one individual.
# NOTE(review): `[...]` is presumably a placeholder for that individual's feature values — replace before running.
category = Model_our.single_predict_multi_classify([...], use_our_model=True, show_route=False)

: 

In [None]:
from BMINet.utils import convert_to_number

# Read the tab-separated test fold. The context manager guarantees the file
# handle is closed even if reading or the later parsing raises.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
with open("/Users/wangjingran/Desktop/Bone_Muscle_Interaction/TT/test_fold_1.txt", "r") as f:
    all_text = f.readlines()

# Split each line on tabs and drop its first field, then skip the first row.
# NOTE(review): presumably the first field is a row label and the first row is the header — confirm.
all_data = [line.rstrip("\n").split("\t")[1:] for line in all_text][1:]

# Convert every remaining row with convert_to_number; `all_data_new` is the
# input passed to the multi-class prediction cells.
all_data_new = [convert_to_number(row) for row in all_data]

: 

In [None]:
from BMINet.Model import StackingModel
Model_our = StackingModel()
# Multi-class prediction for every row of `all_data_new` (built in the file-parsing cell).
# `train_on_data` points at the training fold file.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
category = Model_our.multi_predict_multi_classify(new_data = all_data_new, use_our_model=True, show_route=True,train_on_data='/Users/wangjingran/Desktop/Bone_Muscle_Interaction/TT/train_fold_1.txt')

: 

In [None]:
# Hard-coded stage labels. This assignment replaces whatever `category` held
# after the prediction cell, so the counting cells below tally this literal list.
# NOTE(review): looks like a pasted copy of an earlier prediction output — confirm provenance.
category = ['Stage C', 'Stage A', 'Stage D', 'Stage B', 'Stage A', 'Stage D', 'Stage A', 'Stage B', 'Stage A', 'Stage D', 'Stage D', 'Stage B', 'Stage B', 'Stage B', 'Stage A', 'Stage A', 'Stage B', 'Stage A', 'Stage A', 'Stage A', 'Stage B', 'Stage B', 'Stage B', 'Stage A', 'Stage D', 'Stage B', 'Stage A', 'Stage D', 'Stage D', 'Stage A', 'Stage C', 'Stage B', 'Stage A', 'Stage B', 'Stage B', 'Stage D', 'Stage D', 'Stage A', 'Stage A', 'Stage B', 'Stage B', 'Stage D', 'Stage B', 'Stage B', 'Stage B', 'Stage D', 'Stage C', 'Stage A', 'Stage B', 'Stage D', 'Stage D', 'Stage D', 'Stage B', 'Stage D', 'Stage B', 'Stage D', 'Stage D', 'Stage A', 'Stage D', 'Stage B', 'Stage B', 'Stage C', 'Stage B', 'Stage B', 'Stage D', 'Stage D', 'Stage C', 'Stage D', 'Stage D', 'Stage B', 'Stage B', 'Stage D', 'Stage C', 'Stage D', 'Stage D', 'Stage D', 'Stage C', 'Stage D', 'Stage B', 'Stage B', 'Stage D', 'Stage D', 'Stage B', 'Stage B', 'Stage D', 'Stage B', 'Stage D', 'Stage C', 'Stage D']

: 

In [None]:
# Tally each stage label among predictions 0-20, printing one count per line
# in the order A, B, C, D.
# NOTE(review): the slice bounds presumably match the true-group boundaries of the test fold — confirm.
for stage in ('Stage A', 'Stage B', 'Stage C', 'Stage D'):
    print(category[:21].count(stage))

: 

In [None]:
# Tally each stage label among predictions 21-46, printing one count per line
# in the order A, B, C, D.
# NOTE(review): the slice bounds presumably match the true-group boundaries of the test fold — confirm.
for stage in ('Stage A', 'Stage B', 'Stage C', 'Stage D'):
    print(category[21:47].count(stage))

: 

In [None]:
# Tally each stage label among predictions 47-62, printing one count per line
# in the order A, B, C, D.
# NOTE(review): the slice bounds presumably match the true-group boundaries of the test fold — confirm.
for stage in ('Stage A', 'Stage B', 'Stage C', 'Stage D'):
    print(category[47:63].count(stage))

: 

In [None]:
# Tally each stage label among predictions from index 63 to the end, printing
# one count per line in the order A, B, C, D.
# NOTE(review): the slice bound presumably matches a true-group boundary of the test fold — confirm.
for stage in ('Stage A', 'Stage B', 'Stage C', 'Stage D'):
    print(category[63:].count(stage))

: 

In [None]:
# Multi-class prediction with `use_our_model=False`, called on `Model` (the
# instance used for the stacking-model search) rather than `Model_our`.
# NOTE(review): requires the stacking-model search cell to have been run first — confirm.
categories = Model.multi_predict_multi_classify(all_data_new, use_our_model=False, show_route=True)

: 

> If you use `use_our_model = True`, you are predicting disease stages based on our data and model.
>
> If you are working on a brand-new project, use `use_our_model = False`.

## Machine Learning Plots

In [None]:
from BMINet.plot import plot_ml_roc
# ROC plot from `best_scores` (output of the stacking-model search cell).
# NOTE(review): output_dir='./' presumably writes the figure into the current working directory — confirm.
plot_ml_roc(best_scores, color_set="Paired",output_dir='./')

: 

In [None]:
from BMINet.plot import plot_precision_recall
# Precision-recall plot from `best_scores` (output of the stacking-model search cell).
plot_precision_recall(best_scores)

: 

In [None]:
from BMINet.plot import plot_score_histogram
# Score histogram from `best_scores` (output of the stacking-model search cell).
plot_score_histogram(best_scores)

: 

In [None]:
from BMINet.plot import plot_calibration_curve
# Calibration curve from `best_scores` (output of the stacking-model search cell).
plot_calibration_curve(best_scores)

: 

## Model Explanation

In [None]:
from BMINet.Interaction import SHAPVisualizer
shap_visualizer = SHAPVisualizer(core_name="LightGBM")
# Train the visualizer's model on `df` with the previously selected features;
# this must run before the two plotting calls below.
shap_visualizer.train_model(df, selected_features)
# NOTE(review): './Example' is presumably the output directory for the figures — confirm it exists.
shap_visualizer.plot_shap('./Example')
shap_visualizer.plot_dependence('./Example')

: 

## Network Construction

In [None]:
# Load NetworkConstructor
from BMINet.Interaction import NetworkConstructor
# NOTE(review): cutoff = 1.5 is presumably the interaction-strength threshold for keeping an edge — confirm.
network_constructor = NetworkConstructor(core_name="LightGBM", cutoff = 1.5)

# Construct sub-network list for each group
interactions = network_constructor.construct_network(df, selected_features)
# Construct combined network
combined_graph = network_constructor.compose_all(interactions)
# Remove isolated nodes from the network
# (`Graph_BMI` is the graph used by the Network Analysis cells.)
Graph_BMI = network_constructor.remove_isolated_nodes(combined_graph)
# Save to .graphml file
network_constructor.save_graph(Graph_BMI, './Example')

: 

## Network Analysis

### Conduct calculation of centrality and adjacent matrix

In [None]:
from BMINet.Interaction import NetworkMetrics
# Compute metrics for `Graph_BMI` (built in the Network Construction cell).
# `metrics` is passed to NetworkVisualizer in the Plots section.
metrics_calculator = NetworkMetrics(Graph_BMI)
metrics = metrics_calculator.compute_metrics()

: 

### Plots

In [None]:
from BMINet.Interaction import NetworkVisualizer
# Visualizer built from the graph and its computed metrics; used by the three plotting cells that follow.
visualizer = NetworkVisualizer(Graph_BMI, metrics)

: 

In [None]:
# Plot the adjacency matrix of the network.
# NOTE(review): pdf_path=None presumably means the figure is not saved to a PDF — confirm.
visualizer.plot_adjacency_matrix(pdf_path=None)

: 

In [None]:
# Plot the centrality measures of the network.
# NOTE(review): pdf_path=None presumably means the figure is not saved to a PDF — confirm.
visualizer.plot_centrality_measures(pdf_path=None)

: 

In [None]:
# Plot the communities of the network.
# NOTE(review): pdf_path=None presumably means the figure is not saved to a PDF — confirm.
visualizer.plot_network_communities(pdf_path=None)

: 

# Statistic plots

## Single ROC

In [None]:
from BMINet.plot import plot_single_roc
# ROC plot from the data file at a path relative to the working directory; the return value is kept in `AUCs`.
# NOTE(review): presumably one AUC per feature — confirm the return shape.
AUCs = plot_single_roc("Example/data.txt")

: 

## PCA

In [None]:
from BMINet.plot import plot_pca
# PCA plot from the same example data file (path relative to the working directory).
plot_pca("Example/data.txt")

: 

In [None]:
from BMINet.Model import StackingModel
# NOTE(review): this rebinds `Model` to a fresh StackingModel, replacing the
# instance used for the stacking-model search — confirm this is intended.
Model = StackingModel()
# Evaluate on the test fold file.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
score = Model.performance_on_test('/Users/wangjingran/Desktop/Bone_Muscle_Interaction/TT/test_fold_1.txt')

: 