# DISpANALYSIS

In [None]:
# Load the packages needed to run the scripts in this notebook
import numpy as np
import os
import pandas as pd
from pmoss.analysis import compute_diagnosis
from pmoss import create_combination
from pmoss.display import scatterplot_decrease_parameters, plot_pcurve_by_measure, composed_plot, table_of_results
from pmoss.models.exponential_fit import decission_data_exponential
from pmoss.loaders import morphoparam
# Avoid warnings
import warnings
warnings.filterwarnings('ignore')

### Information about the data.
Provide the path to the folder containing the data (a CSV or Excel file) and the name of the file.

Note: The column identifying the group to which each value belongs must be named "Condition" and should be the first column.

In [None]:
# Folder containing the data. It is given relative to this notebook so that
# the notebook runs unchanged on any machine.
path = "../data/morphology/"
# If your data is stored elsewhere, set your own location instead, e.g.:
# path = "/absolute/path/to/your/data"

# Name of the file containing the information. It can be a csv or excel file.
# Note that the column containing the labels of the group must have the name
# "Condition" and should be the first column of the file.

# Pick one of the two formats:

# file_name = "Aging morphology data.csv"
file_name = "Aging morphology data.xlsx"

### Estimation of the p-value function 

Initialization parameters

In [None]:
# Size of the N-grid, i.e. how many "n-values" are evaluated.
grid_size = 100

# Range of "n-values" used for the Monte Carlo cross-validation:
# n0 is the smallest one and Nmax the largest one.
n0 = 2
Nmax = 1200

# Guards against a single iteration at the highest "n-value":
#   final iterations = k*(m/min(m,Nmax))
# where m is the size of the group with the fewest observations.
k = 20

# Guards against millions of iterations at n0 (the lowest "n-value"):
#   initial iterations = np.log((m/n0)*initial_portion)
# where m is the size of the group with the fewest observations.
initial_portion = 1.0 / 3.0



Parameters for the calculation of the decision index

In [None]:
# Statistical test to evaluate.
test = "MannWhitneyU"
# Method used to estimate the p-value function.
method = "exponential"

# alpha for a 100(1-alpha) statistical significance.
alpha = 0.05
# Note: the gamma in the paper equals gamma*alpha.
gamma = 5e-6

Estimation of the p-value function and assessment of the decision index.

In [None]:
# Run the analysis on the chosen file with the settings defined in the
# previous cells. The call returns three results, kept here as
# `pvalues`, `param` and `Theta`.
pvalues, param, Theta = compute_diagnosis(
    file_name,
    path=path,
    gamma=gamma,
    alpha=alpha,
    grid_size=grid_size,
    n0=n0,
    Nmax=Nmax,
    k=k,
    initial_portion=initial_portion,
    method=method,
    test=test,
)

Save the results

In [None]:
# Write the computed p-values to a CSV file inside the data folder
# (the row index is not written).
pvalues.to_csv(
    os.path.join(path, "aging_morphology_pvalues.csv"),
    index=False,
)

### Plot of results

In [None]:
# Load the data

## Set path and file_name here if they differ from the previous ones,
## or if you are starting the analysis from this cell.
# path = '../data/morphology/'
# file_name = 'Aging morphology data.xlsx'

# Read the p-values CSV located in the data folder.
df = pd.read_csv(
    os.path.join(path, "aging_morphology_pvalues.csv"),
    sep=",",
)

# Get the data, the variables and the names of the groups to be plotted.
data, variables, group_labels = morphoparam(file_name, path=path)

# Variables to include in the plots. This overrides the `variables` value
# obtained from `morphoparam`; edit the entries to plot other variables.
variables = {
    "0": "area (px^2)",
    "1": "short axis length (px)",
    "2": "orientation",
}

# The group combinations to analyse can be defined in two ways. In both
# cases every combination must be written exactly as it appears in the csv
# of the p-values.

# Option 1 - generate all combinations from a dictionary holding the label
# of each group:

# group_labels = {'0':'A02',
#             '1':'A03',
#             '2':'A09',
#             '3':'A16',
#             '4':'A29',
#             '5':'A35',
#             '6':'A55',
#             '7':'A65',
#             '8':'A85',
#             '9':'A96'
#             }
#combination = create_combination(group_labels)

# Option 2 - set only the desired combinations. Here group A02 is paired
# with each of the remaining groups; keys are consecutive indices stored as
# strings ('0', '1', ...), values are the combination names ('A02_A03', ...).
combination = {
    str(index): "A02_" + group
    for index, group in enumerate(
        ("A03", "A09", "A16", "A29", "A35", "A55", "A65", "A85", "A96")
    )
}

# The data related to the exponential parameters can be loaded from a file:

# param = pd.read_csv('../data/morphology/aging_morphology_param.csv',sep=',')

# ...or calculated from the p-values for the chosen combinations and
# variables. The significance level and gamma are given literally so that
# this cell can be run on its own.
param = decission_data_exponential(
    df,
    combination,
    variables,
    sign_level=0.05,
    gamma=5e-06,
)

In [None]:
# Build the table of results for the selected variables and combinations.
table = table_of_results(
    param,
    variables,
    combination,
)
# A bare name as the last expression of the cell makes the notebook display it.
table

In [None]:
# Scatter plot, plot type "exp-param".
scatterplot_decrease_parameters(
    df,
    combination,
    variables,
    path=path,
    fs=10,
    width=5,
    height=5,
    plot_type="exp-param",
)

In [None]:
# Scatter plot, plot type "sampled-nalpha".
scatterplot_decrease_parameters(
    df,
    combination,
    variables,
    path=path,
    fs=10,
    width=5,
    height=5,
    plot_type="sampled-nalpha",
)

In [None]:
# Scatter plot, plot type "theory-nalpha".
scatterplot_decrease_parameters(
    df,
    combination,
    variables,
    path=path,
    fs=10,
    width=5,
    height=5,
    plot_type="theory-nalpha",
)

In [None]:
# Plot the p-value curves for the selected combinations and variables.
plot_pcurve_by_measure(
    df,
    combination,
    variables,
    path=path,
)

In [None]:
# Composed plot built from the raw data and the p-values.
composed_plot(
    data,
    df,
    group_labels,
    combination,
    variables,
    fs=23,
    width=37,
    height=15,
    bins=100,
)