In [None]:
import io
import base64

import dash
import dash_table
import dash_html_components as html
from jupyter_dash import JupyterDash 
import dash_cytoscape as cyto
from dash.dependencies import Input, Output, State
import dash_core_components as dcc

import math
import numpy as np
import pandas as pd 
from inverse_covariance import QuicGraphicalLassoEBIC

In [None]:
# Dash style (visualization)
# Implemented with Cytoscape
# TODO: needs revision

# Cytoscape stylesheet passed to the `cyto.Cytoscape` component: one rule per
# selector, each rule being a {"selector": ..., "style": {...}} dictionary.
stylesheet = [
    # Node style.
    {
        "selector": "node",
        "style": {
            # NOTE(review): 'position' and the percentage sizes look like CSS
            # rather than Cytoscape style properties -- confirm they have any
            # effect; they are kept unchanged here.
            "position": "absolute",
            "width": "50%",
            "height": "50%",
            "content": "data(label)",
            "font-size": "15px",
            "text-valign": "center",
            "text-halign": "center",
            "background-color": "#555",
            "text-outline-color": "#555",
            "text-outline-width": "2px",
            "color": "#FFFFFF",
            "overlay-padding": "6px",
            # The literal used to list 'z-index' twice (999, then "10").
            # Python keeps only the last duplicate key, so "10" was the value
            # actually in effect; the dead entry is removed.
            "z-index": "10",
        },
    },
    # Base edge style.
    {
        "selector": "edge",
        "style": {
            "curve-style": "haystack",
            "haystack-radius": "0.5",
            "line-color": "#bbb",
            "width": "mapData(weight, 0, 1, 1, 8)",
            "overlay-padding": "3px",
        },
    },
    # Negative edge weights are drawn in red.
    {
        "selector": "edge[weight<0]",
        "style": {
            "line-color": "#E85655",
            "width": "mapData(weight, -1, 0, 20, 1)",
            # The mapping domain is [-1, 0], matching the width rule above.
            # The previous bounds (1, 0) lay entirely outside the negative
            # weights this selector matches, so opacity could not scale with
            # the strength of a negative edge.
            "opacity": "mapData(weight, -1, 0, 1, 0.4)",
        },
    },
    # Positive edge weights are drawn in blue.
    {
        "selector": "edge[weight>0]",
        "style": {
            "line-color": "#6E8BC7",
            "width": "mapData(weight, 0, 1, 1, 20)",
            "opacity": "mapData(weight, 0, 1, 0.4, 1)",
        },
    },
]



# Create the Jupyter Dash application.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = JupyterDash(__name__, external_stylesheets=external_stylesheets)
app.title = 'Network Capstone Design'

# Dash layout: an upload box, the Cytoscape network view and a container
# that the callback fills with the result table.
# TODO: needs revision (note carried over from the original author).

# Drag-and-drop upload area. `multiple=True` means the callback receives
# lists of contents/filenames.
_upload_box = dcc.Upload(
    id='upload-data',
    children=html.Div(['Drag and Drop or ', html.A('Select Files')]),
    style={
        'width': '100%',
        'height': '60px',
        'lineHeight': '60px',
        'borderWidth': '1px',
        'borderStyle': 'dashed',
        'borderRadius': '5px',
        'textAlign': 'center',
        'margin': '10px',
    },
    multiple=True,
)

# Network graph rendered with the 'cose' layout and the module-level
# `stylesheet`.
_network_view = cyto.Cytoscape(
    id='cytoscape',
    style={'width': '100%', 'height': '550px'},
    layout={
        'name': 'cose',
        'idealEdgeLength': 100,
        'nodeOverlap': 5,
        'refresh': 20,
        'fit': True,
        'padding': 10,
        'randomize': False,
        'componentSpacing': 200,
        'nodeRepulsion': 400000,
        'edgeElasticity': 100,
        'nestingFactor': 5,
        'gravity': 1,
        'numIter': 1000,
        'initialTemp': 200,
        'coolingFactor': 0.95,
        'minTemp': 1.0,
    },
    stylesheet=stylesheet,
    responsive=True,
)

app.layout = html.Div([
    _upload_box,
    _network_view,
    html.Div(id="output-data-upload"),
])

In [None]:
# npn Skeptic Algorithm

# input (x) : raw dataframe

def skeptic(x):
    """Apply the npn skeptic transform ``2 * sin(pi / 6 * x)``.

    Parameters
    ----------
    x : float or array-like
        A rank correlation value, or a NumPy array / pandas object of such
        values. Array-like inputs are transformed element-wise.

    Returns
    -------
    Same kind of object as ``x`` (a NumPy float for scalar input) holding the
    transformed value(s).
    """
    # np.sin accepts scalars as well as arrays, so the same helper can be
    # used per element or on a whole matrix at once.
    return 2 * np.sin(np.pi / 6. * x)

def npn(x):
    """Compute the npn skeptic correlation matrix of a raw DataFrame.

    Parameters
    ----------
    x : pandas.DataFrame
        Raw data; the Spearman correlation is taken between its columns.

    Returns
    -------
    pandas.DataFrame
        The Spearman correlation matrix with `skeptic` applied to every
        entry.
    """
    rho = x.corr(method="spearman")
    # `DataFrame.applymap` is deprecated in recent pandas releases; mapping
    # each column with `Series.map` is element-wise as well and works on both
    # old and new versions.
    return rho.apply(lambda column: column.map(skeptic))

# output (result) : npn skeptic corr dataframe

In [None]:
# Making Lambda set

# input (df) : upper-triangular part of the npn skeptic correlation dataframe

def lamSet(df, num=100, ratio=0.01):
    """Build a log-spaced grid of candidate lambda values.

    The grid runs from ``ratio * lamMax`` up to ``lamMax``, where ``lamMax``
    is the largest absolute entry of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame or array-like
        Matrix of correlation values (the caller passes the strictly upper
        triangular part of the npn skeptic correlation matrix).
    num : int, optional
        Number of lambda values to generate (default 100).
    ratio : float, optional
        Smallest lambda as a fraction of the largest (default 0.01). Must be
        positive.

    Returns
    -------
    numpy.ndarray
        ``num`` increasing lambda values, evenly spaced on the log scale.

    Raises
    ------
    ValueError
        If ``df`` is empty, contains no non-zero entry, or its largest
        absolute entry is not finite (the logarithm would be undefined).
    """
    values = np.asarray(df, dtype=float)
    lamMax = float(np.max(np.abs(values))) if values.size else 0.0
    if not np.isfinite(lamMax) or lamMax <= 0:
        raise ValueError(
            "lamSet needs at least one finite non-zero entry to build a lambda grid"
        )

    lamMin = ratio * lamMax
    # linspace always yields exactly `num` points including both end points.
    # The previous float-step arange + append could produce num or num + 1
    # values depending on rounding.
    lam = np.exp(np.linspace(math.log(lamMin), math.log(lamMax), num))

    return lam

# output (lam) : NumPy array of lambda values

In [None]:
# computing EBIC

# input (model) : QuicGraphicalLassoEBIC model
# input (n) : number of data rows
# input (p) : number of data columns
# input (tr) : npn(x).values (the values computed by npn(x))
# input (gamma) : gamma value (= tuning parameter)

def compute_EBIC(model, n, p, tr, gamma):
    """Compute the EBIC score of a fitted model.

    Parameters
    ----------
    model : object
        Fitted estimator exposing a square ``precision_`` matrix.
    n : int
        Number of data rows.
    p : int
        Number of data columns.
    tr : numpy.ndarray
        Correlation matrix the fit is scored against (the caller passes
        ``npn(X).values``).
    gamma : float
        EBIC tuning parameter.

    Returns
    -------
    float
        ``E * 0.5 * log(n) + 2 * E * gamma * log(p) - n * (logdet(prec) -
        trace(tr @ prec))``, where ``E`` is the number of non-zero entries of
        ``prec`` minus ``p``.
    """
    prec = model.precision_

    # Non-zero entries minus p; this equals the off-diagonal non-zero count
    # when every diagonal entry is non-zero.
    E = np.count_nonzero(prec) - p

    # slogdet avoids the overflow/underflow that log(det(prec)) suffers for
    # larger matrices. A non-positive determinant keeps the result the old
    # expression gave: -inf for a zero determinant, NaN for a negative one.
    sign, logdet = np.linalg.slogdet(prec)
    if sign < 0:
        logdet = np.nan

    MLE = (logdet - np.trace(np.dot(tr, prec))) * n
    EBIC = E * 0.5 * np.log(n) + E * gamma * np.log(p) * 2 - MLE

    return EBIC

# output (EBIC) : computed EBIC value (float)

In [None]:
# computing Best Alpha

# input (X): raw dataframe
# input (gamma): gamma value (= tuning parameter; defaults to 0.1 but can be changed)

def compute_Best_Alpha(X, gamma = 0.1):
    """Pick the lambda with the smallest EBIC over the `lamSet` grid.

    Parameters
    ----------
    X : pandas.DataFrame
        Raw data (rows are observations, columns are variables).
    gamma : float, optional
        EBIC tuning parameter (default 0.1).

    Returns
    -------
    float
        The candidate lambda (rounded to 9 decimals, i.e. the value that was
        actually fitted) with the smallest EBIC.

    Raises
    ------
    ValueError
        If every candidate produced a NaN EBIC (raised by ``np.nanargmin``).
    """
    tr = npn(X).values  # npn skeptic correlation matrix
    triu = pd.DataFrame(np.triu(tr, 1))  # strictly upper triangular part
    n, p = X.shape  # number of data rows / columns

    lam = lamSet(triu)
    # The values handed to the estimator are rounded to 9 decimals. They are
    # kept so the returned alpha is exactly one that was evaluated.
    alphas = np.round(lam, 9)
    # Sized from the grid itself rather than a hard-coded 100, so the loop
    # stays correct whatever length lamSet returns.
    EBICs = np.zeros(len(alphas))

    # Fit one model per candidate lambda and record its EBIC.
    for i, alpha in enumerate(alphas):
        alpha = float(alpha)
        model = QuicGraphicalLassoEBIC(lam=alpha, auto_scale = True,
                                           verbose=1, tol = 1e-04,
                                           init_method='spearman', path=100, gamma = gamma,
                                           max_iter=10000, method='quic').fit(X.values)

        EBIC = compute_EBIC(model, n, p, tr, gamma)
        print("EBIC : "+str(EBIC)+" alpha : "+str(alpha))
        EBICs[i] = EBIC

    # Choose the lambda with the smallest EBIC. NaN scores are ignored,
    # because a plain argmin would return the first NaN position.
    best_idx = np.nanargmin(EBICs)
    best_alpha = alphas[best_idx]
    return float(best_alpha)

In [None]:
# cov to corr

# input (cov) : square covariance/precision matrix as a NumPy array (e.g. estimator.precision_)

def cov2cor( cov ):
    """Rescale a square matrix so that its diagonal becomes exactly one.

    Each entry ``cov[i, j]`` is divided by ``sqrt(cov[i, i])`` and then by
    ``sqrt(cov[j, j])``; afterwards the diagonal is overwritten with ones.

    Parameters
    ----------
    cov : numpy.ndarray
        Square matrix (the caller passes ``estimator.precision_``).

    Returns
    -------
    numpy.ndarray
        A new array with the rescaled values; ``cov`` itself is not modified.
    """
    scale = np.sqrt(cov.diagonal())
    # Divide rows first, then columns (same order of operations as before).
    row_scaled = cov / scale[:, np.newaxis]
    cor = row_scaled / scale[np.newaxis, :]
    # Force an exact unit diagonal regardless of rounding.
    np.fill_diagonal(cor, 1.0)
    return cor

# output (cor) : rescaled matrix (NumPy array) with ones on the diagonal

In [None]:
# making Dash Board

def _parse_upload(contents, filename):
    """Decode one `dcc.Upload` payload into a DataFrame, chosen by file extension."""
    _content_type, content_string = contents.split(",", 1)
    decoded = base64.b64decode(content_string)
    name = filename.lower()

    if name.endswith(".csv"):
        return pd.read_csv(io.StringIO(decoded.decode("utf-8")))
    if name.endswith((".xls", ".xlsx")):
        return pd.read_excel(io.BytesIO(decoded))
    if name.endswith((".txt", ".tsv")):
        return pd.read_csv(io.StringIO(decoded.decode("utf-8")), delimiter=r"\s+")
    raise ValueError("Unsupported file type: " + filename)


def _build_elements(matrix):
    """Turn a labelled square weight matrix into Cytoscape elements (edges, then nodes)."""
    labels_from = list(matrix.index)
    labels_to = list(matrix.columns)
    values = matrix.to_numpy()

    seen = set()
    cy_nodes = []
    cy_edges = []

    # np.nonzero walks the matrix row by row, so edges keep row-major order.
    for i, j in zip(*np.nonzero(values)):
        weight = values[i, j]
        if pd.isna(weight):
            continue
        source, target = labels_from[i], labels_to[j]

        # Emit every node once only: element ids must be unique.
        for node in (source, target):
            if node not in seen:
                seen.add(node)
                cy_nodes.append({"data": {"id": node, "label": node}})

        cy_edges.append({
            'data': {
                'source': source,
                'target': target,
                'weight': float(weight)
            }
        })

    return cy_edges + cy_nodes


@app.callback(
    [Output("output-data-upload", "children"), Output('cytoscape', 'elements')],
    [Input("upload-data", "contents"), Input("upload-data", "filename")])
def update_table(contents, filename):
    """Dash callback: estimate the network from an uploaded file and display it.

    Parameters
    ----------
    contents : list of str or None
        Upload payloads from the upload component (``multiple=True``); only
        the first file is used.
    filename : list of str or None
        File names matching ``contents``.

    Returns
    -------
    tuple
        ``(children, elements)``: the result table (or an error message) for
        ``output-data-upload`` and the element list for the Cytoscape graph.
        Both outputs are always returned, since the callback declares two.

    Side effects
    ------------
    Writes the result matrix to ``./my_data_frame.csv``.
    """
    # Nothing uploaded yet: empty table area and an empty graph.
    if not contents:
        return [], []

    # ---- file input ----
    try:
        df = _parse_upload(contents[0], filename[0])
    except Exception as e:
        print(e)
        # Two outputs are declared, so the error path returns two values too.
        return html.Div(["There was an error processing this file."]), []

    # ---- algorithm start ----
    # Drop columns that are entirely missing, then rows with any missing value.
    df = df.dropna(axis=1, how='all')
    df = df.dropna(axis=0, how='any')

    # Column names as strings, so table column ids and node ids always match.
    columnName = [str(name) for name in df.columns]

    # Best lambda by EBIC (gamma defaults to 0.1 but can be changed).
    best_alpha = compute_Best_Alpha(df)

    # Final model fitted with the selected lambda.
    estimator = QuicGraphicalLassoEBIC(lam=best_alpha, auto_scale = False,
                                       verbose=1, tol = 1e-04,
                                       init_method='spearman', path=100, gamma=0.1,
                                       max_iter=10000, method='quic').fit(df.values)

    # Rescale precision_ with cov2cor, negate it and keep the strictly upper
    # triangle.
    result = pd.DataFrame(np.triu(-cov2cor(estimator.precision_), 1),
                          index=columnName, columns=columnName)

    # Save the result matrix to disk.
    result.to_csv (r'./my_data_frame.csv', index = True, header=True)
    # ---- algorithm end ----

    # Result table shown in the dashboard.
    table = html.Div(
        [
            dash_table.DataTable(
                columns=[{"name": name, "id": name} for name in result.columns],
                data=result.to_dict("records"),
                style_data={
                    'whiteSpace': 'normal',
                    'height': 'auto',
                },
            ),
        ],
    )

    # Network elements for the Cytoscape view.
    # TODO: needs revision (note carried over from the original author).
    elements = _build_elements(result)

    return table, elements


# Start the Dash server for the app defined above.
# NOTE(review): mode='external' presumably serves the app at its own URL
# rather than inline in the notebook output -- confirm against the
# jupyter_dash documentation.
app.run_server(mode='external')