In [None]:
import numpy as np 

# taken from: https://stackoverflow.com/questions/39512260/calculating-gini-coefficient-in-python-numpy
def gini(x):
    """Return the Gini coefficient of the values in ``x``.

    The coefficient is half the relative mean absolute difference:
    ``0.5 * mean(|x_i - x_j|) / mean(x)`` taken over all ordered pairs.
    Instead of materialising the n-by-n matrix of pairwise differences, the
    same quantity is computed from the sorted values with the rank identity
    ``sum((2*i - n - 1) * x_(i)) / (n * sum(x))`` for ``i = 1..n``, which
    needs O(n log n) time and O(n) memory.

    Args:
        x: array-like of numbers; inputs with more than one dimension are
            flattened, so every element counts as one sample.

    Returns:
        The coefficient as a float. NaN for empty input. When the values sum
        to zero the division yields nan or +/-inf, without a warning.
    """
    values = np.sort(np.asarray(x, dtype=float).ravel())
    n = values.size
    if n == 0:
        # No samples: the coefficient is undefined.
        return float("nan")

    # Weight of the i-th smallest value (1-based rank): 2*i - n - 1.
    ranks = np.arange(1, n + 1)
    weighted_sum = np.dot(2 * ranks - n - 1, values)

    # A zero total makes the ratio undefined; let IEEE division produce
    # nan/inf quietly instead of emitting a RuntimeWarning.
    with np.errstate(divide="ignore", invalid="ignore"):
        return weighted_sum / (n * values.sum())

In [None]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Rating label -> hex colour. The size labels and the complexity labels share
# one four-colour palette (Small/Low, Medium, Large/High, ExtraLarge/VeryHigh).
colors = dict(
    Small='#65B32E',
    Low='#65B32E',
    Medium='#EFD522',
    Large='#D37F27',
    High='#D37F27',
    ExtraLarge='#870D88',
    VeryHigh='#870D88',
)

# The two rating scales, each listed in the order used for sorting/plotting.
sizeRatings = ['Small', 'Medium', 'Large', 'ExtraLarge']
complexityRatings = ['Low', 'Medium', 'High', 'VeryHigh']

# Palette colours in sizeRatings order.
color_list = [colors[label] for label in sizeRatings]

def create_side_by_side_plot(df, title, valueColumn, ratings):
    """Build a figure with a per-rating bar chart (left) and a pie chart (right).

    Args:
        df: frame with a "Rating" column and a ``valueColumn`` column.
        title: text used as the figure title.
        valueColumn: name of the column that supplies the bar heights and
            the pie values.
        ratings: rating labels to draw as bars, in drawing order. Each label
            that occurs in ``df`` must be a key of the module-level
            ``colors`` palette.

    Returns:
        The assembled plotly figure; the caller decides when to show it.
    """
    fig = make_subplots(rows=1, cols=2,
                        specs=[[{}, {'type': 'domain'}]])

    # Colour every slice by its own label rather than by position, so a
    # rating that is absent from df cannot shift the palette onto the wrong
    # slices. Labels without a palette entry become None -- presumably Plotly
    # then falls back to its default colour; TODO confirm.
    pie_colors = [colors.get(label) for label in df["Rating"]]
    fig.add_trace(go.Pie(labels=df["Rating"], values=df[valueColumn],
                         legendgroup='group1', showlegend=False, sort=False,
                         marker=dict(colors=pie_colors)),
                  row=1, col=2)

    # One bar trace per requested rating that actually has rows in df.
    for rating in ratings:
        rows = df.loc[df["Rating"] == rating]
        if rows.empty:
            continue
        fig.add_trace(go.Bar(x=rows["Rating"], y=rows[valueColumn],
                             name=rating, marker_color=colors[rating]),
                      row=1, col=1)

    fig.update_layout(title_text=title)

    return fig

In [None]:
def construct_highlight(color, columns, column):
    """Return one CSS style string per entry of ``columns``.

    Entries equal to 'Rule', 'Threshold' or ``column`` receive a
    ``background-color`` declaration using ``color``; every other entry
    receives an empty string. The result has the same length and order as
    ``columns``.
    """
    return [
        'background-color: ' + color
        if (name == 'Rule' or name == 'Threshold' or name == column)
        else ''
        for name in columns
    ]
    
def highlight(s, df, thresholdDf, column):
    """Style one row green or orange by comparing its value with a threshold.

    The threshold is the "Threshold" entry of the first ``thresholdDf`` row
    whose "Rating" equals the row's own "Rating". For the ratings 'Small'
    and 'Low' the value passes when it is at least the threshold; for any
    other rating it passes when it is at most the threshold.

    Args:
        s: the row being styled; must have a "Rating" entry.
            (Presumably called row-wise by a pandas Styler -- confirm.)
        df: passed to ``construct_highlight`` as the sequence of column
            labels to produce styles for.
        thresholdDf: frame with "Rating" and "Threshold" columns.
        column: list-like of labels; ``s[column]`` supplies the value and
            ``column[0]`` names the column to highlight.

    Returns:
        The style list from ``construct_highlight``: green ('#65B32E') when
        the value passes, orange ('#D37F27') otherwise.
    """
    matching = thresholdDf.loc[thresholdDf['Rating'] == s.loc["Rating"]]
    limit = matching["Threshold"].values[0]
    observed = s[column].values[0]
    label = matching['Rating'].values[0]

    # The lowest tier is a floor (more is better); all others are a ceiling.
    if label == 'Small' or label == 'Low':
        passes = observed >= limit
    else:
        passes = observed <= limit

    shade = '#65B32E' if passes else '#D37F27'
    return construct_highlight(shade, df, column[0])

def highlight_no_rating(s, df, threshold, column):
    """Style one row green or orange against a single fixed threshold.

    Args:
        s: the row being styled.
        df: passed to ``construct_highlight`` as the sequence of column
            labels to produce styles for.
        threshold: upper limit; the value passes when it is at most this.
        column: list-like of labels; ``s[column]`` supplies the value and
            ``column[0]`` names the column to highlight.

    Returns:
        The style list from ``construct_highlight``: green ('#65B32E') when
        the value passes, orange ('#D37F27') otherwise.
    """
    observed = s[column].values[0]
    shade = '#65B32E' if observed <= threshold else '#D37F27'
    return construct_highlight(shade, df, column[0])
        
def show_table_statistics(df):
    """Add a share-of-total column for every count column present in ``df``.

    For each of 'MethodCount', 'LineCount' and 'TypeCount' found in ``df``,
    the matching 'MethodPercentage' / 'LinePercentage' / 'TypePercentage'
    column is written, holding each row's count times 100 divided by the
    column sum.

    The frame is modified in place and the same object is returned.
    """
    column_pairs = (
        ('MethodCount', 'MethodPercentage'),
        ('LineCount', 'LinePercentage'),
        ('TypeCount', 'TypePercentage'),
    )
    for count_name, share_name in column_pairs:
        if count_name not in df:
            continue
        counts = df[count_name]
        df[share_name] = 100 * counts / counts.sum()

    return df

def show_table_statistics_no_rating(df):
    """Add row-wise interface percentages for lines and types.

    When 'LineCount' is present, 'LinePercentage' is set to
    ``100 * CountInterfaceLines / LineCount`` for each row; when 'TypeCount'
    is present, 'TypePercentage' is set to
    ``100 * CountInterfaceTypes / TypeCount``.

    Only the total column is checked for presence; a missing
    ``CountInterface*`` column raises AttributeError.

    The frame is modified in place and the same object is returned.
    """
    has_line_totals = 'LineCount' in df
    if has_line_totals:
        interface_lines = df.CountInterfaceLines
        df['LinePercentage'] = 100 * interface_lines / df.LineCount

    has_type_totals = 'TypeCount' in df
    if has_type_totals:
        interface_types = df.CountInterfaceTypes
        df['TypePercentage'] = 100 * interface_types / df.TypeCount

    return df

In [None]:
import pandas as pd
# Lift the display column-width limit (None = no limit) for DataFrame output.
pd.options.display.max_colwidth = None

def group_by_components(df, column, sortorder):
    """Sum ``column`` and 'LineCount' per (Component, Rating) pair.

    Args:
        df: frame with 'Component', 'Rating', 'LineCount' and ``column``.
        column: name of the value column to sum alongside 'LineCount'.
        sortorder: rating labels in the desired order; used as the
            categories of the resulting 'Rating' column.

    Returns:
        A new frame with one row per group, 'Rating' converted to a
        categorical over ``sortorder``, rows sorted by 'Component' and then
        by that rating order, and a fresh 0..n-1 index.
    """
    totals = (
        df.groupby(['Component', 'Rating'])
          .agg({column: 'sum', 'LineCount': 'sum'})
          .reset_index()
    )

    # Categorical so the sort follows sortorder instead of the alphabet.
    totals['Rating'] = pd.Categorical(totals['Rating'], sortorder)

    return totals.sort_values(['Component', 'Rating']).reset_index(drop=True)

def group_by_ratings(df, column, sortorder):
    """Sum ``column`` and 'LineCount' per rating.

    Args:
        df: frame with 'Rating', 'LineCount' and ``column``. 'Rating' may be
            plain labels or already categorical.
        column: name of the value column to sum alongside 'LineCount'.
        sortorder: rating labels in the desired order; used as the
            categories of the resulting 'Rating' column.

    Returns:
        A new frame with one row per rating, 'Rating' converted to a
        categorical over ``sortorder``, rows sorted in that order, and a
        fresh 0..n-1 index.
    """
    # observed=False is spelled out so that, for a categorical 'Rating',
    # categories without any rows stay in the result (with zero sums)
    # regardless of the pandas version's default. It has no effect on a
    # non-categorical 'Rating'.
    totals = (
        df.groupby('Rating', observed=False)
          .agg({column: 'sum', 'LineCount': 'sum'})
          .reset_index()
    )

    # Categorical so the sort follows sortorder instead of the alphabet.
    totals['Rating'] = pd.Categorical(totals['Rating'], sortorder)

    return totals.sort_values('Rating').reset_index(drop=True)

In [None]:
import plotly.express as px

def show(df, ratingColors, columnName):
    """Display a preview, the per-rating plots and the per-component plots.

    Steps, in order: display the first ten rows of ``df``; aggregate by
    component and rating; aggregate that result by rating; show the
    side-by-side bar/pie figure for ``columnName`` and then for "LineCount";
    show the stacked per-component bar chart for ``columnName`` and then for
    "LineCount".

    Args:
        df: frame with 'Component', 'Rating', 'LineCount' and ``columnName``.
        ratingColors: despite the name, this is passed on as the rating
            sort order and as the list of ratings to plot.
        columnName: value column to aggregate and plot next to "LineCount".

    Returns:
        The per-rating aggregate produced by ``group_by_ratings``.
    """
    # Preview of the raw rows.
    display(df.head(10))

    by_components = group_by_components(df, columnName, ratingColors)
    by_ratings = group_by_ratings(by_components, columnName, ratingColors)

    metrics = (columnName, "LineCount")

    # Rating-level overview: each metric doubles as the figure title.
    for metric in metrics:
        create_side_by_side_plot(by_ratings, metric, metric, ratingColors).show()

    # Component-level breakdown, coloured by rating.
    for metric in metrics:
        component_fig = px.bar(by_components, x="Component", y=metric, height=400,
                               color="Rating", color_discrete_map=colors)
        component_fig.show()

    return by_ratings