In [66]:
import pandas as pd
import numpy as np

In [67]:
# Load the cluster-size table; later cells read its 'age' and 'aorta' columns
# and its last 15 columns.
cluster_sizes_csv = '/Volumes/Aorta/aorta/results/inducedRainbow/allClusterSizesRed.csv'
df = pd.read_csv(cluster_sizes_csv)

In [69]:
def create_clonalArray(row):
    """Expand a label -> count mapping into one flat array of repeated labels.

    Every label of ``row`` is parsed with ``int`` and repeated ``count`` times,
    in the order ``row.items()`` yields them. For example labels '1' and '3'
    with counts 2 and 1 give ``[1, 1, 3]``.
    """
    repeated_sizes = []
    for size_label, count in row.items():
        repeated_sizes.append(np.full(count, int(size_label)))
    return np.concatenate(repeated_sizes)

In [70]:
# Map "<age>_<aorta>" -> flat array produced by create_clonalArray for that row.
result_dict = {}
for index, row in df.iterrows():
    sample_key = '_'.join([str(row['age']), str(row['aorta'])])
    # Only the last 15 entries of the row are expanded
    # (presumably the per-size count columns -- verify against the CSV layout).
    size_counts = row[-15:]
    result_dict[sample_key] = create_clonalArray(size_counts)

In [71]:
def ShannonLogN(x):
    """Shannon entropy of ``x`` using the natural logarithm.

    The entries of ``x`` are treated as abundances and converted to shares
    ``p_i = x_i / sum(x)``.

    Parameters
    ----------
    x : array-like of numbers
        One abundance per element.

    Returns
    -------
    tuple
        ``(H, H / ln(n))`` with ``H = -sum(p_i * ln(p_i))`` and ``n = len(x)``
        (zero entries still count towards ``n``). Both values are NaN when
        ``x`` is empty or sums to zero; the normalised value is NaN when
        ``n < 2`` because ``ln(n)`` is then zero.
    """
    counts = np.asarray(x, dtype=float)
    n = counts.size
    total = counts.sum()
    if n == 0 or total == 0:
        return np.nan, np.nan
    shares = counts / total
    # Zero shares contribute nothing (0 * log 0 is taken as 0); dropping them
    # avoids the NaN that log(0) would otherwise inject into the sum.
    shares = shares[shares != 0]
    # The leading minus makes the entropy non-negative; "+ 0.0" turns -0.0 into 0.0.
    shannon = -np.sum(shares * np.log(shares)) + 0.0
    if n < 2:
        return shannon, np.nan
    return shannon, shannon / np.log(n)

def ShannonLog2(x):
    """Shannon entropy of ``x`` in bits (base-2 logarithm).

    The entries of ``x`` are treated as abundances and converted to shares
    ``p_i = x_i / sum(x)``.

    Parameters
    ----------
    x : array-like of numbers
        One abundance per element.

    Returns
    -------
    tuple
        ``(H, H / log2(n))`` with ``H = -sum(p_i * log2(p_i))`` and
        ``n = len(x)`` (zero entries still count towards ``n``). Both values
        are NaN when ``x`` is empty or sums to zero; the normalised value is
        NaN when ``n < 2`` because ``log2(n)`` is then zero.
    """
    counts = np.asarray(x, dtype=float)
    n = counts.size
    total = counts.sum()
    if n == 0 or total == 0:
        return np.nan, np.nan
    shares = counts / total
    # Zero shares contribute nothing (0 * log 0 is taken as 0); dropping them
    # avoids the NaN that log2(0) would otherwise inject into the sum.
    shares = shares[shares != 0]
    # The leading minus makes the entropy non-negative; "+ 0.0" turns -0.0 into 0.0.
    shannon = -np.sum(shares * np.log2(shares)) + 0.0
    if n < 2:
        return shannon, np.nan
    return shannon, shannon / np.log2(n)

def TheilLogN(x):
    """Theil T index of ``x`` using the natural logarithm.

    Computes ``(1/n) * sum((x_i / mean) * ln(x_i / mean))`` with
    ``n = len(x)`` and ``mean = mean(x)``.

    Parameters
    ----------
    x : array-like of numbers

    Returns
    -------
    float
        The index; 0 when all entries are equal and non-zero. NaN when ``x``
        is empty or its mean is zero.
    """
    values = np.asarray(x, dtype=float)
    n = values.size
    if n == 0:
        return np.nan
    average = values.mean()
    if average == 0:
        return np.nan
    ratios = values / average
    # Zero entries contribute nothing (0 * log 0 is taken as 0); dropping them
    # avoids the NaN that log(0) would otherwise inject into the sum.
    ratios = ratios[ratios != 0]
    return np.sum(ratios * np.log(ratios)) / n

def TheilLog2(x):
    """Theil T index of ``x`` using the base-2 logarithm.

    Computes ``(1/n) * sum((x_i / mean) * log2(x_i / mean))`` with
    ``n = len(x)`` and ``mean = mean(x)``.

    Parameters
    ----------
    x : array-like of numbers

    Returns
    -------
    float
        The index; 0 when all entries are equal and non-zero. NaN when ``x``
        is empty or its mean is zero.
    """
    values = np.asarray(x, dtype=float)
    n = values.size
    if n == 0:
        return np.nan
    average = values.mean()
    if average == 0:
        return np.nan
    ratios = values / average
    # Zero entries contribute nothing (0 * log 0 is taken as 0); dropping them
    # avoids the NaN that log2(0) would otherwise inject into the sum.
    ratios = ratios[ratios != 0]
    return np.sum(ratios * np.log2(ratios)) / n

def Simpson(x):
    """Return the sum of squared shares, ``sum((x_i / sum(x)) ** 2)``.

    An input with no elements yields 0.
    """
    total = sum(x)
    # Built-in sum adds the terms left to right starting from 0.
    return sum((value / total) ** 2 for value in x)

def Hoover(x):
    """Return ``sum(|x_i - mean(x)|) / (2 * sum(x))``."""
    centre = np.mean(x)
    # Built-in sum adds the absolute deviations left to right starting from 0.
    total_deviation = sum(np.abs(value - centre) for value in x)
    return total_deviation / (2 * np.sum(x))


def Gini_coefficient(x):
    """Return the Gini coefficient of the array ``x``.

    Sums ``|x_i - x_j|`` over every pair with ``i < j`` and divides by
    ``len(x)**2 * mean(x)``. ``x`` must support slicing and element-wise
    subtraction (e.g. a NumPy array).
    """
    n = len(x)
    pairwise_total = 0
    for start in range(1, n):
        # Compare element start-1 with everything after it.
        pairwise_total += np.sum(np.abs(x[start - 1] - x[start:]))
    return pairwise_total / (n**2 * np.mean(x))

In [75]:
# Compute one row of diversity / inequality statistics per entry of result_dict.
stat_rows = []
for key, sizes in result_dict.items():
    # Keys have the form "<age>_<aorta>"; split only on the first underscore so
    # an aorta label that itself contains '_' is kept whole.
    age, aorta = key.split('_', 1)

    shannonLogN, shannonLogNEquil = ShannonLogN(sizes)
    shannonLog2, shannonLog2Equil = ShannonLog2(sizes)
    average = np.mean(sizes)

    stat_rows.append({
        'age': age,
        'aorta': aorta,
        'average': average,
        # Coefficient of variation: np.std default (population) std over the mean.
        'cv': np.std(sizes) / average,
        'gini': Gini_coefficient(sizes),
        'shannonLogN': shannonLogN,
        'shannonLog2': shannonLog2,
        'shannonLogNEquil': shannonLogNEquil,
        'shannonLog2Equil': shannonLog2Equil,
        'theilLogN': TheilLogN(sizes),
        # Stored under its own key; a repeated 'theilLogN' key would silently
        # drop this value from the table.
        'theilLog2': TheilLog2(sizes),
        'simpson': Simpson(sizes),
        'hoover': Hoover(sizes),
    })

# Build the frame once from the collected rows instead of concatenating inside
# the loop; rows get a 0..n-1 index.
dfStats = pd.DataFrame(stat_rows)

In [77]:
# Write the statistics table to disk; the DataFrame index is not written.
stats_csv = '/Users/jones/Documents/MasterThesis/files/inducibleRainbow_DiversityStats.csv'
dfStats.to_csv(stats_csv, index=False)