In [212]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt

In [213]:
dataset = pd.read_csv("./summaryTable.csv")
dataset

Unnamed: 0.1,Unnamed: 0,FileName,VoiceVote,VoiceLevel,FaceVote,FaceLevel,MultiModalVote,MultiModalLevel
0,1,1001_IEO_NEU_XX,N,69.1,N,92.22,N,64.78
1,2,1001_IEO_HAP_LO,N,71.67,H,57,H,57.38
2,3,1001_IEO_HAP_MD,N,67.71,H,62.62,H,56.56
3,4,1001_IEO_HAP_HI,H,63.5,H,68.25,H,73.2
4,5,1001_IEO_SAD_LO,N,73.71,N,73.5,N,74.8
...,...,...,...,...,...,...,...,...
7437,7438,1091_WSI_HAP_XX,N,68.12,H,65.12,H,64.8
7438,7439,1091_WSI_SAD_XX,N,55.67,N,71.57,N,48.8
7439,7440,1091_WSI_ANG_XX,A,34.4,A,50.43,A,67.11
7440,7441,1091_WSI_FEA_XX,S,39.67,F,68.29,F,51.71


In [214]:
dataset.describe()

Unnamed: 0.1,Unnamed: 0
count,7442.0
mean,3721.5
std,2148.464684
min,1.0
25%,1861.25
50%,3721.5
75%,5581.75
max,7442.0


## Measures of Frequency 
### A. Voice column

In [215]:
vdf = dataset[["VoiceVote", "VoiceLevel"]]
vdf = vdf[vdf["VoiceVote"].isin(["A", "H", "D", "S", "F", "N"])]
vdf["VoiceLevel"] = pd.to_numeric(vdf["VoiceLevel"], downcast="float") 
vdf = vdf.set_index("VoiceVote")
vdf

Unnamed: 0_level_0,VoiceLevel
VoiceVote,Unnamed: 1_level_1
N,69.099998
N,71.669998
N,67.709999
H,63.500000
N,73.709999
...,...
N,68.120003
N,55.669998
A,34.400002
S,39.669998


In [216]:
vdf.describe()

Unnamed: 0,VoiceLevel
count,6798.0
mean,61.940979
std,12.581073
min,8.667
25%,53.75
50%,62.599998
75%,71.0
max,98.669998


In [217]:
std = vdf.std()
std.tolist()

[12.581072807312012]

In [218]:
trans = vdf.describe().T
rang = trans["max"] / trans["min"]
rang.tolist()

[11.384562161476971]

In [219]:
var = vdf.var()
var.tolist()

[158.28338623046875]

In [220]:
mean = vdf.mean()
mean.tolist()

[61.94097900390625]

In [221]:
median = vdf.median()
median.tolist()

[62.599998474121094]

In [222]:
mode = vdf.mode()["VoiceLevel"]
mode.tolist()

[65.0]

In [223]:
vdfsum = pd.DataFrame({"std":std, "range":rang, "var":var, "mean":mean, "median":median, "mode":mode[0]})
vtable = vdfsum.T
vtable

Unnamed: 0,VoiceLevel
std,12.581073
range,11.384562
var,158.283386
mean,61.940979
median,62.599998
mode,65.0


### B. Face column

In [224]:
fdf = dataset[["FaceVote", "FaceLevel"]]
fdf = fdf[fdf["FaceVote"].isin(["A", "H", "D", "S", "F", "N"])]
fdf["FaceLevel"] = pd.to_numeric(fdf["FaceLevel"], downcast="float") 
fdf = fdf.set_index("FaceVote")
fdf

Unnamed: 0_level_0,FaceLevel
FaceVote,Unnamed: 1_level_1
N,92.220001
H,57.000000
H,62.619999
H,68.250000
N,73.500000
...,...
H,65.120003
N,71.570000
A,50.430000
F,68.290001


In [225]:
fdf.describe()

Unnamed: 0,FaceLevel
count,6972.0
mean,66.578522
std,11.596425
min,16.0
25%,59.139999
50%,67.669998
75%,75.102499
max,98.75


In [226]:
std = fdf.std()
std.tolist()

[11.59642505645752]

In [227]:
trans = fdf.describe().T
rang = trans["max"] / trans["min"]
rang.tolist()

[6.171875]

In [228]:
var = fdf.var()
var.tolist()

[134.47708129882812]

In [229]:
mean = fdf.mean()
mean.tolist()

[66.57852172851562]

In [230]:
median = fdf.median()
median.tolist()

[67.66999816894531]

In [231]:
mode = fdf.mode()["FaceLevel"]
mode.tolist()

[69.0]

In [232]:
fdfsum = pd.DataFrame({"std":std, "range":rang, "var":var, "mean":mean, "median":median, "mode":mode[0]})
ftable = fdfsum.T
ftable

Unnamed: 0,FaceLevel
std,11.596425
range,6.171875
var,134.477081
mean,66.578522
median,67.669998
mode,69.0


### C. MultiModal column

In [233]:
mdf = dataset[["MultiModalVote", "MultiModalLevel"]]
mdf = mdf[mdf["MultiModalVote"].isin(["A", "H", "D", "S", "F", "N"])]
mdf["MultiModalLevel"] = pd.to_numeric(mdf["MultiModalLevel"], downcast="float") 
mdf = mdf.set_index("MultiModalVote")
mdf

Unnamed: 0_level_0,MultiModalLevel
MultiModalVote,Unnamed: 1_level_1
N,64.779999
H,57.380001
H,56.560001
H,73.199997
N,74.800003
...,...
H,64.800003
N,48.799999
A,67.110001
F,51.709999


In [234]:
std = mdf.std()
std.tolist()

[12.53847599029541]

In [235]:
trans = mdf.describe().T
rang = trans["max"] / trans["min"]
rang.tolist()

[8.657777913411458]

In [236]:
var = mdf.var()
var.tolist()

[157.21337890625]

In [237]:
mean = mdf.mean()
mean.tolist()

[65.67635345458984]

In [238]:
median = mdf.median()
median.tolist()

[67.0]

In [239]:
mode = mdf.mode()["MultiModalLevel"]
mode.tolist()

[67.0]

In [240]:
mdfsum = pd.DataFrame({"std":std, "range":rang, "var":var, "mean":mean, "median":median, "mode":mode[0]})
mtable = mdfsum.T
mtable

Unnamed: 0,MultiModalLevel
std,12.538476
range,8.657778
var,157.213379
mean,65.676353
median,67.0
mode,67.0
