# Statistic & Visualization for **letters pairs**

##About
---
Є задача декодування "зашифрованого" тексту, яка базується на кількості частоти входжень пар літер в "natural language text".

На цей момент ми отримали всі можливі табліці по яких можна в подальшому робити обчислення. Для їх кращого розуміння треба зробити деяку візуалізацію отриманих даних.

Цей ноутбук призначений беспосередньо для відображення деякої статистики по таблицях пар, а також їх візуалізація.

Таблиці (датафрейми) з якими можемо працювати:
- 1
- 2

---
---
## Import & mount

In [1]:
import os
import re
import copy
import math
import random
from time import time
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

# plt.rcParams['figure.figsize'] = [15, 6]

In [2]:
# Mount GitHub
!git clone https://github.com/EdwardGerman/Columnar-Transposition-Cipher.git  # clone repository
%ls  # checking whether all files are present
drch = '/content/Columnar-Transposition-Cipher' # Path to data

folder_pt = 'Parity_tables'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
title_font = {'family': 'serif',
              'color':  'darkred',
              'weight': 'bold',
              'size': 14,
              }

---
---
## Funcfions

In [4]:
def display_df(df, name = True):
    pd.set_option('display.max_columns', None) # Print all rows (features) in DF
    if name == True and hasattr(df, 'name'): print(df.name + ':')
    display(df)
    print('\n')
    pd.reset_option('display.max_columns')     # Default setting: print print only first & final 5 rows

---
---
## Load DataFrame files

File list for DataFrame (table) files:

In [5]:
# Get list of all files in directory
file_list = os.listdir(os.path.join(drch, folder_pt))
file_list

['data.csv',
 'Torah_Bereshit.csv',
 'Genetics_124.csv',
 'Chat_Comments.csv',
 'Asimov_I_Robot.csv',
 'Harry_Potter_I.csv',
 'News_articles.csv',
 'Hamlet_BB.csv',
 'Sci_articles.csv',
 'Log BQuant for All.csv',
 'Log BQuant without spaces.csv',
 'Log BFreq for All.csv',
 'Log BFreq without spaces.csv',
 'All texts.csv',
 'Freq for All.csv',
 'Quantit for All.csv',
 'Quantit without spaces.csv',
 'Freq without spaces.csv',
 'BQuant for All.csv',
 'BFreq for All.csv',
 'BQuant without spaces.csv',
 'BFreq without spaces.csv']

In [15]:
freq_tables_list = ['Quantit for All.csv',
                    'Quantit without spaces.csv',
                    'Freq for All.csv',
                    'Freq without spaces.csv']

bias_tables_list = ['BQuant for All.csv',
                    'BQuant without spaces.csv',
                    'BFreq without spaces.csv',
                    'BFreq for All.csv']

log_tables_list =  ['Log BQuant for All.csv',
                    'Log BQuant without spaces.csv',
                    'Log BFreq for All.csv',
                    'Log BFreq without spaces.csv']

Read DFs from disk and write them to dict

In [17]:
freq_tables = {}
bias_tables = {}
log_tables = {}

# For frequency tables
for df_file in freq_tables_list:
    table_name = df_file.replace('.csv', '')   # Only file name, without extension

    # Read:
    path_pt = os.path.join(drch, folder_pt, df_file)
    freq_tables[table_name] = pd.read_csv(path_pt)
    freq_tables[table_name].name = table_name

# For biased tables
for df_file in bias_tables_list:
    table_name = df_file.replace('.csv', '')   # Only file name, without extension

    # Read:
    path_pt = os.path.join(drch, folder_pt, df_file)
    bias_tables[table_name] = pd.read_csv(path_pt)
    bias_tables[table_name].name = table_name

# For logarithmic tables
for df_file in log_tables_list:
    table_name = df_file.replace('.csv', '')   # Only file name, without extension

    # Read:
    path_pt = os.path.join(drch, folder_pt, df_file)
    log_tables[table_name] = pd.read_csv(path_pt)
    log_tables[table_name].name = table_name

---
---
## Statistics

Quantitativity and frequency tables with value sums:

In [8]:
for name in freq_tables:
    print(name,':\n','  Sum of values - ', freq_tables[name].values.sum(), sep='')
    display_df(freq_tables[name], name = False)

Quantit for All:
  Sum of values - 2263891


Unnamed: 0,-,-a,-b,-c,-d,-e,-f,-g,-h,-i,-j,-k,-l,-m,-n,-o,-p,-q,-r,-s,-t,-u,-v,-w,-x,-y,-z
-,0,43411,17690,19257,14681,9638,16856,10019,24691,28471,2121,3059,10513,16610,9263,24615,15370,1162,10588,30310,61470,4807,3340,24373,642,7990,364
a-,11562,276,3297,6124,4707,280,952,2996,718,6242,98,2080,13634,5119,27631,135,2735,23,15403,12045,20611,1971,3351,1013,213,3564,191
b-,1036,2469,358,40,13,7706,3,19,9,1760,139,1,3439,14,47,3821,30,0,2280,442,170,3042,70,0,2,1623,1
c-,2372,7287,21,715,41,7201,53,183,8266,2405,0,2674,1932,28,21,9134,14,20,2215,483,4608,1783,57,0,0,520,11
d-,42369,2368,98,120,759,9265,129,295,23,6460,23,11,667,151,1009,4604,23,85,1668,1848,61,2057,146,106,4,842,5
e-,79473,11005,366,5636,17124,6191,1867,1576,602,2720,47,310,8886,4734,18902,1046,2516,635,28192,17631,7256,338,3643,1269,2582,3594,74
f-,14427,2479,2,21,9,3658,2054,24,4,4152,0,1,1234,1,1,7167,24,0,2661,111,1078,1414,0,16,0,169,1
g-,13712,2424,34,64,36,6488,12,505,3845,1958,1,4,924,222,803,3591,97,5,2991,789,153,1044,1,84,1,355,3
h-,10502,18077,28,20,94,45248,26,4,24,12640,0,5,163,208,238,7340,16,3,2102,244,2589,1474,10,47,3,987,10
i-,4618,2483,1038,7357,6508,4401,3101,4233,63,252,37,1085,6260,5267,34002,8238,1019,145,4347,15540,15180,189,3790,8,356,18,881




Quantit without spaces:
  Sum of values - 1441269


Unnamed: 0,-a,-b,-c,-d,-e,-f,-g,-h,-i,-j,-k,-l,-m,-n,-o,-p,-q,-r,-s,-t,-u,-v,-w,-x,-y,-z
a-,276,3297,6124,4707,280,952,2996,718,6242,98,2080,13634,5119,27631,135,2735,23,15403,12045,20611,1971,3351,1013,213,3564,191
b-,2469,358,40,13,7706,3,19,9,1760,139,1,3439,14,47,3821,30,0,2280,442,170,3042,70,0,2,1623,1
c-,7287,21,715,41,7201,53,183,8266,2405,0,2674,1932,28,21,9134,14,20,2215,483,4608,1783,57,0,0,520,11
d-,2368,98,120,759,9265,129,295,23,6460,23,11,667,151,1009,4604,23,85,1668,1848,61,2057,146,106,4,842,5
e-,11005,366,5636,17124,6191,1867,1576,602,2720,47,310,8886,4734,18902,1046,2516,635,28192,17631,7256,338,3643,1269,2582,3594,74
f-,2479,2,21,9,3658,2054,24,4,4152,0,1,1234,1,1,7167,24,0,2661,111,1078,1414,0,16,0,169,1
g-,2424,34,64,36,6488,12,505,3845,1958,1,4,924,222,803,3591,97,5,2991,789,153,1044,1,84,1,355,3
h-,18077,28,20,94,45248,26,4,24,12640,0,5,163,208,238,7340,16,3,2102,244,2589,1474,10,47,3,987,10
i-,2483,1038,7357,6508,4401,3101,4233,63,252,37,1085,6260,5267,34002,8238,1019,145,4347,15540,15180,189,3790,8,356,18,881
j-,410,1,3,0,384,0,0,0,46,1,1,0,0,1,626,13,1,12,2,0,945,84,0,0,1,0




Freq for All:
  Sum of values - 0.999999999999982


Unnamed: 0,-,-a,-b,-c,-d,-e,-f,-g,-h,-i,-j,-k,-l,-m,-n,-o,-p,-q,-r,-s,-t,-u,-v,-w,-x,-y,-z
-,0.0,0.019175,0.00781398,0.008506,0.006485,0.004257,0.007445588,0.004425566,0.01090644,0.012576,0.0009368826,0.001351213,0.004643775,0.007336926,0.004091628,0.01087287,0.006789196,0.0005132756,0.004677,0.01338845,0.02715237,0.002123,0.001475336,0.010766,0.0002835826,0.003529322,0.0001607851
a-,0.005107,0.000122,0.001456342,0.002705,0.002079,0.000124,0.0004205149,0.001323385,0.0003171531,0.002757,4.32883e-05,0.0009187721,0.006022375,0.002261151,0.01220509,5.963185e-05,0.001208097,1.01595e-05,0.006804,0.005320486,0.009104237,0.000871,0.001480195,0.000447,9.40858e-05,0.001574281,8.436802e-05
b-,0.000458,0.001091,0.0001581348,1.8e-05,6e-06,0.003404,1.325152e-06,8.39263e-06,3.975456e-06,0.000777,6.139872e-05,4.417174e-07,0.001519066,6.184043e-06,2.076072e-05,0.001687802,1.325152e-05,0.0,0.001007,0.0001952391,7.509195e-05,0.001344,3.092022e-05,0.0,8.834348e-07,0.0007169073,4.417174e-07
c-,0.001048,0.003219,9.276065e-06,0.000316,1.8e-05,0.003181,2.341102e-05,8.083428e-05,0.003651236,0.001062,0.0,0.001181152,0.000853398,1.236809e-05,9.276065e-06,0.004034647,6.184043e-06,8.834348e-06,0.000978,0.0002133495,0.002035434,0.000788,2.517789e-05,0.0,0.0,0.000229693,4.858891e-06
d-,0.018715,0.001046,4.32883e-05,5.3e-05,0.000335,0.004093,5.698154e-05,0.0001303066,1.01595e-05,0.002853,1.01595e-05,4.858891e-06,0.0002946255,6.669932e-05,0.0004456928,0.002033667,1.01595e-05,3.754598e-05,0.000737,0.0008162937,2.694476e-05,0.000909,6.449074e-05,4.7e-05,1.76687e-06,0.000371926,2.208587e-06
e-,0.035105,0.004861,0.0001616686,0.00249,0.007564,0.002735,0.0008246863,0.0006961466,0.0002659139,0.001201,2.076072e-05,0.0001369324,0.003925101,0.00209109,0.008349342,0.0004620364,0.001111361,0.0002804905,0.012453,0.007787919,0.003205101,0.000149,0.001609176,0.000561,0.001140514,0.001587532,3.268709e-05
f-,0.006373,0.001095,8.834348e-07,9e-06,4e-06,0.001616,0.0009072875,1.060122e-05,1.76687e-06,0.001834,0.0,4.417174e-07,0.0005450792,4.417174e-07,4.417174e-07,0.003165788,1.060122e-05,0.0,0.001175,4.903063e-05,0.0004761713,0.000625,0.0,7e-06,0.0,7.465024e-05,4.417174e-07
g-,0.006057,0.001071,1.501839e-05,2.8e-05,1.6e-05,0.002866,5.300609e-06,0.0002230673,0.001698403,0.000865,4.417174e-07,1.76687e-06,0.0004081469,9.806126e-05,0.0003546991,0.001586207,4.284659e-05,2.208587e-06,0.001321,0.000348515,6.758276e-05,0.000461,4.417174e-07,3.7e-05,4.417174e-07,0.0001568097,1.325152e-06
h-,0.004639,0.007985,1.236809e-05,9e-06,4.2e-05,0.019987,1.148465e-05,1.76687e-06,1.060122e-05,0.005583,0.0,2.208587e-06,7.199993e-05,9.187721e-05,0.0001051287,0.003242206,7.067478e-06,1.325152e-06,0.000928,0.000107779,0.001143606,0.000651,4.417174e-06,2.1e-05,1.325152e-06,0.0004359751,4.417174e-06
i-,0.00204,0.001097,0.0004585026,0.00325,0.002875,0.001944,0.001369766,0.00186979,2.782819e-05,0.000111,1.634354e-05,0.0004792634,0.002765151,0.002326525,0.01501927,0.003638868,0.00045011,6.404902e-05,0.00192,0.006864288,0.00670527,8.3e-05,0.001674109,4e-06,0.0001572514,7.950913e-06,0.000389153




Freq without spaces:
  Sum of values - 0.9999999999999822


Unnamed: 0,-a,-b,-c,-d,-e,-f,-g,-h,-i,-j,-k,-l,-m,-n,-o,-p,-q,-r,-s,-t,-u,-v,-w,-x,-y,-z
a-,0.000191,0.002287567,0.004249,0.003266,0.000194,0.000660529,0.002078724,0.0004981721,0.004331,6.799563e-05,0.001443173,0.00946,0.003551731,0.0191713,9.366746e-05,0.001897633,1.595816e-05,0.010687,0.008357219,0.014301,0.001368,0.002325034,0.000703,0.0001477864,0.002472821,0.0001325221
b-,0.001713,0.0002483922,2.8e-05,9e-06,0.005347,2.081499e-06,1.318283e-05,6.244497e-06,0.001221,9.644279e-05,6.93833e-07,0.002386,9.713662e-06,3.261015e-05,0.002651136,2.081499e-05,0.0,0.001582,0.0003066742,0.000118,0.002111,4.856831e-05,0.0,1.387666e-06,0.001126091,6.93833e-07
c-,0.005056,1.457049e-05,0.000496,2.8e-05,0.004996,3.677315e-05,0.0001269714,0.005735224,0.001669,0.0,0.001855309,0.00134,1.942732e-05,1.457049e-05,0.006337471,9.713662e-06,1.387666e-05,0.001537,0.0003351213,0.003197,0.001237,3.954848e-05,0.0,0.0,0.0003607932,7.632163e-06
d-,0.001643,6.799563e-05,8.3e-05,0.000527,0.006428,8.950446e-05,0.0002046807,1.595816e-05,0.004482,1.595816e-05,7.632163e-06,0.000463,0.0001047688,0.0007000775,0.003194407,1.595816e-05,5.897581e-05,0.001157,0.001282203,4.2e-05,0.001427,0.0001012996,7.4e-05,2.775332e-06,0.0005842074,3.469165e-06
e-,0.007636,0.0002539429,0.00391,0.011881,0.004296,0.001295386,0.001093481,0.0004176875,0.001887,3.261015e-05,0.0002150882,0.006165,0.003284605,0.01311483,0.0007257493,0.001745684,0.000440584,0.019561,0.01223297,0.005034,0.000235,0.002527634,0.00088,0.001791477,0.002493636,5.134364e-05
f-,0.00172,1.387666e-06,1.5e-05,6e-06,0.002538,0.001425133,1.665199e-05,2.775332e-06,0.002881,0.0,6.93833e-07,0.000856,6.93833e-07,6.93833e-07,0.004972701,1.665199e-05,0.0,0.001846,7.701546e-05,0.000748,0.000981,0.0,1.1e-05,0.0,0.0001172578,6.93833e-07
g-,0.001682,2.359032e-05,4.4e-05,2.5e-05,0.004502,8.325996e-06,0.0003503857,0.002667788,0.001359,6.93833e-07,2.775332e-06,0.000641,0.0001540309,0.0005571479,0.002491554,6.73018e-05,3.469165e-06,0.002075,0.0005474342,0.000106,0.000724,6.93833e-07,5.8e-05,6.93833e-07,0.0002463107,2.081499e-06
h-,0.012542,1.942732e-05,1.4e-05,6.5e-05,0.031395,1.803966e-05,2.775332e-06,1.665199e-05,0.00877,0.0,3.469165e-06,0.000113,0.0001443173,0.0001651323,0.005092734,1.110133e-05,2.081499e-06,0.001458,0.0001692953,0.001796,0.001023,6.93833e-06,3.3e-05,2.081499e-06,0.0006848132,6.93833e-06
i-,0.001723,0.0007201987,0.005105,0.004515,0.003054,0.002151576,0.002936995,4.371148e-05,0.000175,2.567182e-05,0.0007528088,0.004343,0.003654418,0.02359171,0.005715796,0.0007070158,0.0001006058,0.003016,0.01078216,0.010532,0.000131,0.002629627,6e-06,0.0002470045,1.248899e-05,0.0006112669
j-,0.000284,6.93833e-07,2e-06,0.0,0.000266,0.0,0.0,0.0,3.2e-05,6.93833e-07,6.93833e-07,0.0,0.0,6.93833e-07,0.0004343395,9.019829e-06,6.93833e-07,8e-06,1.387666e-06,0.0,0.000656,5.828197e-05,0.0,0.0,6.93833e-07,0.0






---
`Biased` tables with value sums:

In [9]:
for name in bias_tables:
    print(name,':\n','  Sum of values - ', bias_tables[name].values.sum(), sep='')
    display_df(bias_tables[name], name = False)

BQuant for All:
  Sum of values - 2263944


Unnamed: 0,-,-a,-b,-c,-d,-e,-f,-g,-h,-i,-j,-k,-l,-m,-n,-o,-p,-q,-r,-s,-t,-u,-v,-w,-x,-y,-z
-,1,43411,17690,19257,14681,9638,16856,10019,24691,28471,2121,3059,10513,16610,9263,24615,15370,1162,10588,30310,61470,4807,3340,24373,642,7990,364
a-,11562,276,3297,6124,4707,280,952,2996,718,6242,98,2080,13634,5119,27631,135,2735,23,15403,12045,20611,1971,3351,1013,213,3564,191
b-,1036,2469,358,40,13,7706,3,19,9,1760,139,1,3439,14,47,3821,30,1,2280,442,170,3042,70,1,2,1623,1
c-,2372,7287,21,715,41,7201,53,183,8266,2405,1,2674,1932,28,21,9134,14,20,2215,483,4608,1783,57,1,1,520,11
d-,42369,2368,98,120,759,9265,129,295,23,6460,23,11,667,151,1009,4604,23,85,1668,1848,61,2057,146,106,4,842,5
e-,79473,11005,366,5636,17124,6191,1867,1576,602,2720,47,310,8886,4734,18902,1046,2516,635,28192,17631,7256,338,3643,1269,2582,3594,74
f-,14427,2479,2,21,9,3658,2054,24,4,4152,1,1,1234,1,1,7167,24,1,2661,111,1078,1414,1,16,1,169,1
g-,13712,2424,34,64,36,6488,12,505,3845,1958,1,4,924,222,803,3591,97,5,2991,789,153,1044,1,84,1,355,3
h-,10502,18077,28,20,94,45248,26,4,24,12640,1,5,163,208,238,7340,16,3,2102,244,2589,1474,10,47,3,987,10
i-,4618,2483,1038,7357,6508,4401,3101,4233,63,252,37,1085,6260,5267,34002,8238,1019,145,4347,15540,15180,189,3790,8,356,18,881




BQuant without spaces:
  Sum of values - 1441321


Unnamed: 0,-a,-b,-c,-d,-e,-f,-g,-h,-i,-j,-k,-l,-m,-n,-o,-p,-q,-r,-s,-t,-u,-v,-w,-x,-y,-z
a-,276,3297,6124,4707,280,952,2996,718,6242,98,2080,13634,5119,27631,135,2735,23,15403,12045,20611,1971,3351,1013,213,3564,191
b-,2469,358,40,13,7706,3,19,9,1760,139,1,3439,14,47,3821,30,1,2280,442,170,3042,70,1,2,1623,1
c-,7287,21,715,41,7201,53,183,8266,2405,1,2674,1932,28,21,9134,14,20,2215,483,4608,1783,57,1,1,520,11
d-,2368,98,120,759,9265,129,295,23,6460,23,11,667,151,1009,4604,23,85,1668,1848,61,2057,146,106,4,842,5
e-,11005,366,5636,17124,6191,1867,1576,602,2720,47,310,8886,4734,18902,1046,2516,635,28192,17631,7256,338,3643,1269,2582,3594,74
f-,2479,2,21,9,3658,2054,24,4,4152,1,1,1234,1,1,7167,24,1,2661,111,1078,1414,1,16,1,169,1
g-,2424,34,64,36,6488,12,505,3845,1958,1,4,924,222,803,3591,97,5,2991,789,153,1044,1,84,1,355,3
h-,18077,28,20,94,45248,26,4,24,12640,1,5,163,208,238,7340,16,3,2102,244,2589,1474,10,47,3,987,10
i-,2483,1038,7357,6508,4401,3101,4233,63,252,37,1085,6260,5267,34002,8238,1019,145,4347,15540,15180,189,3790,8,356,18,881
j-,410,1,3,1,384,1,1,1,46,1,1,1,1,1,626,13,1,12,2,1,945,84,1,1,1,1




BFreq without spaces:
  Sum of values - 0.9999999999999839


Unnamed: 0,-a,-b,-c,-d,-e,-f,-g,-h,-i,-j,-k,-l,-m,-n,-o,-p,-q,-r,-s,-t,-u,-v,-w,-x,-y,-z
a-,0.000191,0.002287485,0.00424888,0.003265754,0.000194,0.0006605052,0.002078649,0.0004981541,0.004331,6.799318e-05,0.001443121,0.009459378,0.003551603,0.01917061,9.366408e-05,0.001897565,1.595758e-05,0.01068672,0.008356917,0.01430008,0.001367,0.002324951,0.0007028275,0.0001477811,0.002472732,0.0001325173
b-,0.001713,0.0002483833,2.775232e-05,9.019504e-06,0.005346,2.081424e-06,1.318235e-05,6.244272e-06,0.001221,9.643931e-05,6.93808e-07,0.002386006,9.713312e-06,3.260897e-05,0.00265104,2.081424e-05,6.93808e-07,0.001581882,0.0003066631,0.0001179474,0.002111,4.856656e-05,6.93808e-07,1.387616e-06,0.00112605,6.93808e-07
c-,0.005056,1.456997e-05,0.0004960727,2.844613e-05,0.004996,3.677182e-05,0.0001269669,0.005735017,0.001669,6.93808e-07,0.001855243,0.001340437,1.942662e-05,1.456997e-05,0.006337242,9.713312e-06,1.387616e-05,0.001536785,0.0003351093,0.003197067,0.001237,3.954705e-05,6.93808e-07,6.93808e-07,0.0003607801,7.631888e-06
d-,0.001643,6.799318e-05,8.325696e-05,0.0005266003,0.006428,8.950123e-05,0.0002046734,1.595758e-05,0.004482,1.595758e-05,7.631888e-06,0.0004627699,0.000104765,0.0007000522,0.003194292,1.595758e-05,5.897368e-05,0.001157272,0.001282157,4.232229e-05,0.001427,0.000101296,7.354365e-05,2.775232e-06,0.0005841863,3.46904e-06
e-,0.007635,0.0002539337,0.003910302,0.01188077,0.004295,0.001295339,0.001093441,0.0004176724,0.001887,3.260897e-05,0.0002150805,0.006165178,0.003284487,0.01311436,0.0007257231,0.001745621,0.0004405681,0.01955983,0.01223253,0.005034271,0.000235,0.002527542,0.0008804423,0.001791412,0.002493546,5.134179e-05
f-,0.00172,1.387616e-06,1.456997e-05,6.244272e-06,0.002538,0.001425082,1.665139e-05,2.775232e-06,0.002881,6.93808e-07,6.93808e-07,0.000856159,6.93808e-07,6.93808e-07,0.004972522,1.665139e-05,6.93808e-07,0.001846223,7.701268e-05,0.000747925,0.000981,6.93808e-07,1.110093e-05,6.93808e-07,0.0001172535,6.93808e-07
g-,0.001682,2.358947e-05,4.440371e-05,2.497709e-05,0.004501,8.325696e-06,0.000350373,0.002667692,0.001358,6.93808e-07,2.775232e-06,0.0006410786,0.0001540254,0.0005571278,0.002491464,6.729937e-05,3.46904e-06,0.00207518,0.0005474145,0.0001061526,0.000724,6.93808e-07,5.827987e-05,6.93808e-07,0.0002463018,2.081424e-06
h-,0.012542,1.942662e-05,1.387616e-05,6.521795e-05,0.031393,1.803901e-05,2.775232e-06,1.665139e-05,0.00877,6.93808e-07,3.46904e-06,0.0001130907,0.0001443121,0.0001651263,0.005092551,1.110093e-05,2.081424e-06,0.001458384,0.0001692891,0.001796269,0.001023,6.93808e-06,3.260897e-05,2.081424e-06,0.0006847885,6.93808e-06
i-,0.001723,0.0007201727,0.005104345,0.004515302,0.003053,0.002151499,0.002936889,4.37099e-05,0.000175,2.567089e-05,0.0007527816,0.004343238,0.003654287,0.02359086,0.00571559,0.0007069903,0.0001006022,0.003015983,0.01078178,0.01053201,0.000131,0.002629532,5.550464e-06,0.0002469956,1.248854e-05,0.0006112448
j-,0.000284,6.93808e-07,2.081424e-06,6.93808e-07,0.000266,6.93808e-07,6.93808e-07,6.93808e-07,3.2e-05,6.93808e-07,6.93808e-07,6.93808e-07,6.93808e-07,6.93808e-07,0.0004343238,9.019504e-06,6.93808e-07,8.325696e-06,1.387616e-06,6.93808e-07,0.000656,5.827987e-05,6.93808e-07,6.93808e-07,6.93808e-07,6.93808e-07




BFreq for All:
  Sum of values - 0.9999999999999825


Unnamed: 0,-,-a,-b,-c,-d,-e,-f,-g,-h,-i,-j,-k,-l,-m,-n,-o,-p,-q,-r,-s,-t,-u,-v,-w,-x,-y,-z
-,4.41707e-07,0.019175,0.007813798,0.008505952,0.006484701,0.004257,0.007445414,0.004425463,0.01090619,0.012576,0.0009368606,0.001351182,0.004643666,0.007336754,0.004091532,0.01087262,0.006789037,0.0005132636,0.004676794,0.01338814,0.02715173,0.002123,0.001475302,0.01076573,0.0002835759,0.003529239,0.0001607814
a-,0.005107017,0.000122,0.001456308,0.002705014,0.002079115,0.000124,0.0004205051,0.001323354,0.0003171457,0.002757,4.328729e-05,0.0009187506,0.006022234,0.002261098,0.01220481,5.963045e-05,0.001208069,1.015926e-05,0.006803614,0.005320361,0.009104024,0.000871,0.00148016,0.0004474492,9.40836e-05,0.001574244,8.436604e-05
b-,0.0004576085,0.001091,0.0001581311,1.766828e-05,5.742192e-06,0.003404,1.325121e-06,8.392434e-06,3.975363e-06,0.000777,6.139728e-05,4.41707e-07,0.001519031,6.183899e-06,2.076023e-05,0.001687763,1.325121e-05,4.41707e-07,0.001007092,0.0001952345,7.50902e-05,0.001344,3.091949e-05,4.41707e-07,8.834141e-07,0.0007168905,4.41707e-07
c-,0.001047729,0.003219,9.275848e-06,0.0003158205,1.810999e-05,0.003181,2.341047e-05,8.083239e-05,0.00365115,0.001062,4.41707e-07,0.001181125,0.000853378,1.23678e-05,9.275848e-06,0.004034552,6.183899e-06,8.834141e-06,0.0009783811,0.0002133445,0.002035386,0.000788,2.51773e-05,4.41707e-07,4.41707e-07,0.0002296877,4.858777e-06
d-,0.01871469,0.001046,4.328729e-05,5.300484e-05,0.0003352556,0.004092,5.698021e-05,0.0001303036,1.015926e-05,0.002853,1.015926e-05,4.858777e-06,0.0002946186,6.669776e-05,0.0004456824,0.002033619,1.015926e-05,3.75451e-05,0.0007367673,0.0008162746,2.694413e-05,0.000909,6.448923e-05,4.682095e-05,1.766828e-06,0.0003719173,2.208535e-06
e-,0.03510378,0.004861,0.0001616648,0.002489461,0.007563791,0.002735,0.000824667,0.0006961303,0.0002659076,0.001201,2.076023e-05,0.0001369292,0.003925009,0.002091041,0.008349146,0.0004620256,0.001111335,0.000280484,0.0124526,0.007787737,0.003205026,0.000149,0.001609139,0.0005605262,0.001140488,0.001587495,3.268632e-05
f-,0.006372507,0.001095,8.834141e-07,9.275848e-06,3.975363e-06,0.001616,0.0009072663,1.060097e-05,1.766828e-06,0.001834,4.41707e-07,4.41707e-07,0.0005450665,4.41707e-07,4.41707e-07,0.003165714,1.060097e-05,4.41707e-07,0.001175382,4.902948e-05,0.0004761602,0.000625,4.41707e-07,7.067313e-06,4.41707e-07,7.464849e-05,4.41707e-07
g-,0.006056687,0.001071,1.501804e-05,2.826925e-05,1.590145e-05,0.002866,5.300484e-06,0.0002230621,0.001698364,0.000865,4.41707e-07,1.766828e-06,0.0004081373,9.805896e-05,0.0003546908,0.00158617,4.284558e-05,2.208535e-06,0.001321146,0.0003485069,6.758118e-05,0.000461,4.41707e-07,3.710339e-05,4.41707e-07,0.000156806,1.325121e-06
h-,0.004638807,0.007985,1.23678e-05,8.834141e-06,4.152046e-05,0.019986,1.148438e-05,1.766828e-06,1.060097e-05,0.005583,4.41707e-07,2.208535e-06,7.199825e-05,9.187506e-05,0.0001051263,0.00324213,7.067313e-06,1.325121e-06,0.0009284682,0.0001077765,0.00114358,0.000651,4.41707e-06,2.076023e-05,1.325121e-06,0.0004359648,4.41707e-06
i-,0.002039803,0.001097,0.0004584919,0.003249639,0.002874629,0.001944,0.001369734,0.001869746,2.782754e-05,0.000111,1.634316e-05,0.0004792521,0.002765086,0.002326471,0.01501892,0.003638783,0.0004500995,6.404752e-05,0.0019201,0.006864127,0.006705113,8.3e-05,0.00167407,3.533656e-06,0.0001572477,7.950727e-06,0.0003891439






---
---
## Visualization
Two types of visualization were used:
- 2D visualization - **`Heat map`** (by Seaborn)
- 3D visualization - **`3D Bars`** (by matplotlib)


### Functions for visualization
For usability was wtite especial function with additional parameters

##### 2D Heatmap

In [10]:
def ShowHeatmap(df, figsize=(20, 15), cmap='coolwarm', font_size=None):
    sns.set()

    old_figsize = plt.gcf().get_size_inches()   # save old figsize
    plt.figure(figsize=figsize)                 # set heatmap size


    if (df.astype(int) == df).all().all():
        fmt = 'd'
        if font_size == None: font_size = 12
    else:
        fmt = '.3g'
        if font_size == None: font_size = 8

    ax = sns.heatmap(df, annot=True, fmt=fmt, cmap=cmap,  cbar=False,
                     annot_kws={'size': font_size}) # YlGnBu viridis
                     # 'd' # '.2f' # '.0f' # '.2g'

    ax.xaxis.tick_top()     # axis ticks on top
    plt.yticks(rotation=0)  # rotate of axis ticks

    font_t = {'family': 'serif', 'color':  'darkred', 'weight': 'bold', 'size': 24,}
    font_l = {'family': 'serif', 'weight': 'bold', 'size': 16,}

    # Title, xlabel, ylabel:
    plt.text(0.5, 1.07, df.name, ha='center', transform=ax.transAxes, fontdict=font_t)
    plt.text(0.5, 1.04, 'Second letter in pair', ha='center', transform=ax.transAxes, fontdict=font_l)
    plt.ylabel('First letter in pair', fontdict=font_l)

    plt.show() # Show heatmap

    plt.figure(figsize=old_figsize)

##### 3D Histogram

In [11]:
def Show3DBars(df, figsize=(16, 12), colormap=cm.viridis, elev=20, azim=30):

    old_figsize = plt.gcf().get_size_inches()   # save old figsize
    plt.figure(figsize=figsize)                 # set heatmap size

    row_labels = df.columns.tolist() # ticks name list
    col_labels = df.index.tolist()   # ticks name list


    num_rows, num_cols = df.shape   # Get dataframe size
    x = np.arange(num_cols)
    y = np.arange(num_rows)
    x, y = np.meshgrid(x, y)        # Set meshgrid

    z = df.values   # Get bar heights

    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(111, projection='3d')  # Making 3D axes

    colors = colormap(z.flatten() / z.max())    # Set different bar colors depend on bar height


    x = x.flatten()
    y = y.flatten()
    dx = dy = 0.8
    dz = z.flatten()

    ax.bar3d(x, y, np.zeros_like(dz), dx, dy, dz, shade=True, color=colors)# Build 3D hist

    # Set ticks of x & y axes
    ax.set_xticks(np.arange(num_cols))#  - dx/2
    ax.set_xticklabels(col_labels)
    ax.set_yticks(np.arange(num_rows))#  - dy/2
    ax.set_yticklabels(row_labels)

    ax.set_zlabel('Z Label')    # Set Z label

    ax.view_init(elev=elev, azim=azim)  # Tune of the graph rotate

    font_t = {'family': 'serif', 'color':  'darkred', 'weight': 'bold', 'size': 24,}
    ax.set_title(df.name, fontdict=font_t)

    plt.show()
    plt.figure(figsize=old_figsize)


### Show

#### Frequency tables

In [12]:
print(list(freq_tables.keys()))

['Quantit for All', 'Quantit without spaces', 'Freq for All', 'Freq without spaces']


In [13]:
for dfname in freq_tables:
    ShowHeatmap(freq_tables[dfname])
    Show3DBars(freq_tables[dfname])

Output hidden; open in https://colab.research.google.com to view.

---
#### Biased tables



In [18]:
for dfname in bias_tables:
    ShowHeatmap(bias_tables[dfname])

Output hidden; open in https://colab.research.google.com to view.

---
#### Logarithmic tables

In [19]:
for dfname in log_tables:
    ShowHeatmap(log_tables[dfname])
    Show3DBars(log_tables[dfname])

Output hidden; open in https://colab.research.google.com to view.

---
---
---
