# In [None]:
import json
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
from math import log

# Result files (looked up under data/) to plot; the numeric suffix is
# presumably the alphabet size of the run -- confirm against the data.
files = [f'results_{suffix}.json' for suffix in (4, 256)]

def plot(file):
    """Plot minimizer density and the tuned parameter against k for one file.

    Loads ``data/{file}`` as JSON, flattens it with ``pd.json_normalize`` and
    draws (and shows) two figures:

    1. ``density`` versus ``k`` on log2 axes, one line per minimizer type and
       window size ``w``, plus three reference curves per ``w``. Saved as
       ``fig/{file}.svg`` and ``fig/{file}.png``.
    2. The ``tp.k0`` parameter (0 where missing) versus ``k``. Saved as
       ``fig/{file}_params.svg``.

    Args:
        file: Name of a JSON file inside ``data/``. After normalisation the
            records must provide the columns ``k``, ``w``, ``sigma``,
            ``density``, ``tp.minimizer_type`` and ``tp.k0``.
    """
    import os

    with open(f'data/{file}', 'r') as f:
        data = json.load(f)
    df = pd.json_normalize(data)

    # Use human-readable names for the legend.
    df = df.rename(columns={'tp.minimizer_type': 'Minimizer type'})
    df['Minimizer type'] = df['Minimizer type'].replace({
        'Minimizer': 'Random minimizer',
        'LrMinimizer': 'LR-minimizer',
        'ModMinimizer': 'Mod-minimizer',
        'MiniceptionNew': 'Modified miniception',
    })

    # Keep only rows with k >= log_sigma(w). The explicit copy makes the
    # filtered frame independent, so adding a column below does not trigger
    # pandas' SettingWithCopyWarning.
    df = df[df.k >= np.log(df.w) / np.log(df.sigma)].copy()
    # NOTE: only 'tp.k0' is used here; 'tp.r' is not added in, even though the
    # second figure's title mentions "k0 or r".
    df['param'] = df['tp.k0'].fillna(0)
    sigma = df['sigma'].unique()[0]
    ws = df.w.unique()

    # savefig does not create missing directories.
    os.makedirs('fig', exist_ok=True)

    # --- Figure 1: density versus k -------------------------------------
    # Include the largest k so the red curve spans the same range as the data.
    ks = range(df.k.min(), df.k.max() + 1)
    for w in ws:
        # Black: (1.5 + 1/(2w)) / (w+1); blue: 1.5 / (w+0.5).
        plt.axhline(y=(1.5 + 1 / (2 * w)) / (w + 1), color='black', linewidth=0.5)
        plt.axhline(y=1.5 / (w + 0.5), color='blue', linewidth=0.5)
        # Red: 1/w + 1/(w+k) - 1/(w*(w+k)) as a function of k.
        plt.plot(
            ks,
            [1 / w + 1 / (w + k) - 1 / (w * (w + k)) for k in ks],
            color='red',
            linewidth=0.5,
        )
    sns.lineplot(
        x='k',
        y='density',
        hue='Minimizer type',
        size='w',
        sizes=(1, 2),
        data=df,
        legend='full',
        marker='.',
        dashes=False,
    )
    plt.title(f'Minimizer density on random text (alphabet size σ={sigma}; length=5M)')
    plt.xlabel('Kmer length k')
    plt.ylabel('Density')
    plt.ylim(2**-4.7, 2**-1.95)
    plt.yscale('log', base=2)
    # Label the y axis at 2/(w+1) and 1.5/(w+0.5) for every plotted w.
    tick_values = [2 / (w + 1) for w in ws] + [1.5 / (w + 0.5) for w in ws]
    plt.yticks(tick_values, [f'{value:.3f}' for value in tick_values])
    plt.xscale('log', base=2)
    plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left')
    # Hand-written key for the reference curves, placed right of the axes.
    axes = plt.gca()
    plt.text(1.05, 0.3, '1.5 / (w+0.5)', color='blue', transform=axes.transAxes)
    plt.text(1.05, 0.25, '(1.5+1/(2w)) / (w+1)', color='black', transform=axes.transAxes)
    plt.text(1.05, 0.2, '1/w + 1/(w+k) - 1/(w·(w+k))', color='red', transform=axes.transAxes)

    plt.savefig(f'fig/{file}.svg', bbox_inches='tight', dpi=200)
    plt.savefig(f'fig/{file}.png', bbox_inches='tight', dpi=200)
    plt.show()

    # --- Figure 2: tuned parameter versus k ------------------------------
    # The column was renamed above, so hue must use the new name; the old
    # 'tp.minimizer_type' no longer exists in df.
    sns.lineplot(x='k', y='param', hue='Minimizer type', size='w', data=df, legend='full')
    plt.title('Optimal parameter k0 or r')
    plt.xscale('log', base=2)
    plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left')
    plt.savefig(f'fig/{file}_params.svg', bbox_inches='tight')
    plt.show()

# Render the figures for every configured results file, so that extending
# `files` does not require touching this driver code.
for results_file in files:
    plot(results_file)
