In [1]:
import numpy as np
import pandas as pd

from lets_plot import *
from lets_plot.mapping import as_discrete
LetsPlot.setup_html()

import os; import sys; sys.path.append(os.path.join(sys.path[0], ".."))
from util import util

In [2]:
def add_basic_width(df, basic_font):
    """Attach to every row the width its character has in the basic font.

    For each distinct ``char`` the first row whose ``font_family``,
    ``font_size`` and ``font_face`` equal ``basic_font.family``,
    ``basic_font.size`` and ``basic_font.face`` supplies the reference
    ``width``; that value (truncated to an integer) is written to a
    ``basic_width`` column on every row of the same character.

    The input frame is not modified. The returned frame keeps the input's
    index and row order, so the result does not depend on how the installed
    pandas version shapes the output of ``groupby(...).apply(...)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``char``, ``font_family``, ``font_size``,
        ``font_face`` and ``width``.
    basic_font : object
        Any object exposing ``family``, ``size`` and ``face`` attributes.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` (rows with a null ``char`` excluded) with an
        integer ``basic_width`` column.

    Raises
    ------
    IndexError
        If some character has no row measured in the basic font.
    """
    # Rows with a null char are excluded, mirroring groupby's default
    # dropna behaviour for grouping keys.
    rows = df[df["char"].notna()]

    is_basic = (
        (rows["font_family"] == basic_font.family)
        & (rows["font_size"] == basic_font.size)
        & (rows["font_face"] == basic_font.face)
    )
    # One reference width per character; the first matching row wins.
    basic_widths = (
        rows.loc[is_basic]
        .drop_duplicates("char")
        .set_index("char")["width"]
    )

    missing = rows.loc[~rows["char"].isin(basic_widths.index), "char"].unique()
    if len(missing):
        # IndexError is kept as the exception type for characters without a
        # basic-font measurement, but with an actionable message.
        raise IndexError(
            "no basic-font width for characters: {!r}".format(list(missing))
        )

    result = rows.copy()
    result["basic_width"] = result["char"].map(basic_widths).astype("int64")
    return result

def remove_outliers(df, columns, n_std=3):
    """Drop rows whose value in any listed column is an upper outlier.

    Columns are processed in order. For each one, rows are kept only when
    the value is at most ``mean + n_std * std``, where mean and standard
    deviation are computed on the rows that survived the previous columns.
    Only the upper tail is trimmed; unusually low values are kept. Rows
    holding NaN in a processed column fail the comparison and are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter; it is not modified.
    columns : iterable of str
        Names of the numeric columns to trim.
    n_std : int or float, default 3
        Number of standard deviations above the mean that is still accepted.

    Returns
    -------
    pandas.DataFrame
        The filtered frame (``df`` itself when ``columns`` is empty).
    """
    filtered = df
    for name in columns:
        values = filtered[name]
        upper_bound = values.mean() + (n_std * values.std())
        filtered = filtered[values <= upper_bound]
    return filtered

In [3]:
# Reference ("basic") font: widths measured in this font are the baseline that
# all other fonts are compared against.
# BASIC_FONT_SIZE is also the denominator of 'height_coeff' further down.
BASIC_FONT_SIZE = 14
BASIC_FONT_FACE = "normal"
# NOTE(review): util.Font is assumed to take (family, size, face) positionally
# and expose them as .family / .size / .face — confirm against util.
BASIC_FONT = util.Font("Lucida Grande", BASIC_FONT_SIZE, BASIC_FONT_FACE)

In [4]:
# Load the measurements, attach each character's basic-font width, then derive
# the comparison metrics in one chain:
#   width_diff    - absolute difference from the basic width
#   width_coeff   - ratio to the basic width
#   height_coeff  - font size relative to BASIC_FONT_SIZE
#   stretch_coeff - width ratio normalised by the size ratio
char_widths_df = add_basic_width(
    util.get_df("../data/full/char_widths.csv", "all"),
    basic_font=BASIC_FONT,
).assign(
    width_diff=lambda d: d.width - d.basic_width,
    width_coeff=lambda d: d.width / d.basic_width,
    height_coeff=lambda d: d.font_size / BASIC_FONT_SIZE,
    stretch_coeff=lambda d: d.width_coeff / d.height_coeff,
)
char_widths_df

Unnamed: 0,char_id,char,alphabet,font_family,font_size,font_face,width,basic_width,width_diff,width_coeff,height_coeff,stretch_coeff
0,65,A,basic_latin,Courier,9,normal,8,13,-5,0.615385,0.642857,0.957265
1,66,B,basic_latin,Courier,9,normal,8,13,-5,0.615385,0.642857,0.957265
2,67,C,basic_latin,Courier,9,normal,8,14,-6,0.571429,0.642857,0.888889
3,68,D,basic_latin,Courier,9,normal,8,14,-6,0.571429,0.642857,0.888889
4,69,E,basic_latin,Courier,9,normal,8,13,-5,0.615385,0.642857,0.957265
...,...,...,...,...,...,...,...,...,...,...,...,...
43627,123,{,russian,Lucida Console,20,bold+italic,22,6,16,3.666667,1.428571,2.566667
43628,124,|,russian,Lucida Console,20,bold+italic,18,6,12,3.000000,1.428571,2.100000
43629,125,},russian,Lucida Console,20,bold+italic,18,6,12,3.000000,1.428571,2.100000
43630,126,~,russian,Lucida Console,20,bold+italic,20,11,9,1.818182,1.428571,1.272727


## `'width_diff'`

In [5]:
# Box plots of 'width_diff' against 'basic_width', coloured and faceted by
# font face.
# NOTE(review): filters=["family", "size"] presumably restricts the data to
# rows matching BASIC_FONT's family and size — confirm in util.filter_by_font.
(
    ggplot(util.filter_by_font(char_widths_df, BASIC_FONT, filters=["family", "size"]))
    + geom_boxplot(aes("basic_width", "width_diff", fill="font_face"))
    + facet_grid(x="font_face")
    + ggtitle("Вариация 'width_diff' для разной 'basic_width'")
)

In [6]:
# Mean 'width_diff' per (font_family, font_face) after trimming upper
# outliers, drawn as bars ordered by the mean and faceted by font face.
(
    ggplot(
        remove_outliers(
            util.filter_by_font(char_widths_df, BASIC_FONT, filters=["size"]),
            ["width_diff"],
        )
        .groupby(["font_family", "font_face"])
        .width_diff.mean()
        .to_frame()
        .reset_index()
    )
    + geom_bar(
        aes(as_discrete("font_family", order_by="width_diff"), "width_diff", fill="font_face"),
        stat='identity',
        color="black",
    )
    + facet_grid(x="font_face")
    + ggsize(1000, 400)
    + ggtitle("Среднее значение 'width_diff' для разных шрифтов")
)

## `'stretch_coeff'`

In [7]:
# Box plots of 'stretch_coeff' against 'basic_width'.
# NOTE(review): filters=["family", "face"] presumably restricts the data to
# rows matching BASIC_FONT's family and face — confirm in util.filter_by_font.
(
    ggplot(
        util.filter_by_font(char_widths_df, BASIC_FONT, filters=["family", "face"]),
        aes("basic_width", "stretch_coeff"),
    )
    + geom_boxplot()
    + ggtitle("Изменение 'stretch_coeff' в зависимости от базовой ширины символов")
    + ggsize(800, 400)
    + theme_minimal()
)

In [8]:
# Mean 'stretch_coeff' per font family after trimming upper outliers, drawn
# as bars ordered by the mean.
(
    ggplot(
        remove_outliers(
            util.filter_by_font(char_widths_df, BASIC_FONT, filters=["face"]),
            ["stretch_coeff"],
        )
        .groupby(["font_family"])
        .stretch_coeff.mean()
        .to_frame()
        .reset_index()
    )
    + geom_bar(
        aes(as_discrete("font_family", order_by="stretch_coeff"), "stretch_coeff"),
        stat='identity',
    )
    + ggtitle("Среднее значение 'stretch_coeff' для разных шрифтов")
)