In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
import seaborn as sns

In [3]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as grid_spec
%matplotlib inline

In [13]:
# ---- Which regression-result file to load ----
# Each setting is picked by position from its tuple of allowed values;
# change the index to switch to the other option.
author = ("Chang_et_al", "Tse_et_al")[0]
task = ("Naming", "LD")[0]
mdl = ("CSR", "GLM")[1]

# Optional file-name suffixes; the empty string means "no suffix".
zscored = ("", " (z-scored)")[1]
normed = ("", " (normed)")[0]

# Results are stored under Output_Linguistic/<author>/<task>/.
data_folder = os.path.join("Output_Linguistic", author, task)
data_name = f"{mdl}_regression_results{zscored}{normed}.csv"
data_path = os.path.join(data_folder, data_name)

data = pd.read_csv(data_path)

# Show the available columns as a quick sanity check on the loaded file.
print(data.columns)

Index(['SID', 'X0', 'LogCF', 'NS', 'CON', 'PC', 'SC', 'SAR', 'IMG', 'AoA',
       'R_squared', 'Adjusted_R2', 'LogLik', 'AIC', 'AICc', 'BIC', 'NRMSE'],
      dtype='object')


In [5]:
# Columns to summarise: the regression terms followed by the fit metrics
# stored in the results file.
desc_cols = [
    'X0', 'LogCF', 'NS', 'CON', 'PC', 'SC', 'SAR', 'IMG', 'AoA',
    'R_squared', 'Adjusted_R2', 'LogLik', 'AIC', 'AICc', 'BIC', 'NRMSE'
]

# min / max / mean / std of every column, rendered as 3-decimal strings and
# transposed so that each summarised column becomes one row.
# The formatting is applied column by column with Series.map:
# DataFrame.applymap is deprecated since pandas 2.1 (FutureWarning), while its
# replacement DataFrame.map is missing from older pandas versions.
data_desc = (data[desc_cols].describe()
             .loc[['min', 'max', 'mean', 'std'], :]
             .astype('float')
             .apply(lambda col: col.map(lambda x: f"{x:.3f}"))
             .T)

print(data_desc)

                  min      max      mean     std
X0             -0.710    1.097     0.241   0.306
LogCF          -0.292    0.140    -0.071   0.085
NS             -0.262    0.196     0.024   0.069
CON            -0.316    0.135    -0.049   0.074
PC             -0.170    0.210    -0.002   0.079
SC             -0.194    0.215     0.006   0.067
SAR            -0.239    0.124    -0.056   0.078
IMG            -0.204    0.143    -0.054   0.067
AoA            -0.124    0.208     0.044   0.068
R_squared       0.193    0.507     0.349   0.063
Adjusted_R2    -0.072    0.310     0.092   0.080
LogLik       -164.221  -39.411  -101.232  20.920
AIC           168.822  418.442   292.464  41.839
AICc          203.322  447.393   328.970  39.917
BIC           308.862  564.321   430.958  43.784
NRMSE           0.289  269.580    23.296  33.313


In [6]:
# Write the summary table to an Excel file placed beside the source CSV.
dn = data_name.replace(".csv", "")
desc_file_name = f"[Desc] {dn}.xlsx"
fp = os.path.join(data_folder, desc_file_name)

# A file that is already present is left untouched (never overwritten).
desc_already_saved = os.path.exists(fp)
if not desc_already_saved:
    data_desc.to_excel(fp)

In [7]:
# Figures are written to Figs_Linguistic/<author>/<task>/.
fig_folder = os.path.join("Figs_Linguistic", author, task)

# exist_ok=True creates the folder (and any missing parents) when needed and
# is a no-op when it is already there, without a separate existence check
# that could race with another process creating the same folder.
os.makedirs(fig_folder, exist_ok=True)

In [8]:
# Save one histogram (30 bins, with a KDE overlay) per fit metric into
# fig_folder, marking the mean and the median of that metric.
for targ_col in [
     'R_squared', 'Adjusted_R2', 'LogLik', 'AIC', 'AICc', 'BIC', 'NRMSE'
]:
    fn = f"[histplot] {mdl} fitting {targ_col}.png"

    # An explicit figure/axes pair keeps every call below on this plot,
    # instead of on whichever figure pyplot currently treats as active.
    fig, ax = plt.subplots()
    sns.histplot(
        data=data,
        x=targ_col,
        kde=True,
        bins=30,
        ax=ax
    )

    # Reference lines are labelled and listed in a legend so the saved image
    # can be read without looking at this code.
    ax.axvline(
        data[targ_col].mean(), color="red", linestyle="-", label="mean"
    )
    ax.axvline(
        data[targ_col].median(), color="lightgreen", linestyle="-",
        label="median"
    )
    ax.legend()

    fig.tight_layout()
    fig.savefig(os.path.join(fig_folder, fn), format='png', dpi=200)

    # Close this figure explicitly so figures do not pile up across iterations.
    plt.close(fig)

In [14]:
# Ridge plot: one KDE curve per regression term, stacked vertically and
# ordered by mean value, saved as a single PNG in fig_folder.
dn = data_name.replace(".csv", "")
fn = f"[coef] {dn}.png"

x_labels = ['X0', 'LogCF', 'NS', 'CON', 'PC', 'SC', 'SAR', 'IMG', 'AoA']

# Long format: one row per (SID, term) pair, in columns "variable" / "value".
long_data = (data
             .loc[:, ['SID'] + x_labels]
             .melt(id_vars="SID"))

# Order the ridges from the largest to the smallest mean.
# The ranks are computed once here rather than on every call of the sort key.
mean_rank = data.loc[:, x_labels].mean().rank(ascending=False)
sorted_x_labs = sorted(x_labels, key=lambda x: mean_rank[x])

# One colour per ridge, so the palette size follows the number of terms.
colors = sns.cubehelix_palette(len(x_labels), rot=-.25, light=.7)
sns.set_theme(style="white")

## Create ridgeplots in Matplotlib
## see: https://matplotlib.org/matplotblog/posts/create-ridgeplots-in-matplotlib/

gs = grid_spec.GridSpec(len(x_labels), 1)
fig = plt.figure(figsize=(10, 6), dpi=200)

# Shared x-range of all ridges, computed once (Series.min/max skip NaN).
x_lo = long_data["value"].min()
x_hi = long_data["value"].max()

# Lower bound for the top of the shared density axis. The final limit is
# raised above it whenever a curve peaks higher, so no ridge gets clipped.
y_top_floor = 8.5
density_peaks = []

ax_objs = []
for i, xlab in enumerate(sorted_x_labs):
    ax = fig.add_subplot(gs[i:i+1, :])
    ax_objs.append(ax)

    (long_data
     .query("variable == @xlab")["value"]
     .plot.kde(ax=ax, lw=0.5))

    # read the curve back from the line just drawn (the only line on this
    # axes so far) through the public accessor
    x, y = ax.lines[-1].get_data()
    density_peaks.append(y.max())

    # filling the space beneath the distribution
    ax.fill_between(x, y, color=colors[i])

    # uniform x lims (the shared y lims are applied after the loop)
    ax.set_xlim(x_lo, x_hi)

    # make background transparent, so overlapping ridges stay visible
    ax.patch.set_alpha(0)

    # remove axis ticks and labels; only the bottom ridge keeps x tick labels
    ax.set_ylabel('')
    ax.set_yticklabels([])
    if xlab != sorted_x_labs[-1]:
        ax.set_xticklabels([])

    # remove borders
    for s in ["top", "right", "left", "bottom"]:
        ax.spines[s].set_visible(False)

    # add labels, right-aligned against the left edge of the x-range
    ax.text(x_lo, 0.5, xlab,
            fontweight="bold", fontsize=14, ha="right")

    # mark median (green, solid), mean (red, dashed), and zero (thin black)
    ax.axvline(
        data[xlab].median(), color="lightgreen", linestyle="-"
    )
    ax.axvline(
        data[xlab].mean(), color="red", linestyle="--"
    )
    ax.axvline(
        0, color="black", linestyle="-", lw=.5
    )

# uniform y lims: at least y_top_floor, higher if any density exceeds it
y_top = max([y_top_floor] + density_peaks)
for ax in ax_objs:
    ax.set_ylim(0, y_top)

# overlapping axes objects
gs.update(hspace= -0.5)

# plt.show()
fig.savefig(os.path.join(fig_folder, fn), format='png', bbox_inches='tight')
plt.close(fig)