# In [None]:  (notebook cell marker left over from the Colab export)
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

# ---- Figure geometry and global text sizes ----
# Panel dimensions are given in centimetres and converted to inches, the unit
# that `figsize` expects when the figure is created further down.
CM_TO_INCH = 1 / 2.54
panel_w_cm = panel_h_cm = 5.0
# The figure is four panels wide and four panels tall.
fig_w, fig_h = (4 * side_cm * CM_TO_INCH for side_cm in (panel_w_cm, panel_h_cm))

# Default text sizes for everything that does not set `fontsize` explicitly.
_font_sizes = {
    'font.size': 7,
    'axes.titlesize': 8,
    'axes.labelsize': 7,
    'xtick.labelsize': 6,
    'ytick.labelsize': 6,
}
mpl.rcParams.update(_font_sizes)


# Size and weight shared by the dataset column titles and the model row labels;
# change LABEL_SIZE to the exact size wanted (e.g. 9, 10, 12).
LABEL_SIZE, LABEL_WEIGHT = 10, 'bold'

# ---- Load and normalise the input table ----
file_path = '/content/sample_data/small datasets.xlsx'
df = pd.read_excel(file_path, sheet_name=0, engine='openpyxl')

# Column headers are compared as stripped strings from here on.
df = df.rename(columns=lambda header: str(header).strip())

# Numeric columns: anything that cannot be parsed becomes NaN (errors='coerce').
# NOTE(review): the plotted 'Bias' column is filled from the sheet's 'Variance'
# column, while the axis label and output file names say "Bias" -- confirm that
# this aliasing is intentional.
_numeric_sources = {'K': 'K', 'Bias': 'Variance'}
for _target, _source in _numeric_sources.items():
    df[_target] = pd.to_numeric(df[_source], errors='coerce')

# Grouping columns are matched against string labels, so force them to str.
for _label_col in ('Model', 'Dataset'):
    df[_label_col] = df[_label_col].astype(str)

# ---- Grid order, line styling and figure creation ----
# Rows of the grid follow `models`, columns follow `datasets`.
models = ['DT', 'KNN', 'LR', 'SVM']
datasets = ['D9', 'D10', 'D11', 'D12']
# One marker shape per model; every series is drawn in the same colour.
markers = dict(DT='D', KNN='^', LR='s', SVM='o')
color = 'black'

# Shared x tick positions for every panel.
x_ticks = list(range(0, 21, 5))

plt.style.use('seaborn-v0_8')
fig, axes = plt.subplots(
    nrows=len(models),
    ncols=len(datasets),
    figsize=(fig_w, fig_h),
    dpi=600,
    constrained_layout=True,
)

# Helper: choose scaling factor
def choose_factor(values):
    """Return a power-of-ten multiplier chosen from the largest magnitude.

    The largest absolute value in ``values`` (NaNs ignored) is compared
    against 0.01, 0.1 and 1; the multiplier is 1000, 100 or 10 for the first
    threshold it falls below, and 1 otherwise.  An empty input is treated as
    having a peak of 1.0 and therefore yields 1.
    """
    if len(values):
        peak = float(np.nanmax(np.abs(values)))
    else:
        peak = 1.0

    # Thresholds in ascending order; the first one the peak is below wins.
    for upper_bound, multiplier in ((0.01, 1000), (0.1, 100), (1, 10)):
        if peak < upper_bound:
            return multiplier
    return 1

# Helper: check crowding near a corner
# corner: 'TL', 'BL', 'TR'
def is_crowded(K, B_scaled, p, corner, x_tol=0.5, y_top=0.85, y_bottom=0.15):
    """Tell whether data points or the trend line occupy a panel corner.

    Parameters:
        K: array of x values.
        B_scaled: array of y values, same length as ``K``.
        p: callable trend line (e.g. ``np.poly1d``) or ``None`` for no trend.
        corner: ``'TL'``, ``'BL'`` or ``'TR'``; any other value gives ``False``.
        x_tol: how far from the smallest/largest K a point may be and still
            count as being at the left/right edge.
        y_top: normalised height above which something counts as "top".
        y_bottom: normalised height below which something counts as "bottom".

    Heights are normalised with the min and max of ``B_scaled`` (a zero range
    is replaced by 1.0).  A corner is crowded when an edge point lies in that
    vertical band, or when the trend line evaluated at the matching K extreme
    does.  Returns a plain ``bool``; empty ``K`` is never crowded.
    """
    if len(K) == 0:
        return False

    y_lo = np.nanmin(B_scaled)
    y_hi = np.nanmax(B_scaled)
    span = y_hi - y_lo
    if span == 0:
        span = 1.0

    k_lo = np.nanmin(K)
    k_hi = np.nanmax(K)
    heights = (B_scaled - y_lo) / span
    at_left = K <= (k_lo + x_tol)
    at_right = K >= (k_hi - x_tol)

    # Where the trend line sits at the two horizontal extremes.
    if p is None:
        trend_hits = {'TL': False, 'BL': False, 'TR': False}
    else:
        left_end = (float(p(k_lo)) - y_lo) / span
        right_end = (float(p(k_hi)) - y_lo) / span
        trend_hits = {
            'TL': left_end > y_top,
            'BL': left_end < y_bottom,
            'TR': right_end > y_top,
        }

    point_masks = {
        'TL': at_left & (heights > y_top),
        'BL': at_left & (heights < y_bottom),
        'TR': at_right & (heights > y_top),
    }
    if corner not in point_masks:
        return False
    return bool(np.any(point_masks[corner]) or trend_hits[corner])

# Optional manual overrides for where the "r = ..." annotation goes, keyed by
# (model, dataset) and mapping to a key of R_LABEL_SLOTS, for example
# {('KNN', 'D9'): 'TR', ('LR', 'D10'): 'BL'}.  It is empty by default, so every
# panel uses the automatic placement.  (The loop used to read `force_TR` /
# `force_BL`, whose assignments had been commented out, which raised NameError
# on the first panel.)
FORCED_PLACEMENTS = {}

# Axes-fraction anchor and text alignment for each annotation slot.
# 'TL_INSET' is the last resort used when all three corners are crowded.
R_LABEL_SLOTS = {
    'TL': dict(x=0.02, y=0.96, va='top', ha='left'),
    'BL': dict(x=0.02, y=0.06, va='bottom', ha='left'),
    'TR': dict(x=0.98, y=0.96, va='top', ha='right'),
    'TL_INSET': dict(x=0.08, y=0.90, va='top', ha='left'),
}

# Semi-transparent white box drawn behind the annotation text.
R_LABEL_BOX = dict(boxstyle='round,pad=0.15', fc='white', ec='none', alpha=0.7)

for i, m in enumerate(models):
    for j, d in enumerate(datasets):
        ax = axes[i, j]

        # Rows for this (model, dataset) panel with usable K and Bias, by K.
        in_panel = (df['Model'] == m) & (df['Dataset'] == d)
        sub = df[in_panel].dropna(subset=['K', 'Bias']).sort_values('K')
        K = sub['K'].values
        B = sub['Bias'].values

        # Per-panel rescaling is switched off: values are plotted as stored.
        # To re-enable it use: factor = choose_factor(B); B_scaled = B * factor
        factor = 1
        B_scaled = B

        # Data series (line width 1.0).
        ax.plot(K, B_scaled, marker=markers[m], markersize=2.0,
                linestyle='-', linewidth=1.0, color=color)

        # Least-squares straight line (line width 0.9) drawn across the full
        # x-range, plus the Pearson correlation between K and the values.
        # Both need at least two points; otherwise r is reported as 0.
        p = None
        r = 0.0
        if len(K) >= 2:
            coeffs = np.polyfit(K, B_scaled, 1)
            p = np.poly1d(coeffs)
            K_line = np.linspace(0, 20, 200)
            ax.plot(K_line, p(K_line), color=color, linestyle='--', linewidth=0.9)
            r = float(np.corrcoef(K, B_scaled)[0, 1])

        # Axis appearance.
        ax.set_xticks(x_ticks)
        ax.set_xlim(0, 20)
        ax.grid(True, linestyle=':', alpha=0.5)
        ax.tick_params(length=2, pad=1)
        ax.set_yticks([])  # remove y-axis ticks and labels
        if i == 0:
            # Only the top row carries the dataset name as a column title.
            ax.set_title(d, fontsize=LABEL_SIZE, fontweight=LABEL_WEIGHT)
        ax.set_xlabel('')
        ax.set_ylabel('')

        # Multiplier (x 10^-n) annotation -- disabled together with rescaling:
        # n_map = {1: 0, 10: 1, 100: 2, 1000: 3}
        # n = n_map.get(factor, 0)
        # ax.text(0.02, 1.04, r"$\times 10^{-%d}$" % n, transform=ax.transAxes,
        #         fontsize=6, va='bottom', ha='left')

        # Annotation slot: a manual override wins; otherwise take the first
        # uncrowded corner in the order TL -> BL -> TR, and fall back to a
        # slightly inset top-left position when all three are crowded.
        slot = FORCED_PLACEMENTS.get((m, d))
        if slot is None:
            slot = next(
                (corner for corner in ('TL', 'BL', 'TR')
                 if not is_crowded(K, B_scaled, p, corner)),
                'TL_INSET',
            )

        spot = R_LABEL_SLOTS[slot]
        ax.text(spot['x'], spot['y'], f"r = {r:.2f}", transform=ax.transAxes,
                fontsize=9, va=spot['va'], ha=spot['ha'],
                bbox=dict(R_LABEL_BOX))

# ---- Figure-level labels ----
# The figure is created with constrained_layout=True, and Matplotlib skips
# subplots_adjust() (emitting a UserWarning) while that layout engine is
# active.  The former `fig.subplots_adjust(left=0.30)` call therefore had no
# effect and has been removed; margins are left to constrained layout.
label_x = 0.0040
n_rows = len(models)
for i, m in enumerate(models):
    # Model name, rotated, centred on the i-th of n_rows equal bands of the
    # figure height (figure coordinates, first model at the top).
    fig.text(label_x, (n_rows - i - 0.5) / n_rows, m,
             va='center', ha='center',
             fontsize=LABEL_SIZE, fontweight=LABEL_WEIGHT, rotation=90)

# Figure.supylabel / Figure.supxlabel do not exist on older Matplotlib
# releases; fall back to plain figure text only in that case so that any other
# error still surfaces.
try:
    fig.supylabel('Bias', fontsize=9)
except AttributeError:
    fig.text(0.006, 0.5, 'Bias', rotation='vertical', va='center', fontsize=9)

try:
    fig.supxlabel('K - value', fontsize=9)  # fontsize was 8 before
except AttributeError:
    fig.text(0.5, 0.01, 'K - value', ha='center', fontsize=9)  # fontsize was 8 before

#fig.suptitle('Change in Bias with K value (Large Dataset)', fontsize=9)

# ---- Save outputs ----
png_name = 'k_vs_bias_grid_bias_label_r_forced_and_smart_left030_x00040.png'
pdf_name = 'k_vs_bias_grid_bias_label_r_forced_and_smart_left030_x00040.pdf'
eps_name = 'k_vs_bias_grid_bias_label_r_forced_and_smart_left030_x00040.eps'
for out_name, out_format in ((png_name, 'png'), (pdf_name, 'pdf'), (eps_name, 'eps')):
    fig.savefig(out_name, format=out_format)

# pyplot.show() takes no figure argument (only the keyword `block`); passing
# `fig` positionally fails outside the notebook inline backend.
plt.show()
plt.close(fig)

print(png_name)
print(pdf_name)
print(eps_name)


# Welcome to Colab!