# Preparation
Run the following code block before anything else: it imports all the input data and adds some extrapolated data points. It also normalizes all values so that they are more directly comparable.

In [5]:
# Import packages needed for data management and analysis
import pandas as pd
import numpy as np

from sklearn.preprocessing import normalize

# Column-wise wrapper around `normalize`
# (which, per the note in this notebook, works by row unless told otherwise)
def transposed_normalize(df):
    """Normalize ``df`` column by column.

    The frame is transposed so that its columns become rows, passed through
    ``normalize``, and transposed back. The returned DataFrame carries the
    same index and column labels as ``df``.
    """
    normalized_rows = normalize(df.T)
    flipped = pd.DataFrame(normalized_rows, index=df.columns, columns=df.index)
    return flipped.T

# Each input below goes through the same pipeline: read the CSV, apply any
# per-dataset preparation, normalize every column so the metrics can be
# compared, then tag the column names with a suffix for easy referencing later.

# Stream-Derived Data
frame_absorb = transposed_normalize(
    pd.read_csv('InputData/StreamDerived/SubsumeFrames.csv', index_col="Frame")
).rename(columns=lambda name: f"{name} (Frame Subsumed)")

ability_use = transposed_normalize(
    pd.read_csv('InputData/StreamDerived/SubsumeAbility.csv',
                index_col=["Ability", "Frame"])
).rename(columns=lambda name: f"{name} (Ability Imprinted)")

# The replacement rate needs to be by frame (no other union exists),
# so the per-ability rows are summed per frame before normalizing
ability_replace = transposed_normalize(
    pd.read_csv('InputData/StreamDerived/SubsumeReplace.csv',
                index_col=["Ability", "Frame"])
    .groupby("Frame")
    .sum()
).rename(columns=lambda name: f"{name} (Ability Removed)")

# Game-Derived Data
# The sums of costs might be worth comparing as well, so a row-wise
# 'Total' column is added to each cost table before normalizing
subsume_cost = transposed_normalize(
    pd.read_csv('InputData/GameDerived/SubsumeFrameCost.csv', index_col="Frame")
    .assign(Total=lambda costs: costs.apply(np.sum, axis=1))
).rename(columns=lambda name: f"{name} (Cost of Subsuming)")

ability_cost = transposed_normalize(
    pd.read_csv('InputData/GameDerived/ReplacementAbilityCost.csv',
                index_col=["Ability", "Frame"])
    .assign(Total=lambda costs: costs.apply(np.sum, axis=1))
).rename(columns=lambda name: f"{name} (Cost of Imprinting)")

## Interactive Analysis (X vs. Y, scatter plot)
Use this to compare various correlations between metrics at the Warframe level.
Run the code block below to prepare the analysis.

In [6]:
# Merge all Frame-based data into one dataframe for easy usage.
# The chain below runs in three stages:
#   1. Ability-level tables are joined on "Ability" alone (the "Frame" column
#      of ability_use is dropped first); ability derived values need to be
#      treated differently to prevent data duplication for Helminth values.
#   2. The result is re-indexed by "Frame" so the frame-only tables, which
#      lack the Helminth-derived issues above, can be joined on the index.
#   3. The index is set to ["Frame", "Ability"] for easy usage below.
full_frame_data = (
    ability_cost.reset_index(level=1)
    .join(ability_use.reset_index(level=1).drop(columns="Frame"), on="Ability")
    .reset_index()
    .set_index("Frame")
    .join(frame_absorb)
    .join(ability_replace)
    .join(subsume_cost)
    .reset_index()
    .set_index(["Frame", "Ability"])
)

# Prepare everything we need for interactive plotting
%matplotlib widget
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display

# One column selector per axis; each offers every column of the merged frame
x_dropdown = widgets.Dropdown(
    description="(X)",
    options=sorted(full_frame_data.columns),
    value='Total (Cost of Subsuming)',
)

y_dropdown = widgets.Dropdown(
    description="(Y)",
    options=sorted(full_frame_data.columns),
    value='Prevalence (Frame Subsumed)',
)

# Draw the default plot from the dropdowns' starting selections
# (total subsume cost vs. subsume use)
fig1, ax1 = plt.subplots()
sct1 = ax1.scatter(
    full_frame_data[x_dropdown.value],
    full_frame_data[y_dropdown.value],
    alpha=0.2,
)
ax1.set_xlabel(x_dropdown.value)
ax1.set_ylabel(y_dropdown.value)

# Tooltip annotation: created empty and hidden; the hover handler fills it in
annot1 = ax1.annotate("", xy=(0, 0), xytext=(5, 5), textcoords="offset points")
annot1.set_visible(False)

# Enable resizing (i.e. zooming in)
fig1.canvas.resizable = True

# Helper that repositions and relabels fig1's tooltip
def update_annot_fig1(ind):
    """Point the fig1 tooltip at the hovered data and list what is there.

    ``ind`` is the details dict returned by ``sct1.contains``; its ``"ind"``
    entry holds the positions of the scatter points under the cursor.
    """
    hit_positions = ind["ind"]  # the event's indices are nested under "ind"
    # Anchor the tooltip on the first point that was hit
    annot1.xy = sct1.get_offsets()[hit_positions[0]]
    # One line per hit point, formatted from the two index levels
    hit_labels = full_frame_data.index[hit_positions]
    annot1.set_text(
        "\n".join(f"{entry[0]} ({entry[1]})" for entry in hit_labels)
    )

# Mouse-motion handler: shows or hides fig1's tooltip based on what is hovered
def hover_update_1(event):
    """Update fig1's tooltip in response to a mouse-motion event."""
    # Pointer movement outside this plot's axes is ignored entirely
    if event.inaxes != ax1:
        return
    was_shown = annot1.get_visible()
    over_point, hit_details = sct1.contains(event)
    if over_point:
        # Refresh the tooltip contents for the hovered point(s) and reveal it
        update_annot_fig1(hit_details)
        annot1.set_visible(True)
    elif was_shown:
        # Cursor left the points: hide the stale tooltip
        annot1.set_visible(False)
    fig1.canvas.draw_idle()

fig1.canvas.mpl_connect("motion_notify_event", hover_update_1)

# Mediate changes when X is updated
def update_x(change):
    """Re-plot the scatter against a newly selected X column.

    ``change`` is the change notification delivered by the dropdown observer;
    ``change.new`` is used as the column name to look up in
    ``full_frame_data``. The Y data is re-read from the current
    ``y_dropdown`` selection so both coordinates stay in sync.
    """
    new_vals = full_frame_data[change.new]
    sct1.set_offsets(np.c_[new_vals, full_frame_data[y_dropdown.value]])
    ax1.set_xlabel(change.new)
    # Rescale the plot to account for changes in data dist.
    min_val = np.min(new_vals)
    max_val = np.max(new_vals)
    buffer = 0.1 * (max_val - min_val)
    # Pad both ends outward by 10% of the data range. The right limit must
    # ADD the buffer (it previously subtracted it, which cut off the largest
    # points); this mirrors how update_y pads its top limit.
    ax1.set_xlim(left=min_val-buffer,
                 right=max_val+buffer)

# Handle a new selection in the Y dropdown
def update_y(change):
    """Re-plot the scatter against a newly selected Y column.

    ``change`` is the change notification delivered by the dropdown observer;
    ``change.new`` is used as the column name to look up in
    ``full_frame_data``. The X data is re-read from the current
    ``x_dropdown`` selection.
    """
    x_vals = full_frame_data[x_dropdown.value]
    new_vals = full_frame_data[change.new]
    sct1.set_offsets(np.c_[x_vals, full_frame_data[change.new]])
    ax1.set_ylabel(change.new)
    # Refit the Y axis to the new data, padded by 10% of its range on each side
    lowest = np.min(new_vals)
    highest = np.max(new_vals)
    padding = 0.1 * (highest - lowest)
    ax1.set_ylim(bottom=lowest - padding, top=highest + padding)

# Re-plot whenever the selected label of either dropdown changes
y_dropdown.observe(update_y, names='label')
x_dropdown.observe(update_x, names='label')

# Show the X selector first, then the Y selector
display(x_dropdown, y_dropdown)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Dropdown(description='(X)', index=16, options=('Bile (Cost of Imprinting)', 'Bile (Cost of Subsuming)', 'Bioti…

Dropdown(description='(Y)', index=12, options=('Bile (Cost of Imprinting)', 'Bile (Cost of Subsuming)', 'Bioti…

## Pearson's Correlation of Metrics
The plot below tests each pair of metrics from the data above for direct
correlation (using Pearson's correlation coefficient). 'r' is the
coefficient of correlation (1 is perfect positive correlation, -1 is
perfect inverse correlation, 0 is no linear correlation whatsoever), and
'p' is the statistical p-value (roughly, how likely a correlation at least
this strong would be observed by random chance if the two metrics were
unrelated; lower values are better). The dashed red line marks p = 0.05.

In [7]:
# Import needed packages
from itertools import combinations
from scipy.stats import pearsonr

# Every unordered pair of columns is a candidate for a correlation test
combos = combinations(full_frame_data.columns, 2)

# Results table for the correlations, starting with one row each for 'r' and 'p'
corr_df = pd.DataFrame(index=["r", "p"])

# Column pairs that compare one cost against another are already known to be
# correlated, so they are excluded. A column counts as a cost column when its
# name contains one of these markers.
redundant_vals = [
    "Imprinting",
    "Subsuming"
]
def filter_combos(val):
    """Return True if the column pair ``val`` should be kept.

    ``val`` is a pair of column names. The pair is rejected (False) only when
    BOTH names contain at least one entry of ``redundant_vals``; a pair with
    zero or one such name is kept (True).
    """
    first_is_cost = any(marker in val[0] for marker in redundant_vals)
    second_is_cost = any(marker in val[1] for marker in redundant_vals)
    return not (first_is_cost and second_is_cost)
# Keep only the column pairs that passed the redundancy filter
combos = list(filter(filter_combos, combos))

# Calculate the correlation values.
# Rows containing any missing value are dropped ONCE here: the result does not
# depend on the pair being tested, so recomputing it twice per pair inside the
# loop (as before) was wasted work.
complete_rows = full_frame_data.dropna()
corr_records = {}
for c in combos:
    idx = f"{c[0]} <-> {c[1]}"
    x = complete_rows.loc[:, c[0]]
    y = complete_rows.loc[:, c[1]]
    r, p = pearsonr(x, y)
    corr_records[idx] = (r, p)

# Build the results frame in one step: one row per column pair, with 'r' and
# 'p' as columns (the layout the plotting code below expects). This replaces
# inserting one column per pair into an existing frame and transposing.
corr_df = pd.DataFrame.from_dict(corr_records, orient="index", columns=["r", "p"])

# Plot the resulting correlation results
fig2, ax2 = plt.subplots()
sct2 = ax2.scatter(corr_df["r"], corr_df["p"], alpha=0.2)

ax2.set_xlabel('r')
ax2.set_ylabel('p')
# Dashed reference line at p = 0.05 across the full range of r
ax2.hlines(0.05, xmin=-1, xmax=1,
           color="red", linestyles='dashed')
# Tooltip annotation: created empty and hidden until a point is hovered
annot2 = ax2.annotate("", xy=(0,0), xytext=(5,5), textcoords="offset points")
annot2.set_visible(False)

# Helper that repositions and relabels fig2's tooltip
def update_annot_fig2(ind):
    """Point the fig2 tooltip at the hovered data and list what is there.

    ``ind`` is the details dict returned by ``sct2.contains``; its ``"ind"``
    entry holds the positions of the scatter points under the cursor.
    """
    hit_positions = ind["ind"]  # the event's indices are nested under "ind"
    # Anchor the tooltip on the first point that was hit
    annot2.xy = sct2.get_offsets()[hit_positions[0]]
    # One line per hit point, taken from the row labels of corr_df
    annot2.set_text("\n".join(corr_df.index[hit_positions]))

# Mouse-motion handler: shows or hides fig2's tooltip based on what is hovered
def hover_update_2(event):
    """Update fig2's tooltip in response to a mouse-motion event."""
    # Pointer movement outside this plot's axes is ignored entirely
    if event.inaxes != ax2:
        return
    was_shown = annot2.get_visible()
    over_point, hit_details = sct2.contains(event)
    if over_point:
        # Refresh the tooltip contents for the hovered point(s) and reveal it
        update_annot_fig2(hit_details)
        annot2.set_visible(True)
    elif was_shown:
        # Cursor left the points: hide the stale tooltip
        annot2.set_visible(False)
    fig2.canvas.draw_idle()

fig2.canvas.mpl_connect("motion_notify_event", hover_update_2)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

9

## Helminth Filtered Correlations
Some correlation results may have been biased by the (frankly absurd)
skew introduced by the Helminth ability costs and usage. Thus, we repeat
the above analysis without the Helminth data to see whether the results
change.

In [8]:
# A subset of the full df without Helminth data
subset_df = full_frame_data.drop(index=["Helminth"])

# Calculate the correlation values.
# The correlations must be computed from subset_df: the loop previously read
# full_frame_data, so the Helminth rows were never actually excluded and this
# section silently reproduced the unfiltered results.
# Rows with missing values are dropped once, outside the loop, because the
# result does not depend on the pair being tested.
subset_complete = subset_df.dropna()
corr_records2 = {}
for c in combos:
    idx = f"{c[0]} <-> {c[1]}"
    x = subset_complete.loc[:, c[0]]
    y = subset_complete.loc[:, c[1]]
    r, p = pearsonr(x, y)
    corr_records2[idx] = (r, p)

# Build the results frame in one step: one row per column pair, with 'r' and
# 'p' as columns (the layout the plotting code below expects)
corr_df2 = pd.DataFrame.from_dict(corr_records2, orient="index", columns=["r", "p"])

# Plot the resulting correlation results
fig3, ax3 = plt.subplots()
sct3 = ax3.scatter(corr_df2["r"], corr_df2["p"], alpha=0.2)

ax3.set_xlabel('r')
ax3.set_ylabel('p')
# Dashed reference line at p = 0.05 across the full range of r
ax3.hlines(0.05, xmin=-1, xmax=1,
           color="red", linestyles='dashed')
# Tooltip annotation: created empty and hidden until a point is hovered
annot3 = ax3.annotate("", xy=(0,0), xytext=(5,5), textcoords="offset points")
annot3.set_visible(False)

# Shortcut to update annotations (tooltips) on an update
def update_annot_fig3(ind):
    """Point the fig3 tooltip at the hovered data and list what is there.

    ``ind`` is the details dict returned by ``sct3.contains``; its ``"ind"``
    entry holds the positions of the scatter points under the cursor.
    """
    # Determine where the tooltip should be drawn
    pos = sct3.get_offsets()[ind["ind"][0]]
    annot3.xy = pos
    # Build up the tooltip's text
    idx = ind["ind"] # Events reported index is nested for some reason
    # Labels come from corr_df2, the frame that fig3's scatter is drawn from
    # (this previously read corr_df, the results frame of the other figure)
    labels = corr_df2.index[idx]
    tooltip_str = "\n".join(labels)
    # Update the text for the tooltip
    annot3.set_text(tooltip_str)

# Mouse-motion handler: shows or hides fig3's tooltip based on what is hovered
def hover_update_3(event):
    """Update fig3's tooltip in response to a mouse-motion event."""
    # Pointer movement outside this plot's axes is ignored entirely
    if event.inaxes != ax3:
        return
    was_shown = annot3.get_visible()
    over_point, hit_details = sct3.contains(event)
    if over_point:
        # Refresh the tooltip contents for the hovered point(s) and reveal it
        update_annot_fig3(hit_details)
        annot3.set_visible(True)
    elif was_shown:
        # Cursor left the points: hide the stale tooltip
        annot3.set_visible(False)
    fig3.canvas.draw_idle()

fig3.canvas.mpl_connect("motion_notify_event", hover_update_3)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

9