# Parent and Progeny Correlations
Code to plot the correlation between parent and progeny and to count division times from separation, then color the spots accordingly.
The DataFrame we create is similar to the Random Forest DataFrame (without ruby features), but the two are kept in separate notebooks for ease of reading and to make sure nothing gets mixed up.

In [None]:
from pathlib import Path
import seaborn as sns
from matplotlib import pyplot as plt
import xarray as xr
import numpy as np
import pandas as pd
import os
import re
from collections import defaultdict
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import r2_score



# Notebook-wide plotting defaults: seaborn "talk" context with the "ticks" style.
sns.set_context('talk')
sns.set_style("ticks")

# Font type 42 embeds TrueType fonts in PDF/PS output (matplotlib's default is
# Type 3), so text in the saved figures stays editable as text.
import matplotlib as mpl
mpl.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

# Prepping DF

In [None]:
# Folder that holds Switch_Stats.csv; later cells also build their figure
# paths from it. With os.path.join an empty string means "current directory".
plot_output = ''
switch_stats_csv = os.path.join(plot_output, 'Switch_Stats.csv')
df_slope = pd.read_csv(switch_stats_csv)

In [None]:
# Build the analysis table: rows categorised as 'switch', with a non-zero
# parent id (parent == 0 presumably marks founder/mother cells, per the
# variable name -- confirm), and with a recorded parent_start_inc.
df_slope_switch = df_slope[df_slope['category'] == 'switch']
df_slope_switch_no_mothers = df_slope_switch[df_slope_switch['parent'] != 0]
# .copy() makes df_final an independent frame. Without it, the column
# assignments below (and those in later cells) write onto a filtered slice of
# df_slope and raise pandas' SettingWithCopyWarning.
df_final = df_slope_switch_no_mothers.dropna(subset=['parent_start_inc']).copy()

# Express parent and progeny start_inc relative to the row's appearance_time.
df_final['parent_start_inc_div_norm'] = df_final['parent_start_inc'] - df_final['appearance_time']
df_final['start_inc_div_norm'] = df_final['start_inc'] - df_final['appearance_time']
len(df_final)

In [None]:
# Two nested subsets of df_final:
#   df_30min    - start_inc is more than 30 time units after appearance_time
#                 (minutes, judging by the variable name -- confirm)
#   df_filtered - additionally, the parent's normalised start is positive
starts_late = df_final['start_inc'] > df_final['appearance_time'] + 30
df_30min = df_final.loc[starts_late]
parent_after_appearance = df_30min['parent_start_inc_div_norm'] > 0
df_filtered = df_30min.loc[parent_after_appearance]
print(len(df_30min), len(df_filtered), sep='\n')

In [None]:
# Plot the raw (un-normalised) parent vs. progeny start_inc to see how it
# looks, coloured by the delay between appearance_time and start_inc.
df_final['appearance_to_start'] = df_final['start_inc'] - df_final['appearance_time']
ax = sns.scatterplot(
    data=df_final,
    x='parent_start_inc',
    y='start_inc',
    s=25,
    alpha=1,
    hue='appearance_to_start',  # column name; seaborn looks it up in `data`
    palette='viridis',
    hue_norm=(0, 500),
)
ax.set_xlim(100, 1000)
ax.set_ylim(100, 1000)
ax.legend(loc='center left', bbox_to_anchor=(1.02, 0.5))

# r2_score(y_true, y_pred) is not symmetric. The observed progeny value is the
# truth and the parent value is the y = x "prediction", matching the manual
# R-squared computed for the final figure later in this notebook.
print("R^2 Score:", r2_score(df_final['start_inc'], df_final['parent_start_inc']))

In [None]:
# Normalize by the time of division (appearance_time) and plot parent vs.
# progeny together with the identity line y = x.
df_final['parent_start_inc_div_norm'] = df_final['parent_start_inc'] - df_final['appearance_time']
df_final['start_inc_div_norm'] = df_final['start_inc'] - df_final['appearance_time']
ax = sns.scatterplot(data=df_final, x='parent_start_inc_div_norm', y='start_inc_div_norm', s=25, alpha=1)

# End points of the identity line; x1/y1 are reused by later cells.
x1 = np.array([-200, 800])
y1 = 1 * x1 + 0
ax.set_xlim(-200, 800)
ax.set_ylim(-200, 800)
ax.plot(x1, y1, "black")
# No ax.legend() here: nothing in this plot carries a label, so the call only
# produced a "no artists with labels" warning and an empty legend.

# r2_score(y_true, y_pred) is not symmetric: progeny is the observed value,
# parent is the y = x prediction.
print("R^2 Score:", r2_score(df_final['start_inc_div_norm'], df_final['parent_start_inc_div_norm']))

In [None]:
# Final parent-vs-progeny figure on the filtered subset, with R-squared of the
# data against the identity model (progeny == parent).
ax = sns.scatterplot(
    data=df_filtered,
    x='parent_start_inc_div_norm',
    y='start_inc_div_norm',
    color='#431c54',
    s=20,
    alpha=0.75,
)

# y_perfect is what y = x predicts for each row; y_test is what was observed.
# Both names are reused by later cells.
y_perfect = df_filtered['parent_start_inc_div_norm']
y_test = df_filtered['start_inc_div_norm']
y_mean = y_test.mean()

ss_total = np.sum((y_test - y_mean) ** 2)
ss_residual = np.sum((y_test - y_perfect) ** 2)

# Calculate R-squared
r_squared = 1 - (ss_residual / ss_total)
print(f"R-squared (vs. perfect y=x): {r_squared}")

# Identity line, defined locally so this cell does not depend on x1/y1 left
# over from an earlier cell.
identity_ends = np.array([-200, 800])
ax.plot(identity_ends, identity_ends, "darkgrey")

# Ticks first, limits last: set_xticks/set_yticks expand the view to include
# every tick (here 840), so a limit set before them is silently widened and
# the two axes end up with different ranges.
axis_ticks = [0, 120, 240, 360, 480, 600, 720, 840]
ax.set_xticks(axis_ticks)
ax.set_yticks(axis_ticks)
ax.set_xlim(0, 800)
ax.set_ylim(0, 800)
sns.despine()

# os.path.join keeps an empty plot_output pointing at the working directory;
# plot_output + '/name.pdf' would resolve to the filesystem root instead.
plt.savefig(os.path.join(plot_output, 'final_parent_vs_prog.pdf'), bbox_inches='tight', transparent=True)

In [None]:
# Per-row comparison of the observed progeny value ('Experimental') with the
# y = x prediction ('Predicted'); 'X' is the parent value used for plotting.
accuracy_df = pd.DataFrame({
    'Experimental': y_test,
    'Predicted': y_perfect,
    'X': df_filtered['parent_start_inc_div_norm'],
})

# Despite the column name, 'individual_acc' holds the absolute relative error
# |Predicted - Experimental| / Experimental (lower is better).
relative_error = (accuracy_df['Predicted'] - accuracy_df['Experimental']) / accuracy_df['Experimental']
accuracy_df['individual_acc'] = relative_error.abs()
accuracy_df['individual_acc_percent'] = accuracy_df['individual_acc'] * 100

# Overall accuracy = 1 - (sum of relative errors / number of rows).
accuracy = 1 - (1 / len(accuracy_df) * accuracy_df['individual_acc'].sum())

print(accuracy_df['individual_acc'].mean())
print(accuracy_df['individual_acc'].std())
print(accuracy)
accuracy_df

In [None]:
# Percent relative error of the y = x prediction against the parent value,
# with seaborn's linear regression fit overlaid.
plt.figure(figsize=(6, 3))
ax = sns.regplot(data=accuracy_df, x='X', y='individual_acc_percent', scatter_kws={'s': 10, 'alpha': 0.7})
ax.set_xticks([0, 120, 240, 360, 480, 600, 720, 840])
sns.despine()
# os.path.join keeps an empty plot_output pointing at the working directory;
# plot_output + '/name.pdf' would resolve to the filesystem root instead.
plt.savefig(os.path.join(plot_output, 'final_parent_vs_prog_accovertime.pdf'), bbox_inches='tight', transparent=True)

In [None]:
# Same plot as the un-zoomed error-vs-parent figure, with the y axis clipped
# to -5..200 so the bulk of the points is readable.
plt.figure(figsize=(6, 3))
ax = sns.regplot(data=accuracy_df, x='X', y='individual_acc_percent', scatter_kws={'s': 10, 'alpha': 0.7})
ax.set_xticks([0, 120, 240, 360, 480, 600, 720, 840])
ax.set_ylim(-5, 200)
sns.despine()
# os.path.join keeps an empty plot_output pointing at the working directory;
# plot_output + '/name.pdf' would resolve to the filesystem root instead.
plt.savefig(os.path.join(plot_output, 'final_parent_vs_prog_accovertime_zoom.pdf'), bbox_inches='tight', transparent=True)

In [None]:
# Plain scatter of percent relative error against the parent value (no fit),
# with the y axis clipped to -5..100.
error_scatter_style = {'color': '#431c54', 's': 20, 'alpha': 0.5}
ax = sns.scatterplot(data=accuracy_df, x='X', y='individual_acc_percent', **error_scatter_style)
ax.set_ylim(-5, 100)

In [None]:
# Error of the identity model: observed progeny minus the y = x prediction.
residuals = y_test - y_perfect

# Mean squared error, and its square root so the error is in the same units
# as the data.
squared_residuals = residuals ** 2
mse = np.mean(squared_residuals)
rmse = np.sqrt(mse)

print(f"MSE: {mse}")
print(f"RMSE: {rmse}")

In [None]:
# Bivariate density of normalised parent vs. progeny start times over all of
# df_final, with the identity line on top (x1/y1 come from an earlier cell).
ax = sns.kdeplot(
    data=df_final,
    x='parent_start_inc_div_norm',
    y='start_inc_div_norm',
)
ax.plot(x1, y1, "black")


In [None]:
# Residual plot for progeny regressed on parent. Note that seaborn fits its
# own linear regression here, so these are residuals of that fit, not of y = x.
residual_marker_style = {'s': 10, 'color': 'green', 'alpha': 0.7}
sns.residplot(
    data=df_final,
    x='parent_start_inc_div_norm',
    y='start_inc_div_norm',
    scatter_kws=residual_marker_style,
)