In [None]:
import os
import yaml
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from plateconfig import PlateConfig


# Root folder that holds one sub-directory per imaging run.
general_data_dir = "../data"
# Run sub-directories (inside general_data_dir) to analyse.
all_dirs = ["07182025_IF_scan"]
# Maps run directory name -> PlateConfig built from that run's plate_config.yaml.
config_dict = {}
# Name of the measurements CSV that is read from each run directory.
if_data_file = "Cell.csv"

for data_dir in all_dirs:
	config_fname = f"{general_data_dir}/{data_dir}/plate_config.yaml"
	if os.path.exists(config_fname):
		plate_obj = PlateConfig(config_fname)
		config_dict[data_dir] = plate_obj
	else:
		# Name the missing file: with several run directories a bare
		# "does not exist" message gives no hint which one to fix.
		print(f"Config file does not exist: {config_fname}")


In [None]:
# One measurements DataFrame per run directory, in the order of all_dirs.
dfs = []
for data_dir in all_dirs:
	full_fname = f"{general_data_dir}/{data_dir}/{if_data_file}"
	usecols = ["WellId", "CircTotalIntenCh2", "CircTotalIntenCh3"]
	df = pd.read_csv(full_fname, usecols=usecols)
	# Remove leading/trailing spaces in WellIds.
	df["WellId"] = df["WellId"].str.strip()

	# Get the plate configuration loaded for this run directory.
	plate_config = config_dict[data_dir]
	plate_config.rows = plate_config.row_to_patient.keys()

	columns = list(plate_config.column_to_stain.keys())
	# Control column pairs as (kept column, duplicate column).
	control_column_tuples = [(1, 2), (7, 8)]
	if plate_config.double_control_wells:
		# Combine control wells (e.g. B01 and B02) into a single WellId by
		# relabelling the duplicate well with the id of the kept one. Only the
		# WellId column is remapped, in a single pass.
		well_id_map = {
			f"{row}{dup_col:02d}": f"{row}{kept_col:02d}"
			for kept_col, dup_col in control_column_tuples
			for row in plate_config.row_to_patient.keys()
		}
		df["WellId"] = df["WellId"].replace(well_id_map)
		# The duplicate columns no longer exist as separate wells.
		duplicate_columns = {dup_col for _, dup_col in control_column_tuples}
		columns = [col for col in columns if col not in duplicate_columns]
	plate_config.control_columns = [kept_col for kept_col, _ in control_column_tuples]
	# Test columns per control, index-aligned with control_columns
	# (columns 3-6 go with control 1, columns 9-12 with control 7).
	plate_config.test_columns = [np.r_[3:7], np.r_[9:13]]
	plate_config.columns = columns

	# Store row and column of each well id as individual columns; Column keeps
	# the zero-padded text form found in the WellId (everything after the row letter).
	df["Row"] = df["WellId"].str[0]
	df["Column"] = df["WellId"].str[1:]
	plate_config.set_plate_data(df)
	# Keep the processed frame; the list was previously created but never filled.
	dfs.append(df)
	display(df)

In [None]:
from scipy import stats

def pairwise_lfcs(control, test):
	"""Return the log2 fold change of every test value against every control value.

	Args:
		control: 1-D sequence (list, array or Series) of control measurements.
		test: 1-D sequence (list, array or Series) of test measurements.

	Returns:
		A flat list of ``len(test) * len(control)`` floats holding
		``log2(test_value / control_value)``, ordered test-major: all controls
		for the first test value, then all controls for the second, and so on.
		A pair in which either value is 0 yields ``nan``.
	"""
	test_values = np.asarray(test, dtype=float)
	control_values = np.asarray(control, dtype=float)
	# Broadcast to an (n_test, n_control) grid instead of looping pair by pair.
	test_grid = test_values[:, np.newaxis]
	control_grid = control_values[np.newaxis, :]
	defined = (test_grid != 0) & (control_grid != 0)
	# Zero pairs are masked to nan below, so silence the divide/log warnings
	# that evaluating them on the full grid would otherwise raise.
	with np.errstate(divide="ignore", invalid="ignore"):
		log2fc = np.where(defined, np.log2(test_grid / control_grid), np.nan)
	return log2fc.ravel().tolist()


def _shapiro_statistic(values):
	"""Return the Shapiro-Wilk statistic of ``values``, or nan for fewer than 3 values."""
	# The test is undefined for fewer than three observations; an empty or
	# nearly empty well must not abort the whole plate.
	if len(values) < 3:
		return np.nan
	statistic, _ = stats.shapiro(values)
	return statistic


def _channel2_intensity(row_data, column):
	"""Return the CircTotalIntenCh2 values of the well in ``column`` of ``row_data``."""
	# The Column field stores the zero-padded text taken from the WellId.
	return row_data.loc[row_data["Column"] == f"{column:02d}", "CircTotalIntenCh2"]


def calculate_fold_changes(plate):
	"""Plot Ch2 intensity histograms of each control well with its test wells.

	For every row in ``plate.data`` and every control column, one figure is
	drawn that overlays the control well's CircTotalIntenCh2 histogram with the
	histograms of the test columns paired with that control
	(``plate.test_columns[i]`` belongs to ``plate.control_columns[i]``). Each
	legend entry shows the Shapiro-Wilk statistic of that well in parentheses
	(``nan`` when the well has fewer than three values).

	Fold changes themselves are not computed here yet; ``pairwise_lfcs`` is
	available for that step.

	Args:
		plate: plate configuration exposing ``data`` (a DataFrame with "Row",
			"Column" and "CircTotalIntenCh2" columns), ``control_columns``,
			``test_columns``, ``column_to_stain``, ``column_to_t_pts`` and
			``row_to_patient``.
	"""
	data = plate.data
	for row in data["Row"].unique():
		row_data = data[data["Row"] == row]
		for i, control_col in enumerate(plate.control_columns):
			control_intensity = _channel2_intensity(row_data, control_col)
			control_stat = _shapiro_statistic(control_intensity)

			fig, ax = plt.subplots()
			ax.set_title(f"{plate.column_to_stain[control_col]}1 - Row {row} - Donor {plate.row_to_patient[row]}")
			ax.hist(control_intensity, bins=30, label=f"Control ({control_stat:.2f})", alpha=0.5)
			ax.set_xlabel("Fluorescence Intensity")
			ax.set_ylabel("Count")

			# Use the test columns of the plate that was passed in, not those
			# of whatever plate_config happens to be bound globally.
			for test_col in plate.test_columns[i]:
				test_intensity = _channel2_intensity(row_data, test_col)
				test_stat = _shapiro_statistic(test_intensity)
				label = f"{plate.column_to_t_pts[test_col]} min ({test_stat:.2f})"
				ax.hist(test_intensity, bins=30, label=label, alpha=0.5)

			ax.legend()
			# Render each figure as it is finished so open figures do not
			# accumulate across the row/control loops.
			plt.show()


# Placeholder for fold-change results; nothing fills it yet.
lfc_df = pd.DataFrame()

for data_dir in all_dirs:
	plate_config = config_dict[data_dir]
	calculate_fold_changes(plate=plate_config)