In [2]:
import pandas as pd
import os
import plotly.express as px

regions_complete = pd.read_csv("regions_complete.csv", sep=";")

In [3]:
# get the csv files from the size_points folder

size_points_path = "size_points_Aud_injection"
min_pixel_size = 1
max_pixel_size = 17



# loop through the files and save the csv files as seperate dataframes with the name of the file
brains = {}
for file in os.listdir(size_points_path):
    if file.endswith(".csv"):
        # get the brain name from the file name
        brain_name_first_part = file.split("_")[0:4]
        black_white = file.split("_")[6]
        # join the brain name and black_white to a string

        brain_name = black_white + "_" + "_".join(brain_name_first_part)
        # remove the .csv from the end of the string
        brain_name = brain_name.replace(".csv", "")
        brains[brain_name] = pd.read_csv(os.path.join(size_points_path, file), sep=";")

In [14]:
# generate one file per brain with the same data frame 


def end_file_per_brain(white, black, name, age):
    one_brain_file = pd.DataFrame(
        columns=[
        "Brain",
        "Region ID",
        "General area",
        "Hemisphere",
        "count",
        "scaled count",
        "age",
        ]
    )
    total_counts = 0

    for index, row in white.iterrows():
        hemisphere = "right"

        if min_pixel_size <= row["Object area"] <= max_pixel_size:
            region_id = row["Region ID"]

            # check for the according General area in regions_complete
            general_region = regions_complete.loc[
                regions_complete["Region ID"] == region_id, "General area"
            ].values[0]
            
            if general_region != "Auditory areas":
                
                # if region_id is not in one_brain_file, add it
                if region_id not in one_brain_file["Region ID"].values:
                    one_brain_file.loc[region_id] = [name, region_id, general_region, hemisphere, 1, 0, age]
                else:
                    one_brain_file.loc[region_id, "count"] += 1

            # the counts for clear label and fiber tracts are not counted
            if general_region not in ["Clear Label", "fiber tracts", "Primary visual area", "Parent"]:
                total_counts += 1
    for index, row in black.iterrows():
        hemisphere = "left"
        if min_pixel_size <= row["Object area"] <= max_pixel_size:
            region_id = row["Region ID"]

            # check for the according General area in regions_complete
            general_region = regions_complete.loc[
                regions_complete["Region ID"] == region_id, "General area"
            ].values[0]
            # if region_id is not in one_brain_file with "right" in the hemisphere, add it
            if region_id not in one_brain_file["Region ID"].values:
                one_brain_file.loc[region_id] = [name, region_id, general_region, hemisphere, 1, 0, age]

            else:
                # if region_id is already in one_brain_file check if the hemisphere is "left"
                if one_brain_file.loc[region_id, "Hemisphere"] == "left":
                    # if it is, add the count to the existing count
                    one_brain_file.loc[region_id, "count"] += 1
                else:
                    one_brain_file.loc[region_id] = [name, region_id, general_region, hemisphere, 1, 0, age]
            

            if general_region not in ["Clear Label", "fiber tracts", "Parent"]:
                total_counts += 1

    # scale the counts
    one_brain_file["scaled count"] = one_brain_file["count"] / total_counts
    one_brain_file["scaled count"] = one_brain_file["count"] / total_counts

    return one_brain_file

In [15]:
n1_20250215_p7_13 = end_file_per_brain(
    brains["white_20250215_p7_13_n1"], brains["black_20250215_p7_13_n1"], "n1_20250215_p7_13", "p7-13"
)
n3_20250215_p7_13 = end_file_per_brain(
    brains["white_20250215_p7_13_n3"], brains["black_20250215_p7_13_n3"], "n3_20250215_p7_13", "p7-13"
)


In [16]:
big_file_Aud_injections = pd.concat(
    [
        n1_20250215_p7_13,
        n3_20250215_p7_13,
    ],
    ignore_index=True,
)


In [17]:
# take out Clear label and fiber tracts 
big_file_Aud_injections = big_file_Aud_injections[
    ~big_file_Aud_injections["General area"].isin(["Clear Label", "fiber tracts", "Parent"])
]

In [18]:
# Find regions with zero neurons

# add General area that are in regions_complete but not in Aud_injections
regions_complete = pd.read_csv("regions_complete.csv", sep=";")
General_areas_complete= regions_complete["General area"].unique()

# filter out "Clear Label" and "Fiber tracts" and "Parent" from General areas
General_areas_complete = [
    area for area in General_areas_complete if area not in ["Clear Label", "fiber tracts", "Parent", "retina"]
]

# make copy of the DataFrame to add missing General areas
big_file_Aud_injections = big_file_Aud_injections.copy()



general_area_left_p7_13 = []
general_area_right_p7_13  = []

# loop through big_file_V1_injections_long and add General area to general_area_left and general_area_right
for index, row in big_file_Aud_injections.iterrows():
    if row["Hemisphere"] == "left":
        general_area_left_p7_13.append(row["General area"])
         
        
    else:
        general_area_right_p7_13.append(row["General area"])


for area in General_areas_complete:
     
        
    
    
    if area not in general_area_left_p7_13:
        print("area not found in left hemisphere in p7_13:", area)
        big_file_Aud_injections.loc[area] = [0, 0, area, "left", 0, 0, "p7-13"]
    
    if area not in general_area_right_p7_13:
        print("area not found in right hemisphere in p7_13:", area)
        big_file_Aud_injections.loc[area] = [0, 0, area, "right", 0, 0, "p7-13"]
        
       



        



area not found in left hemisphere in p7_13: root
area not found in left hemisphere in p7_13: Frontal pole
area not found in right hemisphere in p7_13: Frontal pole
area not found in right hemisphere in p7_13: Auditory areas
area not found in right hemisphere in p7_13: Perirhinal area
area not found in right hemisphere in p7_13: Ectorhinal area
area not found in right hemisphere in p7_13: Main olfactory bulb
area not found in left hemisphere in p7_13: Accessory olfactory bulb
area not found in right hemisphere in p7_13: Accessory olfactory bulb
area not found in right hemisphere in p7_13: Anterior olfactory nucleus
area not found in right hemisphere in p7_13: Taenia tecta
area not found in left hemisphere in p7_13: Dorsal peduncular area
area not found in right hemisphere in p7_13: Piriform area
area not found in left hemisphere in p7_13: Nucleus of the lateral olfactory tract
area not found in right hemisphere in p7_13: Nucleus of the lateral olfactory tract
area not found in left hemi

In [19]:
# save the big file as csv
big_file_Aud_injections.to_csv("big_file_Aud_injections.csv", sep=";", index=False)

In [20]:
# Get the highest scaled count for 'right' hemisphere in each area
area_order_scaled = (
    big_file_Aud_injections[big_file_Aud_injections['Hemisphere'] == 'right']
    .groupby('General area')['scaled count']
    .max()
    .sort_values(ascending=False)
    .index.tolist()
)

area_order_count = (
    big_file_Aud_injections[big_file_Aud_injections['Hemisphere'] == 'right']
    .groupby('General area')['count']
    .max()
    .sort_values(ascending=False)
    .index.tolist()
)


In [21]:
fig = px.violin(
    big_file_Aud_injections,
    y="scaled count",
    x="General area",
    color="Hemisphere",
    
    box=True,
    points="all",
    color_discrete_map={
        "left": "orange",
        "right": "magenta"
    },
    title="failed V1 injections (Aud) scaled counts by hemisphere [p7-13]",
    category_orders={"General area": area_order_scaled}
)

fig.update_traces(meanline_visible=True)
fig.update_layout(violinmode='group')
fig.show()

In [22]:
fig = px.violin(
    big_file_Aud_injections,
    y="count",
    x="General area",
    color="Hemisphere",
    
    box=True,
    points="all",
    color_discrete_map={
        "left": "orange",
        "right": "magenta"
    },
    title="failed V1 injections (Aud) counts by hemisphere [p7-13]",
    category_orders={"General area": area_order_scaled}
)

fig.update_traces(meanline_visible=True)
fig.update_layout(violinmode='group')
fig.show()