In [46]:
import pandas as pd
import os
import plotly.express as px
import plotly.graph_objects as go

regions_complete = pd.read_csv("regions_complete.csv", sep=";")

In [47]:
# get the csv files from the size_points folder

size_points_path = "size_points_only_v1_injection"
min_pixel_size = 1
max_pixel_size = 17


# loop through the files and save the csv files as seperate dataframes with the name of the file
brains = {}
for file in os.listdir(size_points_path):
    if file.endswith(".csv"):
        # get the brain name from the file name
        brain_name_first_part = file.split("_")[0:4]
        black_white = file.split("_")[6]
        # join the brain name and black_white to a string

        brain_name = black_white + "_" + "_".join(brain_name_first_part)
        # remove the .csv from the end of the string
        brain_name = brain_name.replace(".csv", "")
        brains[brain_name] = pd.read_csv(os.path.join(size_points_path, file), sep=";")

ipsilateral = injection site of hemisphere
contralateral = opposite side

In [48]:
# generate one file per brain


def end_file_per_brain(white, black, name, age, injection_side, injection_area):
    one_brain_file = pd.DataFrame(
        columns=[
            "Brain",
            "Region ID",
            "General area",
            "Hemisphere",
            "count",
            "scaled count",
            "age",
        ]
    )
    total_counts = 0
    if injection_side == "left":
        white_side = "contralateral"
        black_side = "ipsilateral"
    else:
        white_side = "ipsilateral"
        black_side = "contralateral"

    for index, row in white.iterrows():

        hemisphere = white_side

        if min_pixel_size <= row["Object area"] <= max_pixel_size:
            region_id = row["Region ID"]

            # check for the according General area in regions_complete
            general_region = regions_complete.loc[
                regions_complete["Region ID"] == region_id, "General area"
            ].values[0]
            subdivided = regions_complete.loc[
                regions_complete["Region ID"] == region_id, "Subdivided"
            ].values[0]

            if subdivided != injection_area:

                # if region_id is not in one_brain_file, add it
                if region_id not in one_brain_file["Region ID"].values:
                    one_brain_file.loc[region_id] = [
                        name,
                        region_id,
                        general_region,
                        hemisphere,
                        1,
                        0,
                        age,
                    ]
                else:
                    one_brain_file.loc[region_id, "count"] += 1
            if subdivided == injection_area and injection_side == "left":
                if region_id not in one_brain_file["Region ID"].values:
                    one_brain_file.loc[region_id] = [
                        name,
                        region_id,
                        general_region,
                        hemisphere,
                        1,
                        0,
                        age,
                    ]
                else:
                    one_brain_file.loc[region_id, "count"] += 1

            # the counts for clear label and fiber tracts are not counted
            if (
                general_region not in ["Clear Label", "fiber tracts", "Parent"]
                and subdivided != "injection_area"
            ):
                total_counts += 1

    for index, row in black.iterrows():
        hemisphere = black_side

        if min_pixel_size <= row["Object area"] <= max_pixel_size:
            region_id = row["Region ID"]

            # check for the according General area in regions_complete
            general_region = regions_complete.loc[
                regions_complete["Region ID"] == region_id, "General area"
            ].values[0]
            subdivided = regions_complete.loc[
                regions_complete["Region ID"] == region_id, "Subdivided"
            ].values[0]

            if subdivided != injection_area:

                # if region_id is not in one_brain_file with "right" in the hemisphere, add it
                if region_id not in one_brain_file["Region ID"].values:
                    one_brain_file.loc[region_id] = [
                        name,
                        region_id,
                        general_region,
                        hemisphere,
                        1,
                        0,
                        age,
                    ]

                else:
                    # if region_id is already in one_brain_file check if the hemisphere is opposite or same side
                    if one_brain_file.loc[region_id, "Hemisphere"] == black_side:
                        # if it is, add the count to the existing count
                        one_brain_file.loc[region_id, "count"] += 1
                    else:
                        one_brain_file.loc[region_id] = [
                            name,
                            region_id,
                            general_region,
                            hemisphere,
                            1,
                            0,
                            age,
                        ]

                if subdivided == injection_area and injection_side == "right":
                    if region_id not in one_brain_file["Region ID"].values:
                        one_brain_file.loc[region_id] = [
                            name,
                            region_id,
                            general_region,
                            hemisphere,
                            1,
                            0,
                            age,
                        ]
                    else:
                        one_brain_file.loc[region_id, "count"] += 1

            if (
                general_region not in ["Clear Label", "fiber tracts", "Parent"]
                and subdivided != injection_area
            ):
                total_counts += 1

    # scale the counts
    one_brain_file["scaled count"] = 100 / total_counts * one_brain_file["count"]

    return one_brain_file

In [100]:
n3_20241010_p1_7 = end_file_per_brain(
    brains["white_20241010_p1_7_n3"],
    brains["black_20241010_p1_7_n3"],
    "n3_20241010_p1_7",
    "p1-7",
    "right",
    "Primary visual area",
)
n11_20241103_p1_7 = end_file_per_brain(
    brains["white_20241103_p1_7_n11"],
    brains["black_20241103_p1_7_n11"],
    "n11_20241103_p1_7",
    "p1-7",
    "right",
    "Primary visual area",
)
n5_20241103_p1_7 = end_file_per_brain(
    brains["white_20241103_p1_7_n5"],
    brains["black_20241103_p1_7_n5"],
    "n5_20241103_p1_7",
    "p1-7",
    "right",
    "Primary visual area",
)
# n1_20241203_p7_13 = end_file_per_brain(
    # brains["white_20241209_p7_13_n1"],
    # brains["black_20241209_p7_13_n1"],
    # "n1_20241203_p7_13",
    # "p7-13",
    # "right",
    # "Primary visual area",
# )
n2_20241203_p7_13 = end_file_per_brain(
    brains["white_20241203_p7_13_n2"],
    brains["black_20241203_p7_13_n2"],
    "n2_20241203_p7_13",
    "p7-13",
    "right",
    "Primary visual area",
)
n4_20241203_p7_13 = end_file_per_brain(
    brains["white_20241203_p7_13_n4"],
    brains["black_20241203_p7_13_n4"],
    "n4_20241203_p7_13",
    "p7-13",
    "right",
    "Primary visual area",
)
n5_20241203_p7_13 = end_file_per_brain(
    brains["white_20241203_p7_13_n5"],
    brains["black_20241203_p7_13_n5"],
    "n5_20241203_p7_13",
    "p7-13",
    "right",
    "Primary visual area",
)
# n8_20241203_p7_13 = end_file_per_brain(
    # brains["white_20241203_p7_13_n8"],
    # brains["black_20241203_p7_13_n8"],
    # "n8_20241203_p7_13",
    # "p7-13",
    # "right",
    # "Primary visual area",
# )
n1_20241209_p7_13 = end_file_per_brain(
    brains["white_20241209_p7_13_n1"],
    brains["black_20241209_p7_13_n1"],
    "n1_20241209_p7_13",
    "p7-13",
    "right",
    "Primary visual area",
)
n2_20241209_p7_13 = end_file_per_brain(
    brains["white_20241209_p7_13_n2"],
    brains["black_20241209_p7_13_n2"],
    "n2_20241209_p7_13",
    "p7-13",
    "right",
    "Primary visual area",
)
# n3_20241209_p7_13 = end_file_per_brain(
    # brains["white_20241209_p7_13_n3"],
    # brains["black_20241209_p7_13_n3"],
    # "n3_20241209_p7_13",
    # "p7-13",
    # "right",
    # "Primary visual area",
# )
n4_20241209_p7_13 = end_file_per_brain(
    brains["white_20241209_p7_13_n4"],
    brains["black_20241209_p7_13_n4"],
    "n4_20241209_p7_13",
    "p7-13",
    "right",
    "Primary visual area",
)

In [101]:
# make the big mega file with all the brains put the brain name in the first column
big_file_V1_injections = pd.concat(
    [
        n3_20241010_p1_7,
        n11_20241103_p1_7,
        n5_20241103_p1_7,
        
        n2_20241203_p7_13,
        n4_20241203_p7_13,
        n5_20241203_p7_13,
        
        n1_20241209_p7_13,
        n2_20241209_p7_13,
        
        n4_20241209_p7_13,
    ],
    ignore_index=True,
)
# n3_20241209_p7_13, n1_20241203_p7_13, n8_20241203_p7_13,

In [102]:
# take out Clear label and fiber tracts
big_file_V1_injections = big_file_V1_injections[
    ~big_file_V1_injections["General area"].isin(
        ["Clear Label", "fiber tracts", "Parent"]
    )
]

In [103]:
# Find regions with zero neurons


def find_zero_neurons(
    big_file,
    general_area_ipsi_age_A,
    general_area_contra_age_A,
    general_area_ipsi_age_B,
    general_area_contra_age_B,
    age_A,
    age_B,
):

    # add General area that are in regions_complete but not in S1_injections
    regions_complete = pd.read_csv("regions_complete.csv", sep=";")
    General_areas_complete = regions_complete["General area"].unique()

    # filter out "Clear Label" and "Fiber tracts" and "Parent" from General areas
    General_areas_complete = [
        area
        for area in General_areas_complete
        if area not in ["Clear Label", "fiber tracts", "Parent", "retina"]
    ]

    # make copy of the DataFrame to add missing General areas
    big_file_long = big_file.copy()

    general_area_ipsi_age_A = []
    general_area_contra_age_A = []
    general_area_ipsi_age_B = []
    general_area_contra_age_B = []

    # loop through big_file_long and add General area to lists
    for index, row in big_file_long.iterrows():
        if row["Hemisphere"] == "ipsilateral":
            if row["age"] == age_A:
                general_area_ipsi_age_A.append(row["General area"])
            else:
                general_area_ipsi_age_B.append(row["General area"])

        else:
            if row["age"] == age_A:
                general_area_contra_age_A.append(row["General area"])
            else:
                general_area_contra_age_B.append(row["General area"])

    for area in General_areas_complete:
        if area not in general_area_ipsi_age_A:
            print("area not found in", "general_area_ipsi", age_A, ":", area)

            big_file_long.loc[area] = [0, 0, area, "ipsilateral", 0, 0, age_A]

        if area not in general_area_contra_age_A:
            print("area not found in", "general_area_contra", age_A, ":", area)
            big_file_long.loc[area] = [0, 0, area, "contralateral", 0, 0, age_A]

        if area not in general_area_ipsi_age_B:
            print("area not found in", "general_area_ipsi", age_B, ":", area)
            big_file_long.loc[area] = [0, 0, area, "ipsilateral", 0, 0, age_B]

        if area not in general_area_contra_age_B:
            print("area not found in", "general_area_contra", age_B, ":", area)
            big_file_long.loc[area] = [0, 0, area, "contralateral", 0, 0, age_B]

    return big_file_long

In [104]:
big_file_V1_injections_long = find_zero_neurons(
    big_file_V1_injections,
    "general_area_ipsi_p1-7",
    "general_area_contra_p1-7",
    "general_area_ipsi_p7-13",
    "general_area_contra_p7-13",
    "p1-7",
    "p7-13",
)

area not found in general_area_ipsi p1-7 : root
area not found in general_area_ipsi p7-13 : root
area not found in general_area_ipsi p1-7 : Temporal association areas
area not found in general_area_ipsi p1-7 : Ectorhinal area
area not found in general_area_ipsi p1-7 : Main olfactory bulb
area not found in general_area_ipsi p1-7 : Accessory olfactory bulb
area not found in general_area_ipsi p7-13 : Accessory olfactory bulb
area not found in general_area_contra p7-13 : Accessory olfactory bulb
area not found in general_area_ipsi p1-7 : Anterior olfactory nucleus
area not found in general_area_ipsi p1-7 : Taenia tecta
area not found in general_area_ipsi p1-7 : Dorsal peduncular area
area not found in general_area_contra p7-13 : Dorsal peduncular area
area not found in general_area_ipsi p1-7 : Piriform area
area not found in general_area_ipsi p7-13 : Piriform area
area not found in general_area_contra p7-13 : Piriform-amygdalar area
area not found in general_area_ipsi p1-7 : Postpiriform t

In [105]:
# save the big file as csv
big_file_V1_injections_long.to_csv(
    "big_file_V1_injections_long.csv", sep=";", index=False
)

In [None]:
# make a file with only general regions counts which are then scaled to the total count of the brain
general_areas_file = pd.DataFrame(
    columns=[
        "Brain",
        "General area",
        "Hemisphere",
        "count",
        "scaled count",
        "age",
    ]
)
totals = pd.DataFrame(columns=["Brain", "total"])
totals_ipsi = pd.DataFrame(columns=["Brain", "total"])
for index, row in big_file_V1_injections_long.iterrows():
    general_area = row["General area"]
    hemisphere = row["Hemisphere"]
    age = row["age"]
    name = row["Brain"]
    amount = row["count"]
    if hemisphere == "ipsilateral":
        if name not in totals_ipsi["Brain"].values:
            totals_ipsi.loc[name] = [name, amount]
        else:
            totals_ipsi.loc[name, "total"] += amount


    if name not in totals["Brain"].values:
        totals.loc[name] = [name, amount]
    else:
        totals.loc[name, "total"] += amount

In [114]:
df_filtered = big_file_V1_injections_long[
    ["Brain", "age", "count", "Hemisphere", "General area"]
]

# Step 2: Group by the relevant identifiers and sum the count per group
df_grouped = df_filtered.groupby(
    ["Brain", "General area", "Hemisphere", "age"], as_index=False
)["count"].sum()

# add scaled column with 0
df_grouped["scaled count"] = 0

In [115]:
# scale the counts
for index, row in df_grouped.iterrows():
    brain_name = row["Brain"]
    count = row["count"]
    total_count = totals.loc[totals["Brain"] == brain_name, "total"].values[0]
    scaled_count = 1 / total_count * count
    df_grouped.loc[index, "scaled count"] = scaled_count
df_grouped


divide by zero encountered in scalar divide


invalid value encountered in scalar multiply



Unnamed: 0,Brain,General area,Hemisphere,age,count,scaled count
0,0,Accessory olfactory bulb,contralateral,p7-13,0,
1,0,Anterior olfactory nucleus,ipsilateral,p1-7,0,
2,0,Cerebellum,contralateral,p7-13,0,
3,0,Dorsal peduncular area,contralateral,p7-13,0,
4,0,Ectorhinal area,ipsilateral,p1-7,0,
...,...,...,...,...,...,...
515,n5_20241203_p7_13,Visual areas,contralateral,p7-13,329,0.113683
516,n5_20241203_p7_13,Visual areas,ipsilateral,p7-13,73,0.025225
517,n5_20241203_p7_13,root,contralateral,p7-13,5,0.001728
518,n5_20241203_p7_13,ventricular systems,contralateral,p7-13,5,0.001728


In [94]:
def get_area_order_by_age(big_file_long):
    # Filter for 'ipsilateral' hemisphere and age group 'p7-13'
    filtered = big_file_long[
        (big_file_long["Hemisphere"] == "ipsilateral")
        & (big_file_long["age"] == "p7-13")
    ]

    area_order_scaled = (
        filtered.groupby("General area")["scaled count"]
        .max()
        .sort_values(ascending=False)
        .index.tolist()
    )

    area_order_count = (
        filtered.groupby("General area")["count"]
        .max()
        .sort_values(ascending=False)
        .index.tolist()
    )

    return area_order_scaled, area_order_count

In [95]:
area_order_scaled, area_order_count = get_area_order_by_age(df_grouped)

In [None]:
def plot_scaled_zoomed(big_file_long, Title):
    filtered_areas = [
        "Somatosensory areas",
        "Retrosplenial area"
        
    ]
    df_hemi = big_file_long[
        (big_file_long["Hemisphere"] == "ipsilateral")
        & (big_file_long["General area"].isin(filtered_areas))
    ]
    area_order_scaled = (
        df_hemi.groupby("General area")["scaled count"]
        .max()
        .sort_values(ascending=False)
        .index.tolist()
    )
    

    fig = px.box(
        df_hemi,
        y="scaled count",
        x="General area",
        points="all",
        color="age",
        color_discrete_map={"p1-7": "orange", "p7-13": "magenta"},
        title=Title,
        category_orders={"General area": area_order_scaled},
        labels={"scaled count": "Scaled count", "General area": "General area"},
    )
    fig.update_layout(
        legend=dict(font=dict(size=20)),
        xaxis_title_font=dict(size=24),  # x-axis label font size
        yaxis_title_font=dict(size=24),  # y-axis label font size
        xaxis_tickfont=dict(size=20),  # x-axis tick font size
        yaxis_tickfont=dict(size=18),  # y-axis tick font size
    )

    return fig

In [130]:
plot_scaled_zoomed(
    df_grouped, "V1 injections scaled counts by hemisphere, p1-7 (n=3) and p7-13 (n=9) "
)

In [134]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_counts_zoomed_subplots(big_file_long, Title):
    filtered_areas = [
        "Somatosensory areas",
        "Retrosplenial area"
    ]
    fig = make_subplots(rows=1, cols=2, subplot_titles=("Age: p1-7", "Age: p7-13"))
    age_labels = ["p1-7", "p7-13"]
    colors = {"ipsilateral": "orange", "contralateral": "magenta"}

    for i, age in enumerate(age_labels):
        df_age = big_file_long[
            (big_file_long["General area"].isin(filtered_areas)) &
            (big_file_long["age"] == age)
        ]
        area_order_count = (
            df_age.groupby("General area")["count"]
            .max()
            .sort_values(ascending=False)
            .index.tolist()
        )
        for hemi in ["ipsilateral", "contralateral"]:
            df_hemi = df_age[df_age["Hemisphere"] == hemi]
            fig.add_trace(
                go.Box(
                    y=df_hemi["count"],
                    x=df_hemi["General area"],
                    name=f"{hemi} - {age}",
                    marker_color=colors[hemi],
                    boxpoints="all",
                    legendgroup=hemi,
                    showlegend=(i==0) # only show legend for first plot
                ),
                row=1, col=i+1
            )

    fig.update_layout(
        title_text=Title,
        legend=dict(font=dict(size=18)),
        height=500, width=1000,
        font=dict(size=14),
    )
    fig.update_xaxes(title_text="General area", tickfont=dict(size=14))
    fig.update_yaxes(title_text="count", tickfont=dict(size=14))
    return fig


In [None]:
plot_counts_zoomed_subplots(
    df_filtered, "V1 injections  counts by hemisphere, p1-7 (n=3) and p7-13 (n=9) "
)

In [140]:
for index, row in big_file_V1_injections_long.iterrows():
    hemisphere = row["Hemisphere"]
    age = row["age"]
    if row["count"] == 962:
        print(row)

Brain             n5_20241103_p1_7
Region ID                      687
General area    Retrosplenial area
Hemisphere           contralateral
count                          962
scaled count                1.3985
age                           p1-7
Name: 912, dtype: object


In [None]:
def plot_scaled_zoomed(big_file_long, Title):
    filtered_areas = [
        "Somatosensory areas",
        "Retrosplenial area"
        
    ]
    df_hemi = big_file_long[
        (big_file_long["Hemisphere"] == "ipsilateral")
        & (big_file_long["General area"].isin(filtered_areas))
    ]
    area_order_scaled = (
        df_hemi.groupby("General area")["scaled count"]
        .max()
        .sort_values(ascending=False)
        .index.tolist()
    )
    

    fig = px.box(
        df_hemi,
        y="scaled count",
        x="General area",
        points="all",
        color="age",
        color_discrete_map={"p1-7": "orange", "p7-13": "magenta"},
        title=Title,
        category_orders={"General area": area_order_scaled},
        labels={"scaled count": "Scaled count", "General area": "General area"},
    )
    fig.update_layout(
        legend=dict(font=dict(size=20)),
        xaxis_title_font=dict(size=24),  # x-axis label font size
        yaxis_title_font=dict(size=24),  # y-axis label font size
        xaxis_tickfont=dict(size=20),  # x-axis tick font size
        yaxis_tickfont=dict(size=18),  # y-axis tick font size
    )

    return fig

In [None]:
plot_scaled_zoomed(
    df_grouped, "V1 injections scaled counts by hemisphere, p1-7 (n=3) and p7-13 (n=9) "
)

In [98]:
def plot_count_zoomed(big_file_long, Title):
    filtered_areas = [
        "Somatosensory areas",
        "Retrosplenial area",
        "Thalamus",
        "Orbital area",
        "Anterior cingulate area",
        "Auditory areas",
    ]
    df_hemi = big_file_long[
        (big_file_long["Hemisphere"] == "ipsilateral")
        & (big_file_long["General area"].isin(filtered_areas))
    ]
    area_order_count = (
        df_hemi.groupby("General area")["count"]
        .max()
        .sort_values(ascending=False)
        .index.tolist()
    )

    fig = px.box(
        df_hemi,
        y="count",
        x="General area",
        color="age",
        color_discrete_map={"p1-7": "orange", "p7-13": "magenta"},
        title=Title,
        category_orders={"General area": area_order_count},
        labels={"count": "Count", "General area": "General area"},
    )
    fig.update_layout(
        legend=dict(font=dict(size=20)),
        xaxis_title_font=dict(size=24),  # x-axis label font size
        yaxis_title_font=dict(size=24),  # y-axis label font size
        xaxis_tickfont=dict(size=21),  # x-axis tick font size
        yaxis_tickfont=dict(size=18),  # y-axis tick font size
    )

    return fig

In [99]:
plot_count_zoomed(
    df_grouped, "V1 injections counts by hemisphere, p1-7 (n=3) and p7-13 (n=9) "
)

In [71]:
def specific_interests(big_file_long, area_name, data_frame_name):

    data_frame_name = pd.DataFrame(
        big_file_long[big_file_long["General area"].isin([area_name])]
    )

    data_frame_name.insert(7, "Subdivided", [0] * len(data_frame_name))

    for index, row in data_frame_name.iterrows():
        region_id = row["Region ID"]
        subdivided_region = regions_complete.loc[
            regions_complete["Region ID"] == region_id, "Subdivided"
        ].values[0]
        # add the Subdivided values to the Somatosensory_areas DataFrame
        data_frame_name.at[index, "Subdivided"] = subdivided_region

    return data_frame_name

area 29 (granular): ventral RSC
area 30 (dysgranular): dorsal RSC and agranular RSC

In [72]:
Retrosplenial_areas = specific_interests(
    big_file_V1_injections_long, "Retrosplenial area", "Retrosplenial_areas"
)
Somato_areas = specific_interests(
    big_file_V1_injections_long, "Somatosensory areas", "Somato_areas"
)


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'area 30' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'Primary somatosensory area' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



In [73]:
subdivided_rsc_filtered = Retrosplenial_areas[
    ["Brain", "age", "count", "Hemisphere", "Subdivided"]
]

subdivided_rsc_grouped = subdivided_rsc_filtered.groupby(
    ["Brain", "Subdivided", "Hemisphere", "age"], as_index=False
)["count"].sum()

# add scaled column with 0
subdivided_rsc_grouped["scaled count"] = 0
# scale the counts
for index, row in subdivided_rsc_grouped.iterrows():
    brain_name = row["Brain"]
    count = row["count"]
    total_count = totals.loc[totals["Brain"] == brain_name, "total"].values[0]
    scaled_count = 1 / total_count * count
    subdivided_rsc_grouped.loc[index, "scaled count"] = scaled_count
subdivided_rsc_grouped


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '0.032377265528770016' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Unnamed: 0,Brain,Subdivided,Hemisphere,age,count,scaled count
0,n11_20241103_p1_7,area 29,contralateral,p1-7,552,0.032377
1,n11_20241103_p1_7,area 29,ipsilateral,p1-7,3,0.000176
2,n11_20241103_p1_7,area 30,contralateral,p1-7,832,0.048801
3,n1_20241203_p7_13,area 29,contralateral,p7-13,3,0.001943
4,n1_20241203_p7_13,area 29,ipsilateral,p7-13,391,0.253238
5,n1_20241203_p7_13,area 30,contralateral,p7-13,12,0.007772
6,n1_20241203_p7_13,area 30,ipsilateral,p7-13,89,0.057642
7,n1_20241209_p7_13,area 29,contralateral,p7-13,3,0.001943
8,n1_20241209_p7_13,area 29,ipsilateral,p7-13,391,0.253238
9,n1_20241209_p7_13,area 30,contralateral,p7-13,12,0.007772


In [74]:
def plot_count_zoomed_sub(big_file_long, Title):

    df_hemi = big_file_long[big_file_long["Hemisphere"] == "ipsilateral"]

    fig = px.box(
        df_hemi,
        y="count",
        x="Subdivided",
        color="age",
        # points="all",
        color_discrete_map={"p1-7": "darkorange", "p7-13": "magenta"},
        title=Title,
        
        labels={"count": "Count", "Subdivided": "Subdivided region"},
    )
    fig.update_layout(
        legend=dict(font=dict(size=20)),
        xaxis_title_font=dict(size=24),  # x-axis label font size
        yaxis_title_font=dict(size=24),  # y-axis label font size
        xaxis_tickfont=dict(size=20),  # x-axis tick font size
        yaxis_tickfont=dict(size=18),  # y-axis tick font size
    )
    return fig

In [75]:
plot_count_zoomed_sub(
    subdivided_rsc_grouped,
    "V1 injections counts in Retrosplenial area subdivisions by hemisphere, p1-7 (n=3) and p7-13 (n=9) ",
)

In [76]:
def plot_scaledcount_zoomed_sub(big_file_long, Title):

    df_hemi = big_file_long[big_file_long["Hemisphere"] == "ipsilateral"]

    fig = px.box(
        df_hemi,
        y="scaled count",
        x="Subdivided",
        color="age",
        # points = "all",
        color_discrete_map={"p1-7": "orange", "p7-13": "magenta"},
        title=Title,
        labels={"count": "Count", "Subdivided": "Subdivided region"},
    )
    fig.update_layout(
        legend=dict(font=dict(size=20)),
        xaxis_title_font=dict(size=24),  # x-axis label font size
        yaxis_title_font=dict(size=24),  # y-axis label font size
        xaxis_tickfont=dict(size=20),  # x-axis tick font size
        yaxis_tickfont=dict(size=18),  # y-axis tick font size
    )

    return fig

In [77]:
plot_scaledcount_zoomed_sub(
    subdivided_rsc_grouped,
    "V1 injections scaled counts in Retrosplenial area subdivisions by hemisphere, p1-7 (n=3) and p7-13 (n=9) ",
)

In [78]:
plot_count_zoomed_sub(
    Somato_areas,
    "Primary Somatosensory areas by hemisphere, p1-7 (n=3) and p7-13 (n=9)",
)

In [79]:
plot_scaledcount_zoomed_sub(
    Somato_areas, ", Somatosensory areas, p1-7 (n=3) and p7-13 (n=9)"
)

In [80]:
# dataframe with only the primary somatosensory areas and added Special Interest
primary_somatosensory_areas = Somato_areas[
    Somato_areas["Subdivided"].isin(["Primary somatosensory area", "Special Interest"])
].copy()

primary_somatosensory_areas.insert(
    8, "Special Interest", [0] * len(primary_somatosensory_areas)
)

for index, row in primary_somatosensory_areas.iterrows():
    region_id = row["Region ID"]
    special_interest = regions_complete.loc[
        regions_complete["Region ID"] == region_id, "Special Interest"
    ].values[0]
    # add the Subdivided values to the Somatosensory_areas DataFrame
    primary_somatosensory_areas.at[index, "Special Interest"] = special_interest
primary_somatosensory_areas


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'mouth' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Unnamed: 0,Brain,Region ID,General area,Hemisphere,count,scaled count,age,Subdivided,Special Interest
33,n3_20241010_p1_7,1102,Somatosensory areas,ipsilateral,2,0.041127,p1-7,Primary somatosensory area,mouth
38,n3_20241010_p1_7,854,Somatosensory areas,ipsilateral,2,0.041127,p1-7,Primary somatosensory area,upper limb
48,n3_20241010_p1_7,201,Somatosensory areas,contralateral,1,0.020563,p1-7,Primary somatosensory area,S1BF
50,n3_20241010_p1_7,478,Somatosensory areas,contralateral,1,0.020563,p1-7,Primary somatosensory area,lower limb
58,n3_20241010_p1_7,1030,Somatosensory areas,ipsilateral,1,0.020563,p1-7,Primary somatosensory area,lower limb
...,...,...,...,...,...,...,...,...,...
3308,n4_20241209_p7_13,1062,Somatosensory areas,ipsilateral,2,0.024266,p7-13,Primary somatosensory area,S1BF
3454,n4_20241209_p7_13,950,Somatosensory areas,contralateral,1,0.012133,p7-13,Primary somatosensory area,mouth
3457,n4_20241209_p7_13,974,Somatosensory areas,contralateral,1,0.012133,p7-13,Primary somatosensory area,mouth
3464,n4_20241209_p7_13,1111,Somatosensory areas,contralateral,2,0.024266,p7-13,Primary somatosensory area,trunk


In [81]:
subdivided_S1_filtered = primary_somatosensory_areas[
    ["Brain", "age", "count", "Hemisphere", "Special Interest"]
]

subdivided_S1_grouped = subdivided_S1_filtered.groupby(
    ["Brain", "Special Interest", "Hemisphere", "age"], as_index=False
)["count"].sum()

# add scaled column with 0
subdivided_S1_grouped["scaled count"] = 0
# scale the counts
for index, row in subdivided_S1_grouped.iterrows():
    brain_name = row["Brain"]
    count = row["count"]
    total_count = totals.loc[totals["Brain"] == brain_name, "total"].values[0]
    scaled_count = 1 / total_count * count
    subdivided_S1_grouped.loc[index, "scaled count"] = scaled_count
subdivided_S1_grouped


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '0.003401959059182357' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Unnamed: 0,Brain,Special Interest,Hemisphere,age,count,scaled count
0,n11_20241103_p1_7,S1BF,contralateral,p1-7,58,0.003402
1,n11_20241103_p1_7,S1BF,ipsilateral,p1-7,8,0.000469
2,n11_20241103_p1_7,lower limb,contralateral,p1-7,24,0.001408
3,n11_20241103_p1_7,lower limb,ipsilateral,p1-7,3,0.000176
4,n11_20241103_p1_7,mouth,contralateral,p1-7,76,0.004458
...,...,...,...,...,...,...
95,n5_20241203_p7_13,upper limb,contralateral,p7-13,24,0.008293
96,n5_20241203_p7_13,upper limb,ipsilateral,p7-13,1,0.000346
97,n8_20241203_p7_13,S1BF,ipsilateral,p7-13,3,0.007958
98,n8_20241203_p7_13,mouth,contralateral,p7-13,2,0.005305


In [82]:
def plot_count_zoomed_spec(big_file_long, Title):

    df_hemi = big_file_long[big_file_long["Hemisphere"] == "ipsilateral"]

    area_order_count = (
        df_hemi.groupby("Special Interest")["count"]
        .max()
        .sort_values(ascending=False)
        .index.tolist()
    )

    fig = px.box(
        df_hemi,
        y="count",
        x="Special Interest",
        color="age",
        # points = "all",
        color_discrete_map={"p1-7": "orange", "p7-13": "magenta"},
        title=Title,
        category_orders={"General area": area_order_count},
        labels={"count": "Count", "Special Interest": "Subdivided region"},
    )
    fig.update_layout(
        legend=dict(font=dict(size=20)),
        xaxis_title_font=dict(size=24),  # x-axis label font size
        yaxis_title_font=dict(size=24),  # y-axis label font size
        xaxis_tickfont=dict(size=20),  # x-axis tick font size
        yaxis_tickfont=dict(size=18),  # y-axis tick font size
    )

    return fig

In [83]:
def plot_scaledcount_zoomed_spec(big_file_long, Title):

    df_hemi = big_file_long[big_file_long["Hemisphere"] == "ipsilateral"]
    area_order_scaled_count = (
        df_hemi.groupby("Special Interest")["count"]
        .max()
        .sort_values(ascending=False)
        .index.tolist()
    )

    fig = px.box(
        df_hemi,
        y="scaled count",
        x="Special Interest",
        color="age",
        # points = "all",
        color_discrete_map={"p1-7": "orange", "p7-13": "magenta"},
        title=Title,
        category_orders={"General area": area_order_scaled_count},
        labels={"count": "Count", "Special Interest": "Subdivided region"},
    )
    fig.update_layout(
        legend=dict(font=dict(size=20)),
        xaxis_title_font=dict(size=24),  # x-axis label font size
        yaxis_title_font=dict(size=24),  # y-axis label font size
        xaxis_tickfont=dict(size=20),  # x-axis tick font size
        yaxis_tickfont=dict(size=18),  # y-axis tick font size
    )

    return fig

In [84]:
plot_count_zoomed_spec(
    subdivided_S1_grouped, "Primary Somatosensory areas, p1-7 (n=3) and p7-13 (n=9)"
)

In [85]:
plot_scaledcount_zoomed_spec(
    subdivided_S1_grouped,
    "scaled Primary Somatosensory areas, p1-7 (n=3) and p7-13 (n=9)",
)