In [2]:
import pandas as pd
from collections import Counter

# Load the per-cluster metadata table.
mouse_metadata = pd.read_csv(r"D:\Mouse\Notebooks\mouse_data_cluster_metadata.csv")

# For every 'CTX.cluster_id', collect all of its 'subclass_id' values (duplicates kept).
subclass_groups = mouse_metadata.groupby('CTX.cluster_id')['subclass_id']
cluster_mapping_full = subclass_groups.apply(list).to_dict()

# {cluster_id: {subclass_id: count}} for clusters whose highest count is shared
# by at least two subclass_ids, i.e. clusters with no single most common value.
clusters_without_unique_mode = {}

for cluster_id, subclass_list in cluster_mapping_full.items():
    count = Counter(subclass_list)
    # (subclass_id, count) pairs ordered from most to least frequent.
    most_common = count.most_common()
    # Fewer than two distinct subclass_ids (this covers single-entry clusters too):
    # a tie is impossible, so move on.
    if len(most_common) < 2:
        continue
    top_count, runner_up_count = most_common[0][1], most_common[1][1]
    if top_count == runner_up_count:
        clusters_without_unique_mode[cluster_id] = dict(count)

# Report each tied cluster together with its per-subclass counts.
for cluster_id, counts in clusters_without_unique_mode.items():
    print(f"Cluster ID: {cluster_id}, counts: {counts}")

Cluster ID: 10.0, counts: {48.0: 1, 49.0: 1}
Cluster ID: 25.0, counts: {47.0: 1, 48.0: 1}
Cluster ID: 58.0, counts: {46.0: 1, 48.0: 1}
Cluster ID: 107.0, counts: {48.0: 1, 53.0: 1}
Cluster ID: 116.0, counts: {52.0: 1, 262.0: 1}
Cluster ID: 163.0, counts: {7.0: 1, 14.0: 1}
Cluster ID: 164.0, counts: {7.0: 1, 73.0: 1}
Cluster ID: 175.0, counts: {9.0: 1, 14.0: 1, 113.0: 1}
Cluster ID: 193.0, counts: {6.0: 1, 203.0: 1}
Cluster ID: 195.0, counts: {5.0: 1, 114.0: 1}
Cluster ID: 213.0, counts: {2.0: 1, 3.0: 1}
Cluster ID: 226.0, counts: {2.0: 1, 3.0: 1, 4.0: 1}
Cluster ID: 237.0, counts: {1.0: 1, 7.0: 1}
Cluster ID: 304.0, counts: {28.0: 2, 29.0: 2}
Cluster ID: 316.0, counts: {27.0: 2, 29.0: 2}
Cluster ID: 344.0, counts: {14.0: 1, 16.0: 1}
Cluster ID: 387.0, counts: {335.0: 1, 337.0: 1}
Cluster ID: 388.0, counts: {335.0: 1, 336.0: 1}


In [3]:
# Checking names:
# Print, for one cluster id, the alias labels used in the CTXHPF cell metadata
# next to the subclass labels/ids recorded for the same id in the cluster metadata.
cell = 16

CTXHPF_metadata = pd.read_csv(
    r"D:\Mouse\Notebooks\Metadata_CTXHPF_Cells_Link_Mouse_Paper.csv",
    usecols=['cell_type_alias_id','sample_name','cell_type_alias_label'],
)
is_selected_alias = CTXHPF_metadata['cell_type_alias_id'] == cell
alias_labels = CTXHPF_metadata.loc[is_selected_alias, 'cell_type_alias_label']
print(alias_labels)

# NOTE: this rebinds mouse_metadata to a three-column view of the same CSV.
mouse_metadata = pd.read_csv(
    r"D:\Mouse\Notebooks\mouse_data_cluster_metadata.csv",
    usecols=['CTX.cluster_id','subclass_label','subclass_id'],
)
is_selected_cluster = mouse_metadata['CTX.cluster_id'] == cell
subclass_rows = mouse_metadata.loc[is_selected_cluster, ['subclass_label','subclass_id']]
print(subclass_rows)

# Manual tie-breaks, written as cluster_id:subclass_id. The subclass chosen is the
# one whose label looks most like the CTXHPF alias label -- e.g. for cluster 10,
# "Lamp5 Gaba" is closer to "10_Lamp5" than "RHP-COA Ndnf Gaba" is.
# NOTE(review): cluster 195 was also reported as tied (counts {5.0: 1, 114.0: 1})
# but has no entry below -- confirm whether that omission is intentional.
#10:49
#25:47
#58:46
#107:53
#116:52
#163:7
#164:7
#175:9
#193:6
#213:3
#226:4
#237:1
#304:29
#316:29
#344:16
#387:335
#388:335

83         16_Lamp5
137        16_Lamp5
166        16_Lamp5
179        16_Lamp5
207        16_Lamp5
             ...   
1104282    16_Lamp5
1104394    16_Lamp5
1104627    16_Lamp5
1105149    16_Lamp5
1105376    16_Lamp5
Name: cell_type_alias_label, Length: 2169, dtype: object
    subclass_label  subclass_id
587   OB Dopa-Gaba         44.0
715     Lamp5 Gaba         49.0
716     Lamp5 Gaba         49.0


In [9]:
import json
# Saving mappings:
# Build {cluster_id: subclass_id} for every cluster that has either:
#  - a single distinct subclass value, or
#  - several values with a strictly most common subclass value.
# Clusters whose top count is tied are collected in tied_clusters so that any
# of them lacking a manual override can be reported instead of silently
# disappearing from the saved mapping.
unique_mode_mapping = {}
tied_clusters = {}

for cluster_id, subclass_list in cluster_mapping_full.items():
    cnt = Counter(subclass_list)
    # Exactly one distinct subclass: that value is the answer.
    if len(cnt) == 1:
        unique_mode_mapping[cluster_id] = next(iter(cnt))
        continue
    # Otherwise compare the two most frequent subclass values.
    most_common = cnt.most_common(2)
    if most_common[0][1] > most_common[1][1]:
        # Strictly larger top count: unique mode.
        unique_mode_mapping[cluster_id] = most_common[0][0]
    else:
        # Tie at the top: needs a manual decision.
        tied_clusters[cluster_id] = dict(cnt)

# Hand-picked subclass for clusters without a unique mode.
provided_mappings = {
    10: 49,
    25: 47,
    58: 46,
    107: 53,
    116: 52,
    163: 7,
    164: 7,
    175: 9,
    193: 6,
    213: 3,
    226: 4,
    237: 1,
    304: 29,
    316: 29,
    344: 16,
    387: 335,
    388: 335
}

# Surface tied clusters that have no manual override; they are not written to
# the mapping. The membership test works across int and float keys because
# equal numbers hash and compare equal (195.0 matches a key 195).
for cluster_id, counts in tied_clusters.items():
    if cluster_id not in provided_mappings:
        print(
            f"WARNING: cluster {cluster_id} has tied subclass counts {counts} "
            "and no entry in provided_mappings; it is omitted from the mapping."
        )

unique_mode_mapping.update(provided_mappings)

# NOTE(review): keys/values derived from the grouped data are floats while the
# manual entries are ints, so the JSON file mixes key spellings such as "1.0"
# and "10". Left unchanged here because readers of the file may rely on the
# current format -- confirm and normalise in one place if possible.
print(unique_mode_mapping)
with open("D:\\Mouse\\Notebooks\\CTXHPFcluster_to_Mousesubclass_mapping.json",'w') as f:
    json.dump(unique_mode_mapping,f)

{1.0: 36.0, 2.0: 44.0, 3.0: 41.0, 4.0: 48.0, 5.0: 50.0, 6.0: 50.0, 7.0: 50.0, 8.0: 50.0, 9.0: 50.0, 11.0: 49.0, 12.0: 49.0, 14.0: 47.0, 15.0: 49.0, 16.0: 49.0, 17.0: 49.0, 19.0: 49.0, 21.0: 47.0, 24.0: 47.0, 26.0: 48.0, 27.0: 48.0, 28.0: 48.0, 29.0: 48.0, 30.0: 48.0, 31.0: 47.0, 32.0: 47.0, 33.0: 47.0, 34.0: 47.0, 35.0: 47.0, 36.0: 47.0, 37.0: 47.0, 39.0: 47.0, 41.0: 47.0, 42.0: 46.0, 43.0: 46.0, 44.0: 46.0, 45.0: 46.0, 46.0: 46.0, 47.0: 46.0, 48.0: 46.0, 49.0: 46.0, 50.0: 46.0, 51.0: 46.0, 53.0: 46.0, 55.0: 46.0, 56.0: 46.0, 59.0: 46.0, 60.0: 46.0, 61.0: 46.0, 62.0: 49.0, 63.0: 56.0, 64.0: 56.0, 67.0: 53.0, 68.0: 53.0, 69.0: 53.0, 70.0: 53.0, 71.0: 53.0, 72.0: 53.0, 74.0: 57.0, 75.0: 53.0, 76.0: 53.0, 77.0: 53.0, 78.0: 53.0, 79.0: 53.0, 80.0: 53.0, 81.0: 53.0, 82.0: 53.0, 84.0: 53.0, 85.0: 53.0, 87.0: 75.0, 88.0: 53.0, 89.0: 53.0, 90.0: 53.0, 91.0: 53.0, 93.0: 53.0, 94.0: 53.0, 96.0: 53.0, 97.0: 53.0, 98.0: 53.0, 99.0: 53.0, 100.0: 53.0, 102.0: 53.0, 103.0: 53.0, 104.0: 53.0, 105.0: 5