Loads the dataset into dataframe object 'df'

In [1]:
import pyreadr
import pandas as pd
# Read the .RDS file; the returned object is indexed by key below to get the DataFrame.
# NOTE(review): the path is relative, so the file is presumably expected in the
# notebook's working directory — confirm.
result = pyreadr.read_r('meta_ss_3_29_22.RDS')

# Extract the data
df = result[None]  # The frame is stored under the key None (presumably pyreadr's convention for an unnamed .RDS object — confirm against its docs)

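Keep only the neuronal cells (inhibitory and excitatory classes), then check that the dataset was loaded properly by counting the cells in each subclass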

In [2]:
# Subclass labels grouped by neuron class; a later cell uses these lists to order columns.
EXC_NEURONS = ['L2/3 IT', 'L4 IT', 'L5 IT', 'L6 IT', 'L6b', 'L6 CT', 'L6 IT Car3', 'L5 ET', 'L5/6 NP']
INH_NEURONS = ['Sst', 'Vip', 'Pvalb', 'Lamp5', 'Sncg', 'Pax6', 'Lamp5 Lhx6', 'Chandelier', 'Sst Chodl']

# Keep only the rows whose class is inhibitory or excitatory.
df = df.loc[df['class'].isin(['inhibitory', 'excitatory'])]

# Tally rows per subclass, print the grand total, then show the per-subclass table.
value_counts = df['within_area_subclass'].value_counts()
print(value_counts.sum())
value_counts

53697


within_area_subclass
L2/3 IT       13259
L5 IT          6326
L4 IT          5722
L6 IT          5041
Sst            4840
Vip            3201
Pvalb          3187
L6 CT          2938
Lamp5          2060
L6b            1870
L6 IT Car3     1313
Sncg           1121
L5/6 NP        1034
Lamp5 Lhx6      912
Pax6            376
L5 ET           227
Chandelier      205
Sst Chodl        65
Name: count, dtype: int64

In [3]:
from itertools import product
# Lift the row limit so displayed DataFrames are not truncated.
pd.set_option('display.max_rows', None)

# Distinct subregion and layer labels present in the data.
subregions, layers = (df[column].unique() for column in ('subregion', 'layer'))

# Every (subregion, layer) pairing, whether or not any row of df matches it.
combinations = list(product(subregions, layers))

# One row per pairing; the per-subclass counts are attached afterwards.
result = pd.DataFrame(combinations, columns=['subregion', 'layer'])

# Function to count within_area_subclass
def count_within_area_subclass(subregion, layer, data=None):
    """Count rows per within_area_subclass for one (subregion, layer) pair.

    Parameters
    ----------
    subregion : value compared for equality against the 'subregion' column.
    layer : value compared for equality against the 'layer' column.
    data : pandas.DataFrame, optional
        Frame with 'subregion', 'layer' and 'within_area_subclass' columns.
        When omitted, the notebook-level ``df`` is used, which keeps existing
        two-argument calls working while letting callers pass the frame
        explicitly instead of relying on kernel state.

    Returns
    -------
    dict
        Maps each subclass label found in the matching rows to its row count;
        empty when no row matches both values.
    """
    if data is None:
        # Fall back to the global frame only when no frame is supplied.
        data = df
    matches = (data['subregion'] == subregion) & (data['layer'] == layer)
    return data.loc[matches, 'within_area_subclass'].value_counts().to_dict()

# Look up the subclass tally for every (subregion, layer) row of `result`.
result['counts'] = [
    count_within_area_subclass(subregion, layer)
    for subregion, layer in zip(result['subregion'], result['layer'])
]

# Spread each tally dict into one column per subclass; subclasses missing from a
# row's dict come out as NaN and are replaced by 0.
counts_df = result['counts'].apply(pd.Series).fillna(0)

# Put the key columns back next to the per-subclass counts.
key_columns = result.drop(columns=['counts'])
final_result = pd.concat([key_columns, counts_df], axis=1)

# The full table is not displayed here; the next cell filters it and shows the result


In [4]:
# Every column after the first two is treated as a subclass count column
# (this relies on 'subregion' and 'layer' being the first two columns).
columns_to_check = final_result.columns[2:]

# True for rows where at least one subclass count is non-zero.
has_any_cells = final_result[columns_to_check].ne(0).any(axis=1)

# Key columns first, then excitatory subclasses, then inhibitory subclasses.
ordered_columns = ['subregion', 'layer'] + EXC_NEURONS + INH_NEURONS

# Select the non-empty rows in the chosen column order and renumber them from 0.
filtered_df = final_result.loc[has_any_cells, ordered_columns].reset_index(drop=True)

print("\nFiltered DataFrame:")
filtered_df


Filtered DataFrame:


Unnamed: 0,subregion,layer,L2/3 IT,L4 IT,L5 IT,L6 IT,L6b,L6 CT,L6 IT Car3,L5 ET,L5/6 NP,Sst,Vip,Pvalb,Lamp5,Sncg,Pax6,Lamp5 Lhx6,Chandelier,Sst Chodl
0,A1,L6b,6.0,1.0,8.0,183.0,57.0,112.0,21.0,0.0,4.0,16.0,10.0,19.0,8.0,0.0,1.0,16.0,7.0,0.0
1,A1,L1,167.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19.0,32.0,0.0,85.0,105.0,27.0,1.0,0.0,0.0
2,A1,L2,483.0,1.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,75.0,44.0,33.0,19.0,22.0,3.0,7.0,1.0,12.0
3,A1,L6a,8.0,4.0,93.0,84.0,34.0,59.0,61.0,0.0,16.0,54.0,7.0,40.0,6.0,2.0,0.0,11.0,2.0,0.0
4,A1,WM,7.0,2.0,2.0,78.0,49.0,58.0,10.0,0.0,0.0,9.0,9.0,13.0,7.0,1.0,0.0,19.0,1.0,1.0
5,A1,L3,785.0,58.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,69.0,108.0,71.0,12.0,2.0,2.0,6.0,11.0,0.0
6,A1,L4,288.0,465.0,233.0,2.0,0.0,0.0,5.0,5.0,10.0,201.0,109.0,127.0,50.0,0.0,0.0,8.0,19.0,0.0
7,A1,L5,20.0,140.0,413.0,23.0,5.0,15.0,66.0,7.0,74.0,184.0,40.0,108.0,17.0,1.0,2.0,19.0,9.0,1.0
8,ACC,L1,114.0,1.0,5.0,2.0,0.0,0.0,1.0,2.0,3.0,75.0,98.0,11.0,107.0,184.0,43.0,11.0,3.0,0.0
9,ACC,L2_3,874.0,52.0,75.0,7.0,0.0,0.0,0.0,5.0,1.0,203.0,233.0,185.0,122.0,53.0,20.0,27.0,13.0,0.0


Next, calculate the excitatory and inhibitory fractions for each subregion and layer, as well as the share that each neuron type contributes to the excitatory or inhibitory total, respectively.

In [5]:
# Number of rows in each (subregion, layer, class) group ...
class_counts = df.groupby(['subregion', 'layer', 'class']).size()

# ... flattened to a long table with the group sizes in a 'count' column.
ei_df = class_counts.reset_index(name='count')

ei_df

Unnamed: 0,subregion,layer,class,count
0,A1,L1,excitatory,167
1,A1,L1,inhibitory,269
2,A1,L2,excitatory,488
3,A1,L2,inhibitory,216
4,A1,L3,excitatory,848
5,A1,L3,inhibitory,281
6,A1,L4,excitatory,1008
7,A1,L4,inhibitory,514
8,A1,L5,excitatory,763
9,A1,L5,inhibitory,381


In [6]:
# Total count per (subregion, layer), summed over the classes in ei_df.
group_keys = ['subregion', 'layer']
total_counts_df = ei_df.groupby(group_keys)['count'].sum().reset_index(name='total_count')

# Drop any 'total_count' / 'fraction' columns left by a previous run of this cell
# before merging. Without this, re-running the cell merges a second 'total_count',
# pandas renames the pair to total_count_x / total_count_y, and the division below
# fails with KeyError. On a first run nothing is dropped and the result is unchanged.
ei_df = (
    ei_df
    .drop(columns=['total_count', 'fraction'], errors='ignore')
    .merge(total_counts_df, on=group_keys)
)

# Share of each class within its (subregion, layer) group.
ei_df['fraction'] = ei_df['count'] / ei_df['total_count']

print("Final DataFrame with Fractions:")
ei_df

Final DataFrame with Fractions:


Unnamed: 0,subregion,layer,class,count,total_count,fraction
0,A1,L1,excitatory,167,436,0.383028
1,A1,L1,inhibitory,269,436,0.616972
2,A1,L2,excitatory,488,704,0.693182
3,A1,L2,inhibitory,216,704,0.306818
4,A1,L3,excitatory,848,1129,0.751107
5,A1,L3,inhibitory,281,1129,0.248893
6,A1,L4,excitatory,1008,1522,0.662286
7,A1,L4,inhibitory,514,1522,0.337714
8,A1,L5,excitatory,763,1144,0.666958
9,A1,L5,inhibitory,381,1144,0.333042


In [7]:
# NOTE: this cell reads the long-format ei_df (it needs the 'class' column) and then
# overwrites ei_df with a wide table that no longer has that column, so it cannot be
# re-run without first re-running the cells that build the long-format ei_df.
merge_keys = ['subregion', 'layer']

# Excitatory rows only: 'class' is dropped and the value columns get an exc_ prefix.
exc_data = (
    ei_df.loc[ei_df['class'] == 'excitatory']
    .drop(columns=['class'])
    .rename(columns={'count': 'exc_count', 'fraction': 'exc_fraction'})
)

# Inhibitory rows only, handled the same way with an inh_ prefix.
inh_data = (
    ei_df.loc[ei_df['class'] == 'inhibitory']
    .drop(columns=['class'])
    .rename(columns={'count': 'inh_count', 'fraction': 'inh_fraction'})
)

# Outer join keeps (subregion, layer) pairs that appear in only one of the two
# frames; the columns of the absent side are NaN for such rows.
ei_df = exc_data.merge(inh_data, on=merge_keys, how='outer')

# Keep the keys plus the four renamed columns, in display order (this also discards
# the duplicated total_count columns produced by the join).
ei_df = ei_df[merge_keys + ['exc_count', 'inh_count', 'exc_fraction', 'inh_fraction']]

print("\nFinal DataFrame:")
ei_df


Final DataFrame:


Unnamed: 0,subregion,layer,exc_count,inh_count,exc_fraction,inh_fraction
0,A1,L1,167,269,0.383028,0.616972
1,A1,L2,488,216,0.693182,0.306818
2,A1,L3,848,281,0.751107,0.248893
3,A1,L4,1008,514,0.662286,0.337714
4,A1,L5,763,381,0.666958,0.333042
5,A1,L6a,359,122,0.746362,0.253638
6,A1,L6b,392,77,0.835821,0.164179
7,A1,WM,206,60,0.774436,0.225564
8,ACC,L1,128,532,0.193939,0.806061
9,ACC,L2_3,1014,856,0.542246,0.457754


In [8]:
# Goal: sort the layers and subregions and make them uniform between the two DataFrames
# (filtered_df and ei_df) so that the per-subclass counts can be divided by the
# excitatory / inhibitory totals, and for general readability.
# TODO (original note): fix rows with layers.
#
# NOTE(review): the two drafted lines below divide by index alignment. filtered_df is
# built from the product of the unique subregion and layer values, while ei_df comes from
# a groupby followed by a merge, and the displayed row orders of the two frames differ.
# The same index label therefore does not refer to the same (subregion, layer) in both
# frames. Align them on ['subregion', 'layer'] (for example via a merge or a shared
# index) before enabling the division.
# NOTE(review): the saved output of this cell shows different layer labels than the
# earlier display of filtered_df for identical count rows. That suggests it was produced
# by relabelling code that is no longer in this cell — re-run from a fresh kernel before
# relying on it.
#filtered_df[INH_NEURONS] = filtered_df[INH_NEURONS].div(ei_df['inh_count'], axis=0)
#filtered_df[EXC_NEURONS] = filtered_df[EXC_NEURONS].div(ei_df['exc_count'], axis=0)
filtered_df

Unnamed: 0,subregion,layer,L2/3 IT,L4 IT,L5 IT,L6 IT,L6b,L6 CT,L6 IT Car3,L5 ET,L5/6 NP,Sst,Vip,Pvalb,Lamp5,Sncg,Pax6,Lamp5 Lhx6,Chandelier,Sst Chodl
0,A1,L4ab,6.0,1.0,8.0,183.0,57.0,112.0,21.0,0.0,4.0,16.0,10.0,19.0,8.0,0.0,1.0,16.0,7.0,0.0
1,A1,L6b,167.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19.0,32.0,0.0,85.0,105.0,27.0,1.0,0.0,0.0
2,A1,L1,483.0,1.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,75.0,44.0,33.0,19.0,22.0,3.0,7.0,1.0,12.0
3,A1,L4c,8.0,4.0,93.0,84.0,34.0,59.0,61.0,0.0,16.0,54.0,7.0,40.0,6.0,2.0,0.0,11.0,2.0,0.0
4,A1,L5,7.0,2.0,2.0,78.0,49.0,58.0,10.0,0.0,0.0,9.0,9.0,13.0,7.0,1.0,0.0,19.0,1.0,1.0
5,A1,L2,785.0,58.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,69.0,108.0,71.0,12.0,2.0,2.0,6.0,11.0,0.0
6,A1,L6a,288.0,465.0,233.0,2.0,0.0,0.0,5.0,5.0,10.0,201.0,109.0,127.0,50.0,0.0,0.0,8.0,19.0,0.0
7,A1,L5,20.0,140.0,413.0,23.0,5.0,15.0,66.0,7.0,74.0,184.0,40.0,108.0,17.0,1.0,2.0,19.0,9.0,1.0
8,ACC,L6b,114.0,1.0,5.0,2.0,0.0,0.0,1.0,2.0,3.0,75.0,98.0,11.0,107.0,184.0,43.0,11.0,3.0,0.0
9,ACC,L2_3,874.0,52.0,75.0,7.0,0.0,0.0,0.0,5.0,1.0,203.0,233.0,185.0,122.0,53.0,20.0,27.0,13.0,0.0
