In [1]:
import pandas as pd
import requests
import numpy as np

In [2]:
import xml.etree.ElementTree as et  # needed because the API data are in XML format

In [49]:
from nilearn import *

In [4]:
url = "http://api.brain-map.org/api/v2/data/query.xml"

In [5]:
# from the Allen Brain website on their API, the criteria of the RMA queries for extracting the data on all human brain cell types are: 
# model::ApiCellTypesSpecimenDetail,
# rma::criteria,[donor__species$il'homo sapiens'],
# rma::options[num_rows$eqall]

criteria = "model::ApiCellTypesSpecimenDetail, rma::criteria, [donor__species$il'homo sapiens'], rma::options[num_rows$eqall]"

In [6]:
response = requests.get(url, params = {"criteria":criteria})  #requesting the url with those criteria

In [7]:
response.status_code

200

In [8]:
data = response.text

In [9]:
print(data)[:50]

<Response success='true' start_row='0' num_rows='413' total_rows='413'><api-cell-types-specimen-details>
  <api-cell-typess-specimen-details>
    <cell-reporter-status nil="true"/>
    <csl--normalized-depth nil="true"/>
    <csl--x>273.0</csl--x>
    <csl--y>354.0</csl--y>
    <csl--z>216.0</csl--z>
    <donor--age>25 yrs</donor--age>
    <donor--disease-state>epilepsy</donor--disease-state>
    <donor--id>524848408</donor--id>
    <donor--name>H16.03.003</donor--name>
    <donor--race>White or Caucasian</donor--race>
    <donor--sex>Male</donor--sex>
    <donor--species>Homo Sapiens</donor--species>
    <donor--years-of-seizure-history>9</donor--years-of-seizure-history>
    <ef--adaptation>0.0278459596639436</ef--adaptation>
    <ef--avg-firing-rate>13.5725111407696</ef--avg-firing-rate>
    <ef--avg-isi>73.6783333333333</ef--avg-isi>
    <ef--f-i-curve-slope>0.1671875</ef--f-i-curve-slope>
    <ef--fast-trough-v-long-square>-53.8750038146973</ef--fast-trough-v-long-square>
    <ef-

TypeError: 'NoneType' object is not subscriptable

In [10]:
# I saw that "data" is a string so I used the method fromstring() of ElementTree library that returns an elementTree object
# (that has a "tree" structure). The element is called "root" because it represents the root element of the XML file from which I can access
# all the other elements of the file ("children", attributes and so on)

root = et.fromstring(data)
root

<Element 'Response' at 0x0000023466AC8B30>

In [11]:
def extract_data(xml_root_element):

    """ Function to extract the data from the first tag of the XML file string, contained in the main root (Response). It returns a dictionary 
    having the tags (attribute .tag) of the child elements (for example "donor age" in <donor--age>25 yrs</donor--age>) as keys and 
    the text content (attribute .text) as value ("25 yrs" in <donor--age>25 yrs</donor--age>). 
    -> This function can be executed inside the create_dataframe() function of function.py"""
    
    data = {}
    for child in xml_root_element:
        data[child.tag] = child.text
    return data    

In [12]:
def create_dataframe(first_tag:str):
    """ The function creates a dataframe from XLM files from which the data were extracted with extract_data() function.
    As argument it takes the first tag (string) within the root that incorporates all the elements associated"""
    df_rows = []
    for x in root.findall(first_tag):
        df_rows.append(extract_data(x))

    df = pd.DataFrame(df_rows)

    return df

In [13]:
df = create_dataframe(".//api-cell-typess-specimen-details")

In [14]:
df.shape

(413, 54)

In [15]:
# first tried small modules to create the dataframe (then definec the function incorporating all
# 1. df_rows = [extract_data(x) for x in root.findall(".//api-cell-typess-specimen-details")]  # list containing the dictionary "data"

In [16]:
# 2. df_donors = pd.DataFrame(df_rows)

In [17]:
df.to_csv("raw_donors_data.csv")

In [18]:
df.columns

Index(['cell-reporter-status', 'csl--normalized-depth', 'csl--x', 'csl--y',
       'csl--z', 'donor--age', 'donor--disease-state', 'donor--id',
       'donor--name', 'donor--race', 'donor--sex', 'donor--species',
       'donor--years-of-seizure-history', 'ef--adaptation',
       'ef--avg-firing-rate', 'ef--avg-isi', 'ef--f-i-curve-slope',
       'ef--fast-trough-v-long-square', 'ef--peak-t-ramp', 'ef--ri', 'ef--tau',
       'ef--threshold-i-long-square',
       'ef--upstroke-downstroke-ratio-long-square', 'ef--vrest',
       'ephys-inst-thresh-thumb-path', 'ephys-thumb-path', 'erwkf--id',
       'line-name', 'm--biophys', 'm--biophys-all-active',
       'm--biophys-perisomatic', 'm--glif', 'morph-thumb-path',
       'nr--average-contraction', 'nr--average-parent-daughter-ratio',
       'nr--max-euclidean-distance', 'nr--number-bifurcations',
       'nr--number-stems', 'nr--reconstruction-type', 'nrwkf--id',
       'si--height', 'si--path', 'si--width', 'specimen--hemisphere',
       's

In [19]:
df1 = df.copy()

In [20]:
def rename_ef(df):
    "Function that renames the column starting with ef-- (=electrophysiology), by removing ef--"""
    df.columns = [x.replace("ef--", "") if x.startswith("ef--") else x for x in df.columns]

    return df

In [21]:
df1 = rename_ef(df1)
df1.columns

Index(['cell-reporter-status', 'csl--normalized-depth', 'csl--x', 'csl--y',
       'csl--z', 'donor--age', 'donor--disease-state', 'donor--id',
       'donor--name', 'donor--race', 'donor--sex', 'donor--species',
       'donor--years-of-seizure-history', 'adaptation', 'avg-firing-rate',
       'avg-isi', 'f-i-curve-slope', 'fast-trough-v-long-square',
       'peak-t-ramp', 'ri', 'tau', 'threshold-i-long-square',
       'upstroke-downstroke-ratio-long-square', 'vrest',
       'ephys-inst-thresh-thumb-path', 'ephys-thumb-path', 'erwkf--id',
       'line-name', 'm--biophys', 'm--biophys-all-active',
       'm--biophys-perisomatic', 'm--glif', 'morph-thumb-path',
       'nr--average-contraction', 'nr--average-parent-daughter-ratio',
       'nr--max-euclidean-distance', 'nr--number-bifurcations',
       'nr--number-stems', 'nr--reconstruction-type', 'nrwkf--id',
       'si--height', 'si--path', 'si--width', 'specimen--hemisphere',
       'specimen--id', 'specimen--name', 'structure--acronym

In [22]:
df1["donor--age"].unique()  # age is a string and I want it to be an int

array(['25 yrs', '33 yrs', '48 yrs', '26 yrs', '27 yrs', '37 yrs',
       '18 yrs', '83 yrs', '38 yrs', '35 yrs', '39 yrs', '34 yrs',
       '24 yrs', '28 yrs', '29 yrs', '67 yrs', '52 yrs', '71 yrs',
       '23 yrs', '41 yrs', '42 yrs', '65 yrs', '60 yrs', '61 yrs',
       '31 yrs', '19 yrs', '47 yrs', '30 yrs'], dtype=object)

In [23]:
def n_age(df):
    
    df["donor--age"] = df["donor--age"].str.replace(" yrs", "")
    df["donor--age"] = df["donor--age"].apply(lambda x: int(x) if pd.notnull(x) else np.nan)

    return df

In [24]:
df1 = n_age(df1)
df1["donor--age"].unique()

array([25, 33, 48, 26, 27, 37, 18, 83, 38, 35, 39, 34, 24, 28, 29, 67, 52,
       71, 23, 41, 42, 65, 60, 61, 31, 19, 47, 30], dtype=int64)

In [25]:
df1["avg-firing-rate"].unique()

array(['13.5725111407696', '89.2857142857151', '77.445126052133',
       '1.68799162756153', '207.900207900204', '15.8881474420083', None,
       '2.64781422935367', '12.9198966408269', '82.9187396351575',
       '15.7351460221551', '159.744408945689', '12.8736026777094',
       '9.77517106549365', '14.7766613189226', '4.06658352762565',
       '35.1370344342935', '19.721409146582', '45.2656269289329',
       '3.86787344318094', '27.3822562979189', '89.126559714795',
       '4.79041916167665', '17.6260783012608', '7.42390497401633',
       '172.413793103451', '8.90908280992472', '40.9987290393998',
       '9.65893236600915', '4.93644329260768', '25.0229376928852',
       '13.6994962800599', '13.3336296362141', '27.5330396475772',
       '199.203187250995', '7.26392251815979', '15.3300337260742',
       '33.5359463424859', '11.2079980273923', '5.93647966755714',
       '10.8232136285906', '30.5872756933116', '5.84453535943892',
       '29.1873410332318', '8.74497164130622', '40.71661237

In [26]:
def format_firing(df):
    """ Function that converts the firing rates object in floats with two decimals. 
    It handles NaN values and leaves them as they are, if present"""
    
    df["avg-firing-rate"] = df["avg-firing-rate"].apply(lambda x: round(float(x), 2) if pd.notnull(x) else np.nan)

    return df

In [27]:
df1 = format_firing(df1)
df1["avg-firing-rate"].info()

<class 'pandas.core.series.Series'>
RangeIndex: 413 entries, 0 to 412
Series name: avg-firing-rate
Non-Null Count  Dtype  
--------------  -----  
357 non-null    float64
dtypes: float64(1)
memory usage: 3.4 KB


In [28]:
def format_curve_slope(df):
    """ Function that converts values of f-i curve slope from strings to floats with 3 decimals.
    It handles NaN values and leaves them as they are, if present"""
    
    df["f-i-curve-slope"] = df["f-i-curve-slope"].apply(lambda x: round(float(x), 3) if pd.notnull(x) else np.nan)

    return df

In [29]:
df1 = format_curve_slope(df1)

In [30]:
df_ephys_age = df1.groupby(["donor--age"]).agg({"avg-firing-rate" : ["mean" , "std"] , "f-i-curve-slope": ["mean" , "std"]})
df_ephys_age

Unnamed: 0_level_0,avg-firing-rate,avg-firing-rate,f-i-curve-slope,f-i-curve-slope
Unnamed: 0_level_1,mean,std,mean,std
donor--age,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
18,15.713333,6.423043,0.25,0.108963
19,15.38,5.47854,0.133667,0.10289
23,17.906,12.335063,0.3275,0.253901
24,30.657674,37.621927,0.175681,0.177632
25,33.706,46.048165,0.133,0.074596
26,49.378889,57.138502,0.114273,0.135157
27,68.913571,89.188853,0.095118,0.109416
28,11.130769,8.264071,0.151533,0.138541
29,81.884,56.629511,0.560167,0.29446
30,4.43,1.838478,0.0705,0.026163


In [31]:
def age_category(age):
    """Functions that creates age categories. 
    It returns different categories of age, so can be applied to the column "donor--age" 
    and to create a new column where each donor has an assignedage category"""

    if isinstance(age, int):
        if 18 <= age <= 24:
            return "18-24 years"
        elif 25 <= age <= 30:
            return "25-30 years"
        elif 31 <= age <= 40:
            return "31-40 years"
        elif 41 <= age <= 50 :
            return "41-50 years"
        elif 51 <= age <= 65:
            return "51-65 years"
        elif 66 <= age <= 83:
            return "66-83 years"
        else:
            return np.nan
    else:
        return np.nan

In [32]:
df1["age-category"] = df1["donor--age"].apply(age_category)
df1["age-category"].unique()

array(['25-30 years', '31-40 years', '41-50 years', '18-24 years',
       '66-83 years', '51-65 years'], dtype=object)

In [33]:
df_ephys_age = df1.groupby(["age-category"]).agg({"avg-firing-rate" : ["mean" , "std"] , "f-i-curve-slope": ["mean" , "std"]})
df_ephys_age

Unnamed: 0_level_0,avg-firing-rate,avg-firing-rate,f-i-curve-slope,f-i-curve-slope
Unnamed: 0_level_1,mean,std,mean,std
age-category,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
18-24 years,24.8528,29.901476,0.220987,0.20084
25-30 years,44.598125,62.819639,0.166321,0.198782
31-40 years,27.730826,32.773846,0.175606,0.178622
41-50 years,41.854909,51.810424,0.128486,0.145904
51-65 years,25.159412,33.856813,0.224861,0.1553
66-83 years,21.993611,32.646082,0.136333,0.134236


In [34]:
df1.head()

Unnamed: 0,cell-reporter-status,csl--normalized-depth,csl--x,csl--y,csl--z,donor--age,donor--disease-state,donor--id,donor--name,donor--race,...,specimen--name,structure--acronym,structure--id,structure--layer,structure--name,structure-parent--acronym,structure-parent--id,tag--apical,tag--dendrite-type,age-category
0,,,273.0,354.0,216.0,25,epilepsy,524848408,H16.03.003,White or Caucasian,...,H16.03.003.01.14.02,FroL,12113,3,"""frontal lobe""",FroL,12113,intact,spiny,25-30 years
1,,,322.0,255.0,92.0,33,epilepsy,536912860,H16.03.006,unknown,...,H16.03.006.01.04.04,MTG,12141,3,"""middle temporal gyrus""",MTG,12141,truncated,spiny,31-40 years
2,,,69.0,254.0,96.0,48,epilepsy,528574320,H16.06.009,unknown,...,H16.06.009.01.02.06.05,MTG,12141,5,"""middle temporal gyrus""",MTG,12141,,aspiny,41-50 years
3,,0.164136578021453,127.0,354.0,219.0,26,tumor,518229880,H16.06.007,White or Caucasian,...,H16.06.007.01.07.03,FroL,12113,3,"""frontal lobe""",FroL,12113,intact,spiny,25-30 years
4,,0.281917541960568,320.0,274.0,102.0,27,epilepsy,526633593,H16.03.005,White or Caucasian,...,H16.03.005.01.04.04,TemL,12139,3,"""temporal lobe""",TemL,12139,,aspiny,25-30 years


In [35]:
df1["donor--disease-state"].unique()

array(['epilepsy', 'tumor'], dtype=object)

In [36]:
df1["structure--name"].unique()

array(['"frontal lobe"', '"middle temporal gyrus"', '"temporal lobe"',
       '"middle frontal gyrus"', '"planum polare"', '"angular gyrus"',
       '"superior frontal gyrus"', '"inferior temporal gyrus"',
       '"inferior frontal gyrus"'], dtype=object)

In [37]:
df_firing_region = df1.groupby(["structure--name"]).agg({"avg-firing-rate":["mean", "std"]})
df_firing_region

Unnamed: 0_level_0,avg-firing-rate,avg-firing-rate
Unnamed: 0_level_1,mean,std
structure--name,Unnamed: 1_level_2,Unnamed: 2_level_2
"""angular gyrus""",25.763571,29.48841
"""frontal lobe""",53.511053,57.987098
"""inferior frontal gyrus""",132.213333,108.848352
"""inferior temporal gyrus""",11.862727,7.783057
"""middle frontal gyrus""",30.833636,47.582254
"""middle temporal gyrus""",26.662353,33.587209
"""planum polare""",81.884,56.629511
"""superior frontal gyrus""",97.26,94.285618
"""temporal lobe""",59.73,87.326149


In [38]:
# create a new df with only data from epilepsy donors

df_epilepsy = df1[df1["donor--disease-state"] == "epilepsy"]

In [39]:
# create a new df with only data from tumor donors

df_tumor = df1[df1["donor--disease-state"] == "tumor"]

In [40]:
df_firing_region_epilepsy = df_epilepsy.groupby(["structure--name"]).agg({"avg-firing-rate":["mean", "std"]})

In [41]:
df_firing_region_tumor = df_tumor.groupby(["structure--name"]).agg({"avg-firing-rate":["mean", "std"]})

In [42]:
df_firing_region_tumor.head()

Unnamed: 0_level_0,avg-firing-rate,avg-firing-rate
Unnamed: 0_level_1,mean,std
structure--name,Unnamed: 1_level_2,Unnamed: 2_level_2
"""angular gyrus""",11.152,6.188923
"""frontal lobe""",60.584286,61.632721
"""inferior frontal gyrus""",132.213333,108.848352
"""middle frontal gyrus""",30.833636,47.582254
"""middle temporal gyrus""",39.697,50.776398


In [43]:
df_firing_region_epilepsy.head()

Unnamed: 0_level_0,avg-firing-rate,avg-firing-rate
Unnamed: 0_level_1,mean,std
structure--name,Unnamed: 1_level_2,Unnamed: 2_level_2
"""angular gyrus""",33.881111,34.443089
"""frontal lobe""",33.706,46.048165
"""inferior temporal gyrus""",11.862727,7.783057
"""middle temporal gyrus""",26.164847,32.797253
"""planum polare""",81.884,56.629511


In [44]:
df_firing_region_disease = pd.merge(df_firing_region_epilepsy, df_firing_region_tumor, on = "structure--name", how = "outer", suffixes = ("-epilepsy", "-tumor"))
df_firing_region_disease

Unnamed: 0_level_0,avg-firing-rate-epilepsy,avg-firing-rate-epilepsy,avg-firing-rate-tumor,avg-firing-rate-tumor
Unnamed: 0_level_1,mean,std,mean,std
structure--name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
"""angular gyrus""",33.881111,34.443089,11.152,6.188923
"""frontal lobe""",33.706,46.048165,60.584286,61.632721
"""inferior frontal gyrus""",,,132.213333,108.848352
"""inferior temporal gyrus""",11.862727,7.783057,,
"""middle frontal gyrus""",,,30.833636,47.582254
"""middle temporal gyrus""",26.164847,32.797253,39.697,50.776398
"""planum polare""",81.884,56.629511,,
"""superior frontal gyrus""",97.26,94.285618,,
"""temporal lobe""",59.73,87.326149,,


### Trying to obtain an image of the brain atlas and hightlighting the "structure--names" andthe firing rate in the different structures (with nilearn library)

In [46]:
import nilearn.datasets

In [51]:
atlas_aal = datasets.fetch_atlas_aal(version='SPM12', verbose=1)  # downloading the brain atlas
atlas_img = atlas_aal.maps  # downloading the atlas image


Added README.md to C:\Users\laura/nilearn_data


Dataset created in C:\Users\laura/nilearn_data\aal_SPM12

Downloading data from https://www.gin.cnrs.fr/AAL_files/aal_for_SPM12.tar.gz ...


 ...done. (3 seconds, 0 min)
Extracting data from C:\Users\laura/nilearn_data\aal_SPM12\556fddbb7c4821a18d1f85eefdc0f60e\aal_for_SPM12.tar.gz..... done.
