In [1]:
import gspread
import pandas as pd
import pprint

In [2]:
# give service account details to gspread
sa = gspread.service_account(filename='credentials.json')

# sa is a gspread client, which can be used for connecting to the sheets
# by using the open method and the sheet name.
cho_recon = sa.open('temporary')

# we also need to specify the page name before getting the data.
copyattributes_sheet = cho_recon.worksheet('copyAttributes')

In [3]:
for sheets in cho_recon:
    print(sheets)

<Worksheet 'Info' id:0>
<Worksheet 'Rxns' id:1966089892>
<Worksheet 'Attributes' id:745769606>
<Worksheet 'copyAttributes' id:368082576>
<Worksheet 'Added Rxns' id:1377582373>
<Worksheet 'Genes' id:239167986>


In [23]:
# We can extract the data using the get_all_records method and create a pd DataFrame
df = pd.DataFrame(copyattributes_sheet.get_all_records())
df = df.set_index('Index')
df

Unnamed: 0_level_0,Curated,Reaction,Reaction Formula,Subsystem,EC Number,TCDB Number,Mol wt,kcat_forward,kcat_backward,Reversible,Lower bound,Upper bound,Objective
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,PD,10FTHF5GLUtl,10fthf5glu_c --> 10fthf5glu_l,"TRANSPORT, LYSOSOMAL",,,,,,0,0,1000,0
1,PD,10FTHF5GLUtm,10fthf5glu_m --> 10fthf5glu_c,"TRANSPORT, MITOCHONDRIAL",,,thanasis,,,0,0,1000,0
2,PD,10FTHF6GLUtl,10fthf6glu_c --> 10fthf6glu_l,"TRANSPORT, LYSOSOMAL",,,,,,0,0,1000,0
3,PD,10FTHF6GLUtm,10fthf6glu_m --> 10fthf6glu_c,"TRANSPORT, MITOCHONDRIAL",,,,,,0,0,1000,0
4,PD,10FTHF7GLUtl,10fthf7glu_c --> 10fthf7glu_l,"TRANSPORT, LYSOSOMAL",,,,,,0,0,1000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8195,,r2534,thr_L_e <=> thr_L_c,"TRANSPORT, EXTRACELLULAR",,,67409.13,,,1,-1000,1000,0
8196,,r2535,hom_L_e <=> hom_L_c,Transport,,,67409.13,,,1,-1000,1000,0
8197,,r2537,lnlncgcoa_c <=> lnlncgcoa_r,Transport,,,,,,1,-1000,1000,0
8198,,r2538,dlnlcgcoa_c <=> dlnlcgcoa_r,Transport,,,,,,1,-1000,1000,0


In [28]:
for index, row in df.iterrows():
    
    if row['Reaction Formula'] != '':
        row_number = index + 2
        print(row_number)
        column_index = df.columns.get_loc('Reaction Formula') + 1
        print(column_index)
        cell = copyattributes_sheet.cell(row_number,column_index)
        print('Cell Before Update: ',cell.value)

2
3
Cell Before Update:  10FTHF5GLUtl
3
3
Cell Before Update:  10FTHF5GLUtm
4
3
Cell Before Update:  10FTHF6GLUtl
5
3
Cell Before Update:  10FTHF6GLUtm
6
3
Cell Before Update:  10FTHF7GLUtl
7
3
Cell Before Update:  10FTHF7GLUtm
8
3
Cell Before Update:  10FTHFtl
9
3
Cell Before Update:  10FTHFtm
10
3
Cell Before Update:  11DOCRTSLtm
11
3
Cell Before Update:  11DOCRTSLtr
12
3
Cell Before Update:  11DOCRTSTRNtm
13
3
Cell Before Update:  11DOCRTSTRNtr
14
3
Cell Before Update:  12HPETATP
15
3
Cell Before Update:  12HPETUPKt
16
3
Cell Before Update:  13DAMPPOX
17
3
Cell Before Update:  15HPETATP
18
3


KeyboardInterrupt: 

In [None]:
#Update Cell
cell = work_sheet.cell(3,8)
print('Cell Before Update: ',cell.value)
work_sheet.update_cell(3,8,'thanasis')
cell = work_sheet.cell(3,8)
print('Cell After Update: ',cell.value)

In [None]:
# add a sheet with 8200 rows and 2 columns
cho_recon.add_worksheet(rows=8200,cols=14,title='Genes')

# get the instance of the second sheet
genes_sheet = sheet.worksheet('Genes')

In [None]:
# Fetch information from the NIH database

In [2]:
from Bio import Entrez

def get_gene_info(gene_id):
    '''
    This function retrieves information from the NIH Gene database
    
    input: Gene Entrez ID
    output: Gene Symbol, Gene Name, Gene Description, Gene Ensembl ID, NCBI Transcript ID, NCBI Protein ID
    '''
    Entrez.email = 'account1@theta-ocean-377718.iam.gserviceaccount.com'
    handle = Entrez.efetch(db='gene', id=gene_id, retmode='xml')
    record = Entrez.read(handle)[0]

    gene_name = record['Entrezgene_gene']['Gene-ref']['Gene-ref_desc']
    gene_symbol = record['Entrezgene_gene']['Gene-ref']['Gene-ref_locus']

    # check for different possible formats of the data
    if 'Entrezgene_comments' in record and 'Gene-commentary_comment' in record['Entrezgene_comments'][0]:
        gene_description = record['Entrezgene_comments'][0]['Gene-commentary_comment'][0]['String']
    elif 'Entrezgene_summary' in record:
        gene_description = record['Entrezgene_summary']
    else:
        gene_description = None

    if 'Entrezgene_track-info' in record:
        gene_ensembl_id = next((xref['Dbtag_tag']['Object-id']['Object-id_str'] for xref in record['Entrezgene_gene']['Gene-ref']['Gene-ref_db'] if xref['Dbtag_db'] == 'Ensembl'), None)
    else:
        gene_ensembl_id = None

    xrefs = record['Entrezgene_locus'][0]['Gene-commentary_products']
    
    for xref in xrefs:
        if xref.get('Gene-commentary_accession').startswith('NM_'):
            mRNA_ncbi_id = xref.get('Gene-commentary_accession')
            protein_ncbi_id = xref['Gene-commentary_products'][0].get('Gene-commentary_accession')
            break
        elif xref.get('Gene-commentary_accession').startswith('XM_'):
            mRNA_ncbi_id = xref.get('Gene-commentary_accession')
            protein_ncbi_id = xref['Gene-commentary_products'][0].get('Gene-commentary_accession')
            break
    

    handle.close()

    return gene_symbol, gene_name, gene_description, gene_ensembl_id, mRNA_ncbi_id, protein_ncbi_id


In [3]:
gene_id = '100767060'
gene_symbol, gene_name, gene_description, gene_ensembl_id, mRNA_ncbi_id, protein_ncbi_id = get_gene_info(gene_id)
print(gene_symbol)
print(gene_name)
print(gene_description)
print(gene_ensembl_id)
print(mRNA_ncbi_id)
print(protein_ncbi_id)

Acox3
acyl-CoA oxidase 3, pristanoyl
None
ENSCGRG00015025799
XM_007651628
XP_007649818
