## New approach: use a downloaded database to get around the 10-requests-per-second API limit
Inputs: datafile (pandas df), name of column containing names

Outputs: result.csv, gene_lookup.log

In [8]:
import csv
import os
import re
import pandas as pd
import time
import requests
import logging
from Functions import addColumns, makeAndFetchURL, search_single_gene

# Log records go to gene_lookup.log inside the current working directory.
log_path = os.path.join(os.getcwd(), 'gene_lookup.log')

# Options are collected in a mapping so each one can carry its own note.
_LOG_SETTINGS = {
    'filename': log_path,   # absolute path built above
    'filemode': 'a',        # append across runs; 'w' would overwrite each time
    'level': logging.INFO,  # record INFO and anything more severe
    'format': '%(asctime)s - %(levelname)s - %(message)s',
}
logging.basicConfig(**_LOG_SETTINGS)

In [9]:
def ALL(df_original, name_col):
    """Annotate each entry of ``name_col`` using the downloaded database.

    A copy of ``df_original`` gets four new columns ('Approved symbol',
    'Approved name', 'Previous symbols', 'Alias symbols') inserted directly
    after ``name_col``. Each row is filled from the first match returned by
    ``search_single_gene``; rows without a match keep ``None`` in the new
    columns. The downloaded file is deleted before returning, and
    ``name_col`` itself is dropped from the result.

    Args:
        df_original: pandas DataFrame holding the entries to look up. It is
            not modified.
        name_col: Label of the column in ``df_original`` that contains the
            names to search for.

    Returns:
        A new DataFrame with the four annotation columns added and
        ``name_col`` removed.

    Raises:
        KeyError: If ``name_col`` is not a column of ``df_original``.
    """
    new_cols = (
        'Approved symbol',
        'Approved name',
        'Previous symbols',
        'Alias symbols',
    )

    # 1. Find what info is needed and then download data, returning path.
    columns_needed = addColumns(df_original, name_col)
    path = makeAndFetchURL(columns_needed)

    try:
        # 2. Prep new df: insert the empty result columns right after name_col.
        df = df_original.copy()
        position = df.columns.get_loc(name_col)
        for offset, col in enumerate(new_cols, start=1):
            # Build the placeholder on df's own index so the insert never
            # depends on index alignment.
            placeholder = pd.Series([None] * len(df), index=df.index, dtype=object)
            df.insert(position + offset, col, placeholder)

        # 3. Iterate through each entry and add names.
        for idx, name in df[name_col].items():
            aSym, aName, pSym, alias = search_single_gene(path, name)

            if len(aSym.index) == 0:
                # The extra argument needs a %s placeholder; without one the
                # logging module reports a formatting error and drops the record.
                logging.info("Entry not in downloaded database, using API: %s", name)
                # TODO: use API to fetch in two rounds (first for approved,
                # then using approved to find rest of data). Not implemented
                # yet, so this row keeps None in the new columns.
                continue

            # If multiple entries are returned, log them; the first one is used.
            if len(aSym.index) > 1:
                logging.info(
                    "Multiple entries found for %s: Approved Symbols %s",
                    name, list(aSym),
                )

            # Positional access: take the first match whatever index labels
            # the lookup results carry.
            # NOTE(review): assumes search_single_gene returns pandas Series
            # - confirm against Functions.search_single_gene.
            df.at[idx, 'Approved symbol'] = aSym.iloc[0]
            df.at[idx, 'Approved name'] = aName.iloc[0]
            df.at[idx, 'Previous symbols'] = pSym.iloc[0]
            df.at[idx, 'Alias symbols'] = alias.iloc[0]
    finally:
        # Always delete the downloaded file, even if a lookup fails part-way.
        os.remove(path)

    return df.drop(columns=[name_col])

# Driver: read the input table, annotate its "Name" column, save the result.
test = pd.read_csv(filepath_or_buffer='test.csv')
result = ALL(df_original=test, name_col="Name")
# index=False keeps the DataFrame index out of the written file.
result.to_csv(path_or_buf='result.csv', index=False)