In [196]:
# System prompt for the location-extraction chat completion: it asks the model
# to report the CITY / STATE / COUNTRY mentioned in a sentence as JSON.
# This is a plain (non-f) string because nothing is interpolated, so the braces
# of the format template need no `{{ }}` escaping. The resulting text is
# unchanged.
system_template = '''Give the names of any location such as city, state, country from the provided sentence in the specified format:
---BEGIN FORMAT TEMPLATE---
{"CITY":"city"
"STATE":"state of the city"
"COUNTRY": "the country the city and state belong to"}
---END FORMAT TEMPLATE---
Give the output of the format template in json format
'''

In [197]:
import threading
import time
import openai
from threading import Lock
from langchain_community.embeddings import OpenAIEmbeddings
from dotenv import load_dotenv
load_dotenv()
import json
from langchain_community.vectorstores import Chroma
import pandas as pd

class ProcessQuery():
    """Singleton that resolves a free-text query to a ranked table of trial rows.

    Two lookups run in parallel for each query: a similarity search against the
    persisted Chroma vector store (NCT numbers with relevance scores) and a chat
    completion that extracts the location named in the query. The rows of the
    trial table whose ``NCT_NUMBER`` was returned by the vector search are then
    ranked by score and narrowed to the most specific location level (city,
    then state, then country) that has at least one matching row.
    """

    _instance = None  # the single shared instance, created on first construction
    _lock = Lock()  # serialises creation of the shared instance
    _query_df = None  # trial table, read from CSV once and shared by all calls

    # Location columns tried in order, most specific first.
    _LOCATION_COLUMNS = ('CITY', 'STATE', 'COUNTRY')

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating and initialising it on first use.

        The instance is published on ``cls._instance`` only after both
        initialisers have succeeded, so a failed start-up (for example an
        unreadable CSV) is retried on the next construction instead of leaving
        a half-initialised singleton behind.
        """
        with cls._lock:
            if cls._instance is None:
                instance = super().__new__(cls)
                instance._initialize_vector_db()
                instance._initialize_query_df()
                cls._instance = instance
        return cls._instance

    def _initialize_vector_db(self):
        """Open the persisted Chroma store, embedding queries with OpenAIEmbeddings."""
        self.vector_database = Chroma(persist_directory='../CT_VDB/VDB_V_01/', embedding_function=OpenAIEmbeddings())

    def _initialize_query_df(self):
        """Load the trial table into the class-level cache if it is not loaded yet."""
        if ProcessQuery._query_df is None:
            ProcessQuery._query_df = pd.read_csv('../ct_csv/FinalCTTrialsDF_P1_w_ContactInfo.csv')

    def get_nct_scores(self, docs:list) -> dict:
        """Map each hit's ``metadata['nct_number']`` to its score.

        Args:
            docs: sequence of ``(document, score)`` pairs; each document must
                expose a ``metadata`` mapping containing ``'nct_number'``.

        Returns:
            dict of NCT number -> score. When the same NCT number occurs more
            than once, the score of the last occurrence is kept.
        """
        return {doc[0].metadata['nct_number']: doc[1] for doc in docs}

    def search_vector_db(self, args, result_dict):
        """Run the similarity search for ``args`` and store the hits in ``result_dict``.

        Writes ``'vector_db_scores_dict'`` (NCT number -> relevance score) and
        ``'vector_db_nct_numbers'`` (the NCT numbers as a list).
        """
        result = self.vector_database.similarity_search_with_relevance_scores(args)
        nct_score_dict = self.get_nct_scores(result)
        result_dict['vector_db_scores_dict'] = nct_score_dict
        result_dict['vector_db_nct_numbers'] = list(nct_score_dict.keys())

    def get_location(self, system_prompt, user_prompt, model='gpt-4-0125-preview', temperature=0, verbose=False):
        """Ask the chat model for a JSON object and return its raw text.

        Args:
            system_prompt: system message sent to the model.
            user_prompt: user message; converted with ``str()`` before sending.
            model: chat model name.
            temperature: sampling temperature.
            verbose: accepted for compatibility; not used by this method.

        Returns:
            The content string of the first completion choice.
        """
        response = openai.chat.completions.create(
            model=model,
            temperature=temperature,
            messages=[
                {"role":"system", "content":system_prompt},
                {"role":"user", "content":str(user_prompt)},
            ],
            max_tokens = 1024,
            response_format={ "type": "json_object" }
        )
        return response.choices[0].message.content

    def search_dataframe(self, args, result_dict):
        """Extract the location mentioned in ``args`` and store it under ``'location'``.

        Despite its name this method does not touch the trial table: it sends
        ``args`` to the chat model with ``system_template`` and stores the
        parsed JSON reply in ``result_dict['location']``.
        """
        response = self.get_location(system_prompt=system_template, user_prompt=args)
        result_dict['location'] = json.loads(response)

    @staticmethod
    def _filter_by_location(trials_df, location):
        """Narrow ``trials_df`` to the most specific location level that matches.

        The CITY, STATE and COUNTRY columns are tried in that order; the rows
        of the first level with at least one match are returned. A level is
        skipped when ``location`` has no value for it or the column is absent.
        When nothing matches (or ``location`` is not a dict) the frame is
        returned unfiltered.
        """
        if not isinstance(location, dict):
            return trials_df
        for column in ProcessQuery._LOCATION_COLUMNS:
            wanted = location.get(column)
            if wanted is None or column not in trials_df.columns:
                continue
            matches = trials_df[trials_df[column] == wanted]
            if len(matches):
                return matches
        return trials_df

    @classmethod
    def process_query(cls, query:str):
        """Answer ``query`` with the matching trial rows, best score first.

        Args:
            query: free-text question, used both for the vector search and for
                location extraction.

        Returns:
            DataFrame of trial rows with an added ``score`` column, sorted by
            descending score and narrowed by location (see
            ``_filter_by_location``).

        Raises:
            Exception: the first error raised by either lookup is re-raised
                here, rather than being lost in its worker thread and showing
                up later as a missing-key error.

        Side effects:
            Prints the raw lookup results and writes the ranked,
            not-yet-location-filtered rows to ``'../result_tests/result.csv'``.
        """
        instance = cls()
        instance._initialize_query_df()  # no-op when the table is already cached
        result_dict = {}
        errors = []

        def _run(target):
            # Capture failures so they can be re-raised in the calling thread.
            try:
                target(query, result_dict)
            except Exception as exc:
                errors.append(exc)

        # The two lookups write disjoint keys of result_dict.
        workers = [
            threading.Thread(target=_run, args=(instance.search_vector_db,)),
            threading.Thread(target=_run, args=(instance.search_dataframe,)),
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
        if errors:
            raise errors[0]

        # Reuse the table cached on the class instead of re-reading the CSV.
        query_df = cls._query_df
        print(result_dict)

        scores_df = pd.DataFrame(
            list(result_dict['vector_db_scores_dict'].items()),
            columns=['NCT_NUMBER', 'score'],
        )

        distilled_df = query_df[query_df['NCT_NUMBER'].isin(result_dict['vector_db_nct_numbers'])]
        distilled_df = pd.merge(distilled_df, scores_df, on='NCT_NUMBER')
        distilled_df = distilled_df.sort_values(by='score', ascending=False)

        # index=False keeps the row index out of the dump, so reading the file
        # back does not introduce an extra 'Unnamed: 0' column.
        distilled_df.to_csv('../result_tests/result.csv', index=False)

        return cls._filter_by_location(distilled_df, result_dict['location'])

    def __init__(self):
        # Runs on every ProcessQuery() call, including ones that return the
        # already-created shared instance.
        self.return_value = None

In [195]:
# Load the trial table and display every row whose CITY is 'Washington'.
# NOTE(review): the path below is absolute and machine-specific, so this cell
# only runs on the original author's machine.
df = pd.read_csv('/Users/suryabhosale/Documents/projects/DORIS/src/POCClinicalTrial/CT_SEARCH_METHODS/hybrid_v1/FinalCTTrialsDF_P1.csv')
df.loc[df['CITY'] == 'Washington']

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,NCT_NUMBER,STUDY_TITLE,STUDY_URL,STUDY_STATUS,CONDITIONS,START_DATE,COMPLETION_DATE,AGE,ELIGIBILITY_CRITERIA,PRIMARY_OUTCOMES,SECONDARY_OUTCOMES,INTERVENTIONS,FACILITY,CITY,STATE,ZIP,COUNTRY
274,274,274,NCT04267848,Testing the Addition of a Type of Drug Called ...,https://www.clinicaltrials.gov/study/NCT04267848,Recruiting,"Lung Non-Small Cell Carcinoma, Lung Non-Small ...","June 16, 2020","December 15, 2024",18 Years||Adult|Older Adult,Eligibility includes: sexually mature females ...,The primary outcome is to compare disease-free...,Secondary outcomes include overall survival (O...,"Interventions include biospecimen collection, ...",Kaiser Permanente-Capitol Hill Medical Center,Washington,DC,20002,United States
275,275,275,NCT04267848,Testing the Addition of a Type of Drug Called ...,https://www.clinicaltrials.gov/study/NCT04267848,Recruiting,"Lung Non-Small Cell Carcinoma, Lung Non-Small ...","June 16, 2020","December 15, 2024",18 Years||Adult|Older Adult,Eligibility includes: sexually mature females ...,The primary outcome is to compare disease-free...,Secondary outcomes include overall survival (O...,"Interventions include biospecimen collection, ...",MedStar Georgetown University Hospital,Washington,DC,20007,United States
276,276,276,NCT04267848,Testing the Addition of a Type of Drug Called ...,https://www.clinicaltrials.gov/study/NCT04267848,Recruiting,"Lung Non-Small Cell Carcinoma, Lung Non-Small ...","June 16, 2020","December 15, 2024",18 Years||Adult|Older Adult,Eligibility includes: sexually mature females ...,The primary outcome is to compare disease-free...,Secondary outcomes include overall survival (O...,"Interventions include biospecimen collection, ...",MedStar Washington Hospital Center,Washington,DC,20010,United States
394,394,394,NCT04267848,Testing the Addition of a Type of Drug Called ...,https://www.clinicaltrials.gov/study/NCT04267848,Recruiting,"Lung Non-Small Cell Carcinoma, Lung Non-Small ...","June 16, 2020","December 15, 2024",18 Years||Adult|Older Adult,Eligibility includes: sexually mature females ...,The primary outcome is to compare disease-free...,Secondary outcomes include overall survival (O...,"Interventions include biospecimen collection, ...",Illinois CancerCare - Washington,Washington,Illinois,61571,United States
724,724,724,NCT04267848,Testing the Addition of a Type of Drug Called ...,https://www.clinicaltrials.gov/study/NCT04267848,Recruiting,"Lung Non-Small Cell Carcinoma, Lung Non-Small ...","June 16, 2020","December 15, 2024",18 Years||Adult|Older Adult,Eligibility includes: sexually mature females ...,The primary outcome is to compare disease-free...,Secondary outcomes include overall survival (O...,"Interventions include biospecimen collection, ...",Mercy Hospital Washington,Washington,Missouri,63090,United States
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16622,16622,16622,NCT05836584,Testing Immunotherapy (Atezolizumab) With or W...,https://www.clinicaltrials.gov/study/NCT05836584,Recruiting,"Clinical Stage I Gastric Cancer AJCC v8, Clini...","June 8, 2024","October 31, 2027",18 Years||Adult|Older Adult,Eligibility includes individuals 18 years or o...,The primary outcome measure is event-free surv...,Secondary outcomes include tumor regression gr...,Interventions involve two experimental arms. A...,Illinois CancerCare - Washington,Washington,Illinois,61571,United States
16952,16952,16952,NCT03866382,Testing the Effectiveness of Two Immunotherapy...,https://www.clinicaltrials.gov/study/NCT03866382,Recruiting,"Bladder Adenocarcinoma, Bladder Clear Cell Ade...","May 13, 2019","February 28, 2025",18 Years||Adult|Older Adult,Eligibility includes individuals 18 years or o...,The primary outcome is the Objective Response ...,Secondary outcomes include the duration of res...,"Participants will receive cabozantinib orally,...",MedStar Georgetown University Hospital,Washington,DC,20007,United States
17019,17019,17019,NCT03866382,Testing the Effectiveness of Two Immunotherapy...,https://www.clinicaltrials.gov/study/NCT03866382,Recruiting,"Bladder Adenocarcinoma, Bladder Clear Cell Ade...","May 13, 2019","February 28, 2025",18 Years||Adult|Older Adult,Eligibility includes individuals 18 years or o...,The primary outcome is the Objective Response ...,Secondary outcomes include the duration of res...,"Participants will receive cabozantinib orally,...",Illinois CancerCare - Washington,Washington,Illinois,61571,United States
17208,17208,17208,NCT03866382,Testing the Effectiveness of Two Immunotherapy...,https://www.clinicaltrials.gov/study/NCT03866382,Recruiting,"Bladder Adenocarcinoma, Bladder Clear Cell Ade...","May 13, 2019","February 28, 2025",18 Years||Adult|Older Adult,Eligibility includes individuals 18 years or o...,The primary outcome is the Objective Response ...,Secondary outcomes include the duration of res...,"Participants will receive cabozantinib orally,...",Mercy Hospital Washington,Washington,Missouri,63090,United States


In [198]:
items = ProcessQuery.process_query("What are potential clinical trial options for a patient with metastatic breast cancer in Irvine area?")

{'vector_db_scores_dict': {'NCT06072612': 0.7684220756613157, 'NCT03213925': 0.7608751977466823, 'NCT04925856': 0.760379866909558, 'NCT05872347': 0.7560558742068556}, 'vector_db_nct_numbers': ['NCT06072612', 'NCT03213925', 'NCT04925856', 'NCT05872347'], 'location': {'CITY': 'Irvine', 'STATE': 'California', 'COUNTRY': 'United States'}}


In [199]:
items = ProcessQuery.process_query("What are potential clinical trial options for a patient with metastatic breast cancer in Irvine area?")

{'vector_db_scores_dict': {'NCT06072612': 0.7684220756613157, 'NCT03213925': 0.7608751977466823, 'NCT04925856': 0.760379866909558, 'NCT05872347': 0.7560558742068556}, 'vector_db_nct_numbers': ['NCT06072612', 'NCT03213925', 'NCT04925856', 'NCT05872347'], 'location': {'CITY': 'Irvine', 'STATE': 'California', 'COUNTRY': 'United States'}}


In [200]:
items

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,NCT_NUMBER,STUDY_TITLE,STUDY_URL,STUDY_STATUS,CONDITIONS,START_DATE,COMPLETION_DATE,...,PRIMARY_OUTCOMES,SECONDARY_OUTCOMES,INTERVENTIONS,FACILITY,CITY,STATE,ZIP,COUNTRY,POINT_OF_CONTACT,score
40,10664,10664,10664,NCT06072612,Study of the Bria-IMT Regimen and CPI vs Physi...,https://www.clinicaltrials.gov/study/NCT06072612,Recruiting,"Breast Cancer, Metastatic Breast Cancer, Breas...","October 20, 2023",December 2025,...,The primary outcome is to assess the effect of...,Secondary outcomes include evaluating the Bria...,Interventions include the Bria-IMT regimen (Da...,New York Cancers & Blood Specialists,Port Jefferson Station,New York,11776,United States,"{'Name': None, 'Organization': None, 'Email': ...",0.768422


In [202]:
# Serialise the result rows as a JSON list of records, then parse that text
# back into plain Python dicts. `items` is already a DataFrame, so it is
# serialised directly rather than being copied into a new frame first.
json_payload = items.to_json(orient='records')
json_payload_dict = json.loads(json_payload)

In [203]:
json_payload_dict

[{'Unnamed: 0.2': 10664,
  'Unnamed: 0.1': 10664,
  'Unnamed: 0': 10664,
  'NCT_NUMBER': 'NCT06072612',
  'STUDY_TITLE': "Study of the Bria-IMT Regimen and CPI vs Physicians' Choice in Advanced Metastatic Breast Cancer.",
  'STUDY_URL': 'https://www.clinicaltrials.gov/study/NCT06072612',
  'STUDY_STATUS': 'Recruiting',
  'CONDITIONS': 'Breast Cancer, Metastatic Breast Cancer, Breast Neoplasm, Breast Cancer Metastatic, End Stage Cancer',
  'START_DATE': 'October 20, 2023',
  'COMPLETION_DATE': 'December 2025',
  'AGE': '18 Years||Adult|Older Adult',
  'ELIGIBILITY_CRITERIA': 'Eligibility includes individuals aged 18 or older with histologically confirmed breast cancer, either locally recurrent unresectable/metastatic, having failed prior therapy. Specific requirements vary by cancer subtype, including treatment history for HER2 positive, ER/PR positive, triple-negative, and HER2 low tumors. Brain metastases must be stable, with no steroids for 2 weeks prior to the first dose. Exclusions

In [201]:
# Scratch cell: a hard-coded result dict with the same three keys that
# ProcessQuery.process_query prints ('vector_db_scores_dict',
# 'vector_db_nct_numbers', 'location'), used to try out the post-processing
# steps without calling the vector store or the LLM.
values_dict = {'vector_db_scores_dict': {'NCT03191149': 0.797766271027995, 'NCT03865511': 0.78732116507551, 'NCT04335292': 0.7868135905788933, 'NCT04780568': 0.7826158698343295}, 'vector_db_nct_numbers': ['NCT03191149', 'NCT03865511', 'NCT04335292', 'NCT04780568'], 'location': {'CITY': 'Chicago', 'STATE': 'Illinois', 'COUNTRY': 'United States'}}



# Turn the {NCT number: score} mapping into a two-column frame
# (NCT_NUMBER, score). With the merge below commented out, scores_df is not
# consumed anywhere in this cell.
scores_df = pd.DataFrame.from_dict(values_dict['vector_db_scores_dict'], orient='index', columns=['score'])
scores_df.reset_index(inplace=True)
scores_df.columns = ['NCT_NUMBER', 'score']

# # Filter final DataFrame based on vector_db_nct_numbers
# Only the CSV load is active: every filter / merge / sort / location step
# below is commented out, so final_df holds the complete table as read from disk.
# NOTE(review): later cells edit final_df and write it back to this same CSV
# path, so re-enabling the filtering here would cause a reduced table to be
# written over the source file.
final_df = pd.read_csv('../ct_csv/FinalCTTrialsDF_P1.csv')
# final_df = final_df[final_df['NCT_NUMBER'].isin(values_dict['vector_db_nct_numbers'])]

# # # Merge final DataFrame with scores DataFrame
# final_df = pd.merge(final_df, scores_df, on='NCT_NUMBER')

# # # # Sort DataFrame based on scores
# final_df = final_df.sort_values(by='score', ascending=False)

# city_df = final_df[final_df['CITY'] == values_dict['location']['CITY']]
# state_df = final_df[final_df['STATE'] == values_dict['location']['STATE']]
# country_df = final_df[final_df['COUNTRY'] == values_dict['location']['COUNTRY']]

# # Concatenate DataFrames and drop duplicates
# final_df = pd.concat([city_df, state_df, country_df])
# final_df = final_df.drop_duplicates(subset='NCT_NUMBER', keep='first')




In [169]:
# Normalise the STATE value 'District of Columbia' to 'DC'.
name_change_dict = {'District of Columbia':'DC'}
# Assign the result back to the column. Calling replace(..., inplace=True) on
# the Series returned by final_df['STATE'] is a chained in-place operation:
# with pandas Copy-on-Write it changes a temporary object and leaves final_df
# untouched.
final_df['STATE'] = final_df['STATE'].replace(name_change_dict)

In [173]:
final_df[final_df['CITY']=='Washington']

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,NCT_NUMBER,STUDY_TITLE,STUDY_URL,STUDY_STATUS,CONDITIONS,START_DATE,COMPLETION_DATE,AGE,ELIGIBILITY_CRITERIA,PRIMARY_OUTCOMES,SECONDARY_OUTCOMES,INTERVENTIONS,FACILITY,CITY,STATE,ZIP,COUNTRY
274,274,274,NCT04267848,Testing the Addition of a Type of Drug Called ...,https://www.clinicaltrials.gov/study/NCT04267848,Recruiting,"Lung Non-Small Cell Carcinoma, Lung Non-Small ...","June 16, 2020","December 15, 2024",18 Years||Adult|Older Adult,Eligibility includes: sexually mature females ...,The primary outcome is to compare disease-free...,Secondary outcomes include overall survival (O...,"Interventions include biospecimen collection, ...",Kaiser Permanente-Capitol Hill Medical Center,Washington,DC,20002,United States
275,275,275,NCT04267848,Testing the Addition of a Type of Drug Called ...,https://www.clinicaltrials.gov/study/NCT04267848,Recruiting,"Lung Non-Small Cell Carcinoma, Lung Non-Small ...","June 16, 2020","December 15, 2024",18 Years||Adult|Older Adult,Eligibility includes: sexually mature females ...,The primary outcome is to compare disease-free...,Secondary outcomes include overall survival (O...,"Interventions include biospecimen collection, ...",MedStar Georgetown University Hospital,Washington,DC,20007,United States
276,276,276,NCT04267848,Testing the Addition of a Type of Drug Called ...,https://www.clinicaltrials.gov/study/NCT04267848,Recruiting,"Lung Non-Small Cell Carcinoma, Lung Non-Small ...","June 16, 2020","December 15, 2024",18 Years||Adult|Older Adult,Eligibility includes: sexually mature females ...,The primary outcome is to compare disease-free...,Secondary outcomes include overall survival (O...,"Interventions include biospecimen collection, ...",MedStar Washington Hospital Center,Washington,DC,20010,United States
394,394,394,NCT04267848,Testing the Addition of a Type of Drug Called ...,https://www.clinicaltrials.gov/study/NCT04267848,Recruiting,"Lung Non-Small Cell Carcinoma, Lung Non-Small ...","June 16, 2020","December 15, 2024",18 Years||Adult|Older Adult,Eligibility includes: sexually mature females ...,The primary outcome is to compare disease-free...,Secondary outcomes include overall survival (O...,"Interventions include biospecimen collection, ...",Illinois CancerCare - Washington,Washington,Illinois,61571,United States
724,724,724,NCT04267848,Testing the Addition of a Type of Drug Called ...,https://www.clinicaltrials.gov/study/NCT04267848,Recruiting,"Lung Non-Small Cell Carcinoma, Lung Non-Small ...","June 16, 2020","December 15, 2024",18 Years||Adult|Older Adult,Eligibility includes: sexually mature females ...,The primary outcome is to compare disease-free...,Secondary outcomes include overall survival (O...,"Interventions include biospecimen collection, ...",Mercy Hospital Washington,Washington,Missouri,63090,United States
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16622,16622,16622,NCT05836584,Testing Immunotherapy (Atezolizumab) With or W...,https://www.clinicaltrials.gov/study/NCT05836584,Recruiting,"Clinical Stage I Gastric Cancer AJCC v8, Clini...","June 8, 2024","October 31, 2027",18 Years||Adult|Older Adult,Eligibility includes individuals 18 years or o...,The primary outcome measure is event-free surv...,Secondary outcomes include tumor regression gr...,Interventions involve two experimental arms. A...,Illinois CancerCare - Washington,Washington,Illinois,61571,United States
16952,16952,16952,NCT03866382,Testing the Effectiveness of Two Immunotherapy...,https://www.clinicaltrials.gov/study/NCT03866382,Recruiting,"Bladder Adenocarcinoma, Bladder Clear Cell Ade...","May 13, 2019","February 28, 2025",18 Years||Adult|Older Adult,Eligibility includes individuals 18 years or o...,The primary outcome is the Objective Response ...,Secondary outcomes include the duration of res...,"Participants will receive cabozantinib orally,...",MedStar Georgetown University Hospital,Washington,DC,20007,United States
17019,17019,17019,NCT03866382,Testing the Effectiveness of Two Immunotherapy...,https://www.clinicaltrials.gov/study/NCT03866382,Recruiting,"Bladder Adenocarcinoma, Bladder Clear Cell Ade...","May 13, 2019","February 28, 2025",18 Years||Adult|Older Adult,Eligibility includes individuals 18 years or o...,The primary outcome is the Objective Response ...,Secondary outcomes include the duration of res...,"Participants will receive cabozantinib orally,...",Illinois CancerCare - Washington,Washington,Illinois,61571,United States
17208,17208,17208,NCT03866382,Testing the Effectiveness of Two Immunotherapy...,https://www.clinicaltrials.gov/study/NCT03866382,Recruiting,"Bladder Adenocarcinoma, Bladder Clear Cell Ade...","May 13, 2019","February 28, 2025",18 Years||Adult|Older Adult,Eligibility includes individuals 18 years or o...,The primary outcome is the Objective Response ...,Secondary outcomes include the duration of res...,"Participants will receive cabozantinib orally,...",Mercy Hospital Washington,Washington,Missouri,63090,United States


In [171]:
# Write the cleaned table back over the CSV it was read from. index=False
# keeps the row index out of the file; without it every read -> write round
# trip adds another 'Unnamed: 0[.N]' column, as the displayed tables show.
final_df.to_csv('../ct_csv/FinalCTTrialsDF_P1.csv', index=False)

Endpoint for entering ZIP codes:

In [204]:
!pip install uszipcode geopy

Collecting uszipcode
  Obtaining dependency information for uszipcode from https://files.pythonhosted.org/packages/b6/ba/a3e285c39363fe94c961cf483bd37064d19c454611d73c31dedd54e3373e/uszipcode-1.0.1-py2.py3-none-any.whl.metadata
  Downloading uszipcode-1.0.1-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting geopy
  Obtaining dependency information for geopy from https://files.pythonhosted.org/packages/e5/15/cf2a69ade4b194aa524ac75112d5caac37414b20a3a03e6865dfe0bd1539/geopy-2.4.1-py3-none-any.whl.metadata
  Downloading geopy-2.4.1-py3-none-any.whl.metadata (6.8 kB)
Collecting pathlib-mate (from uszipcode)
  Obtaining dependency information for pathlib-mate from https://files.pythonhosted.org/packages/5e/0f/6b6342767cb06562f4d67295191cd5c63775c04765766499a70265388db6/pathlib_mate-1.3.2-py3-none-any.whl.metadata
  Downloading pathlib_mate-1.3.2-py3-none-any.whl.metadata (8.4 kB)
Collecting atomicwrites (from uszipcode)
  Downloading atomicwrites-1.4.1.tar.gz (14 kB)
  Preparing metadata (s

In [205]:
from uszipcode import SearchEngine
from geopy.distance import geodesic

In [210]:
search = SearchEngine()

In [211]:
zip_info = search.by_zipcode(11205)

In [213]:
center_lat = zip_info.lat
center_lon = zip_info.lng

In [215]:
nearby_zip_codes = search.by_coordinates(center_lat, center_lon, radius=100, returns=0)

Exception ignored in: <function SearchEngine.__del__ at 0x1699af8b0>
Traceback (most recent call last):
  File "/Users/suryabhosale/miniforge3/envs/langchain/lib/python3.9/site-packages/uszipcode/search.py", line 195, in __del__
    if self.ses:
AttributeError: 'SearchEngine' object has no attribute 'ses'


In [None]:
# Measure the distance in miles from the centre point to every nearby ZIP
# code, then print the num_closest smallest distances.
num_closest = 100
distances = []
# A dedicated loop variable keeps the `zip_info` looked up for the centre ZIP
# code intact instead of overwriting it.
for nearby_zip in nearby_zip_codes:
    # Some zip codes might lack lat/lon info; compare against None so that a
    # legitimate 0.0 coordinate is not discarded.
    if nearby_zip.lat is not None and nearby_zip.lng is not None:
        distance = geodesic((center_lat, center_lon), (nearby_zip.lat, nearby_zip.lng)).miles
        distances.append((nearby_zip.zipcode, distance))

# Sort zip codes by distance and return the closest ones
closest_zip_codes = sorted(distances, key=lambda x: x[1])[:num_closest]
# `_zipcode` rather than `zip`: binding `zip` here would shadow the builtin
# for every later cell in the kernel.
for _zipcode, distance in closest_zip_codes:
    print(round(distance,2))

Function to find the ZIP codes closest to a user-supplied ZIP code

In [226]:
def get_closest_zip_codes(zip_code:int, num_closest:int=100, radius=100):
    """Return the ZIP codes nearest to ``zip_code``, closest first.

    Args:
        zip_code: ZIP code used as the centre of the search.
        num_closest: maximum number of entries to return.
        radius: search radius passed to ``SearchEngine.by_coordinates``.

    Returns:
        List of ``(zipcode, distance_in_miles)`` tuples sorted by ascending
        distance, with distances rounded to two decimals. The list is empty
        when ``zip_code`` cannot be resolved to coordinates.
    """
    distances = []
    # Use the engine as a context manager so it is closed when the lookup is done.
    with SearchEngine() as search_engine:
        zip_info = search_engine.by_zipcode(zip_code)
        # An unknown ZIP code has no centre to measure from; return no results
        # instead of failing on a missing attribute.
        if zip_info is None or zip_info.lat is None or zip_info.lng is None:
            return []
        center_lat, center_lon = zip_info.lat, zip_info.lng
        nearby_zip_codes = search_engine.by_coordinates(center_lat, center_lon, radius=radius, returns=0)
        for zip_instance in nearby_zip_codes:
            # Skip entries without coordinates; compare against None so a
            # legitimate 0.0 coordinate is kept.
            if zip_instance.lat is None or zip_instance.lng is None:
                continue
            distance = geodesic((center_lat, center_lon), (zip_instance.lat, zip_instance.lng)).miles
            distances.append((zip_instance.zipcode, distance))
    distances.sort(key=lambda pair: pair[1])
    return [(zipcode, round(miles, 2)) for zipcode, miles in distances[:num_closest]]

In [228]:
get_closest_zip_codes(11205)

[('11205', 0.0), ('11240', 0.53), ('11217', 0.87), ('11238', 0.87), ('11201', 1.05), ('11216', 1.26), ('11206', 1.72), ('10002', 1.73), ('10041', 1.73), ('10045', 1.73), ('10055', 1.73), ('10060', 1.73), ('10090', 1.73), ('10095', 1.73), ('10104', 1.73), ('10105', 1.73), ('10106', 1.73), ('10107', 1.73), ('10118', 1.73), ('10120', 1.73), ('10121', 1.73), ('10122', 1.73), ('10123', 1.73), ('10151', 1.73), ('10155', 1.73), ('10158', 1.73), ('10161', 1.73), ('10166', 1.73), ('10175', 1.73), ('10176', 1.73), ('10178', 1.73), ('10260', 1.73), ('10265', 1.73), ('10270', 1.73), ('10281', 1.73), ('11211', 1.73), ('10015', 2.09), ('10038', 2.09), ('10099', 2.09), ('11213', 2.09), ('11221', 2.1), ('11231', 2.21), ('11215', 2.32), ('11225', 2.32), ('10005', 2.51), ('10006', 2.51), ('10007', 2.51), ('10048', 2.51), ('10271', 2.51), ('10279', 2.51), ('10013', 2.6), ('10278', 2.6), ('10004', 2.63), ('11237', 2.72), ('11233', 2.72), ('10009', 2.81), ('11226', 2.81), ('11232', 2.95), ('10003', 2.95), 

In [246]:
import pandas as pd
temp_df = pd.read_csv('../check.csv')
# temp_df.ZIP[17929]

In [239]:
# Drop the rows that have no ZIP. Calling dropna(inplace=True) on the
# temp_df.ZIP column only affects that extracted Series and never removes rows
# from temp_df, so the filtered frame is assigned back instead.
temp_df = temp_df.dropna(subset=['ZIP'])

In [247]:
temp_df.dropna(subset=['ZIP']).ZIP.apply(type)

0      <class 'str'>
1      <class 'str'>
2      <class 'str'>
3      <class 'str'>
4      <class 'str'>
           ...      
340    <class 'str'>
341    <class 'str'>
342    <class 'str'>
343    <class 'str'>
344    <class 'str'>
Name: ZIP, Length: 345, dtype: object

In [248]:
temp_df

Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,NCT_NUMBER,STUDY_TITLE,STUDY_URL,STUDY_STATUS,CONDITIONS,START_DATE,...,PRIMARY_OUTCOMES,SECONDARY_OUTCOMES,INTERVENTIONS,FACILITY,CITY,STATE,ZIP,COUNTRY,POINT_OF_CONTACT,score
0,0,7744,7744,7744,NCT03191149,Osimertinib in Treating Patients With Stage II...,https://www.clinicaltrials.gov/study/NCT03191149,Recruiting,"Advanced Lung Non-Small Cell Carcinoma, Recurr...","April 25, 2018",...,The primary outcome is the best objective resp...,Secondary outcomes include progression-free su...,Participants will receive osimertinib orally o...,Anchorage Associates in Radiation Medicine,Anchorage,Alaska,98508,United States,"{'Name': None, 'Organization': None, 'Email': ...",0.813524
1,242,7986,7986,7986,NCT03191149,Osimertinib in Treating Patients With Stage II...,https://www.clinicaltrials.gov/study/NCT03191149,Recruiting,"Advanced Lung Non-Small Cell Carcinoma, Recurr...","April 25, 2018",...,The primary outcome is the best objective resp...,Secondary outcomes include progression-free su...,Participants will receive osimertinib orally o...,University of Oklahoma Health Sciences Center,Oklahoma City,Oklahoma,73104,United States,"{'Name': None, 'Organization': None, 'Email': ...",0.813524
2,220,7964,7964,7964,NCT03191149,Osimertinib in Treating Patients With Stage II...,https://www.clinicaltrials.gov/study/NCT03191149,Recruiting,"Advanced Lung Non-Small Cell Carcinoma, Recurr...","April 25, 2018",...,The primary outcome is the best objective resp...,Secondary outcomes include progression-free su...,Participants will receive osimertinib orally o...,Oncology Hematology Care Inc-Kenwood,Cincinnati,Ohio,45236,United States,"{'Name': None, 'Organization': None, 'Email': ...",0.813524
3,219,7963,7963,7963,NCT03191149,Osimertinib in Treating Patients With Stage II...,https://www.clinicaltrials.gov/study/NCT03191149,Recruiting,"Advanced Lung Non-Small Cell Carcinoma, Recurr...","April 25, 2018",...,The primary outcome is the best objective resp...,Secondary outcomes include progression-free su...,Participants will receive osimertinib orally o...,Miami Valley Hospital South,Centerville,Ohio,45459,United States,"{'Name': None, 'Organization': None, 'Email': ...",0.813524
4,218,7962,7962,7962,NCT03191149,Osimertinib in Treating Patients With Stage II...,https://www.clinicaltrials.gov/study/NCT03191149,Recruiting,"Advanced Lung Non-Small Cell Carcinoma, Recurr...","April 25, 2018",...,The primary outcome is the best objective resp...,Secondary outcomes include progression-free su...,Participants will receive osimertinib orally o...,Dayton Physicians LLC-Miami Valley South,Centerville,Ohio,45459,United States,"{'Name': None, 'Organization': None, 'Email': ...",0.813524
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
340,341,16554,16554,16554,NCT04335292,Osimertinib Then Chemotherapy in EGFR-mutated ...,https://www.clinicaltrials.gov/study/NCT04335292,Recruiting,Non-Small Cell Lung Cancer,"January 6, 2021",...,The primary outcome is the Objective Response ...,Secondary outcomes include Progression Free Su...,Interventions include first-line treatment wit...,"Durham Regional Cancer Centre, Lakeridge Health",Oshawa,Ontario,L1G 2B9,Canada,"{'Name': None, 'Organization': None, 'Email': ...",0.807129
341,342,16555,16555,16555,NCT04335292,Osimertinib Then Chemotherapy in EGFR-mutated ...,https://www.clinicaltrials.gov/study/NCT04335292,Recruiting,Non-Small Cell Lung Cancer,"January 6, 2021",...,The primary outcome is the Objective Response ...,Secondary outcomes include Progression Free Su...,Interventions include first-line treatment wit...,The Ottawa Hospital,Ottawa,Ontario,K1H 8L6,Canada,"{'Name': None, 'Organization': None, 'Email': ...",0.807129
342,343,16556,16556,16556,NCT04335292,Osimertinib Then Chemotherapy in EGFR-mutated ...,https://www.clinicaltrials.gov/study/NCT04335292,Recruiting,Non-Small Cell Lung Cancer,"January 6, 2021",...,The primary outcome is the Objective Response ...,Secondary outcomes include Progression Free Su...,Interventions include first-line treatment wit...,Sunnybrook Research Institute,Toronto,Ontario,M4N 3M5,Canada,"{'Name': None, 'Organization': None, 'Email': ...",0.807129
343,344,16557,16557,16557,NCT04335292,Osimertinib Then Chemotherapy in EGFR-mutated ...,https://www.clinicaltrials.gov/study/NCT04335292,Recruiting,Non-Small Cell Lung Cancer,"January 6, 2021",...,The primary outcome is the Objective Response ...,Secondary outcomes include Progression Free Su...,Interventions include first-line treatment wit...,Princess Margaret Hospital,Toronto,Ontario,M5G 2M9,Canada,"{'Name': None, 'Organization': None, 'Email': ...",0.807129


In [249]:
temp_zip_list = ['11205', '11240', '11217', '11238', '11201', '11216', '11206', '10002', '10041', '10045', '10055', '10060', '10090', '10095', '10104', '10105', '10106', '10107', '10118', '10120', '10121', '10122', '10123', '10151', '10155', '10158', '10161', '10166', '10175', '10176', '10178', '10260', '10265', '10270', '10281', '11211', '10015', '10038', '10099', '11213', '11221', '11231', '11215', '11225', '10005', '10006', '10007', '10048', '10271', '10279', '10013', '10278', '10004', '11237', '11233', '10009', '11226', '11232', '10003', '11222', '10280', '10012', '10282', '07309', '10014', '10010', '11218', '07311', '11212', '10011', '11203', '11241', '11242', '11243', '11249', '11252', '11256', '11101', '10017', '10167', '10016', '10110', '10165', '10168', '10169', '10170', '10173', '10174', '11109', '11378', '11385', '10098', '10119', '11210', '11104', '11220', '11207', '10001', '10199', '11219', '07310', '07302', '11230', '10022', '10152', '10153', '10154', '10171', '10172', '10020', '10103', '10111', '10112', '10177', '10065', '11204', '07304', '11379', '10018', '10036', '10044', '11377', '11236', '07030', '11106', '11228', '11208', '11239', '10021', '10019', '10075', '10162', '10200', '11103', '11209', '07305', '11373', '11102', '07306', '07086', '11374', '11223', '10023', '07307', '11372', '10069', '10028', '10128', '11421', '11416', '07087', '11234', '11214', '11229', '11369', '11418', '11417', '11370', '11105', '10029', '11375', '11414', '11368', '07093', '07047', '11415', '10301', '10024', '10025', '11371', '07002', '10026', '11224', '11235', '07094', '11367', '10035', '11419', '10305', '10115', '10027', '10304', '07022', '10037', '11435', '07032', '11420', '10454', '11354', '10455', '11355', '10310', '11436', '07020', '07010', '10030', '07105', '10451', '11356', '11433', '11697', '10474', '11351', '11432', '10031', '10039', '10302', '11366', '11430', '07657', '11434', '11365', '07029', '07073', '07072', '11358', '10456', '11694', '07650', '07031', 
'10032', '07201', '10459', '07071', '10473', '10452', '07114', '07070', '11357', '11693', '07643', '10472', '07024', '07102', '11423', '07074', '10033', '10303', '07660', '10453', '11412', '11361', '10465', '10460', '10457', '11692', '10306', '07206', '11413', '11360', '11364', '07605', '10462', '07104', '11427', '10040', '07608', '07109', '11359', '11429', '10314', '07108', '11428', '07606', '10311', '07075', '10458', '07107', '11096', '07103', '07110', '11411', '11691', '07112', '10034', '11422', '11363', '07603', '07604', '10468', '07057', '10461', '11362', '07014', '07632', '11516', '07666', '07202', '11581', '10467', '10463', '07018', '10308', '07055', '11426', '07601', '07205', '07208', '07017', '11598', '07003', '07631', '07111', '10469', '11021', '07106', '11004', '07026', '11003', '07028', '07644', '11001', '11580', '11509', '07050', '11005', '07036', '10312', '11023', '11020', '10470', '07012', '10471', '11042', '11559', '11024', '07013', '11557', '10466', '07008', '07670', '07083', '07607', '10464', '10475', '07042', '11040', '07203', '07079', '11010', '07663', '07204', '11565', '07011', '07043', '07621', '07040', '11563', '11030', '07662', '07407', '07661', '07033', '10705', '10550', '11518', '07052', '10803', '07646', '11552', '10704', '07064', '07088', '10553', '07009', '07044', '10309', '07626', '07628', '07503', '07065', '07041', '11530', '11599', '10552', '07001', '07016', '10805', '07504', '11050', '07424', '11501', '07513', '07627', '11570', '11596', '07652', '07077', '07410', '07501', '07510', '11561', '11507', '10701', '07649', '11572', '07081', '11577', '11558', '11576', '07066', '07021', '07027', '07641', '10708', '07514', '10307', '11550', '07095', '07505', '10801', '11510', '07067', '07734', '07758', '07078', '10703', '07522', '07524', '07624', '11514', '07039', '07630', '07068', '07512', '07502', '10538', '07090', '07006', '08861', '11548', '07718', '08830', '07737', '10804', '11553', '07640', '10709', '07732', '11579', '10707', '07716', 
'07506', '10710', '07452', '07092', '11575', '07901', '07676', '11556', '08863', '11520', '10706', '07648', '11590', '11568', '07450', '11542', '07730', '07647', '07076', '07675', '11545', '10543', '07508', '07735', '07642', '07004', '08832', '07928', '07023', '07748', '07721', '07936', '11554', '11566', '07470', '08820', '07423', '07432', '10583', '07974', '08879', '07058', '07932', '10983', '10522', '07760', '10976', '08840', '07481', '07062', '10964', '07704', '10528', '10502', '07677', '11753', '07463', '07940']

In [256]:
temp_df[temp_df['ZIP'].isin(temp_zip_list)]

Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,NCT_NUMBER,STUDY_TITLE,STUDY_URL,STUDY_STATUS,CONDITIONS,START_DATE,...,PRIMARY_OUTCOMES,SECONDARY_OUTCOMES,INTERVENTIONS,FACILITY,CITY,STATE,ZIP,COUNTRY,POINT_OF_CONTACT,score
0,0,7744,7744,7744,NCT03191149,Osimertinib in Treating Patients With Stage II...,https://www.clinicaltrials.gov/study/NCT03191149,Recruiting,"Advanced Lung Non-Small Cell Carcinoma, Recurr...","April 25, 2018",...,The primary outcome is the best objective resp...,Secondary outcomes include progression-free su...,Participants will receive osimertinib orally o...,Anchorage Associates in Radiation Medicine,Anchorage,Alaska,98508,United States,"{'Name': None, 'Organization': None, 'Email': ...",0.813524


In [255]:
temp_zip_list.append('98508')

In [257]:
location = {'CITY': 'Chicago', 'STATE': 'Illinois', 'COUNTRY': 'United States'}

In [259]:
location_info = search.by_city(location['CITY'])[0]
location_info

SimpleZipcode(zipcode='60601', zipcode_type='STANDARD', major_city='Chicago', post_office_city='Chicago, IL', common_city_list=['Chicago'], county='Cook County', state='IL', lat=41.89, lng=-87.62, timezone='America/Chicago', radius_in_miles=0.7954545454545454, area_code_list='312,773,872', population=11110, population_density=28793.0, land_area_in_sqmi=0.39, water_area_in_sqmi=0.0, housing_units=8594, occupied_housing_units=6739, median_home_value=494200, median_household_income=95556, bounds_west=-87.633967, bounds_east=-87.61328, bounds_north=41.889045, bounds_south=41.880829)

In [None]:
center_lat = location_info.lat
center_lon = location_info.lng