## Import Dependencies

In [20]:
import pandas as pd
import requests
import json
from ekata_utility import key

In [2]:
def is_valid_phone(phone):
    """Return True when `phone` looks like a plausible 10-digit phone number.

    The value is converted with ``str`` first. It passes only if it is exactly
    10 characters long, consists solely of digits, does not begin with '0',
    and uses more than two distinct digits (which rejects filler values such
    as '1111111111' or '1212121212').
    """
    digits = str(phone)
    if len(digits) != 10:
        return False
    if not digits.isdigit():
        return False
    if digits.startswith('0'):
        return False
    return len(set(digits)) > 2

## Define reverse lookup function

This function calls the Whitepages Pro (Ekata) reverse-phone API for a single phone number and returns two things: a dictionary of selected fields from the response, and the full JSON response itself.

In [3]:
def _dig(data, path, default):
    """Follow `path` (dict keys / list indexes) into nested JSON; return `default` if any step is missing."""
    try:
        for step in path:
            data = data[step]
    except (KeyError, IndexError, TypeError):
        # TypeError covers a null (None) or otherwise non-subscriptable intermediate value.
        return default
    return data


def reverse_lookup(phone_number, timeout=30):
    """Look up one phone number with the Whitepages Pro reverse-phone API.

    Parameters
    ----------
    phone_number : str
        Number to look up; sent as the ``phone`` query parameter.
    timeout : float, optional
        Seconds to wait for the HTTP response. Without a timeout a stalled
        connection would block the notebook indefinitely.

    Returns
    -------
    tuple
        ``(info_dict, results)``. ``results`` is the decoded JSON response.
        ``info_dict`` holds ``OFFICE_TELEPHONE`` (the number as passed in),
        a fixed set of fields copied out of ``results`` ('N/A' when a field
        is absent), and ``Error`` (the response's ``error`` entry, or the
        string 'None' when there is none).

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems or when the timeout expires.
    ValueError
        (or a subclass) when the response body is not valid JSON.
    """
    base_url = 'https://proapi.whitepages.com/3.1/phone?'
    parameters = {'phone': phone_number, 'api_key': key}
    response = requests.get(base_url, params=parameters, timeout=timeout)
    results = response.json()

    # Output column -> path into the JSON response. The order here is the
    # column order of the resulting dictionary.
    field_paths = (
        ('Name', ('belongs_to', 'name')),
        ('Address', ('current_addresses', 0, 'street_line_1')),
        ('City', ('current_addresses', 0, 'city')),
        ('State', ('current_addresses', 0, 'state_code')),
        ('PhoneType', ('belongs_to', 'type')),
        ('Valid', ('is_valid',)),
        ('Date', ('current_addresses', 0, 'link_to_person_start_date')),
        ('LineType', ('line_type',)),
        ('Carrier', ('carrier',)),
        ('Commercial', ('is_commercial',)),
        ('Industry', ('belongs_to', 'industry', 0)),
        ('AlternatePhone', ('alternate_phones', 0, 'phone_number')),
    )

    info_dict = {'OFFICE_TELEPHONE': phone_number}
    for column, path in field_paths:
        info_dict[column] = _dig(results, path, 'N/A')
    # A missing error is recorded as the string 'None', not 'N/A'.
    info_dict['Error'] = _dig(results, ('error',), 'None')
    return (info_dict, results)


## Define the test_numbers function

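This function calls `reverse_lookup` on each number in a dataframe's `OFFICE_TELEPHONE` column. It returns, in this order, a list of the raw JSON responses and a dataframe of the extracted fields, and it also writes the raw responses to `<filename>_data.txt`.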

In [24]:
def test_numbers(dataframe,filename):
    results_dict_list = []
    fun_massive_list = []
    count = 0
    for row in dataframe.itertuples():
        count += 1
        print(count)
        new_dict = {}
        phone = str(row.OFFICE_TELEPHONE)[0:10]
        if is_valid_phone(phone) == False:
            print(f'Entry {count}: {phone} is not a valid phone number')
            break
        try:
            new_dict, phone_results = reverse_lookup(phone)
            results_dict_list.append(new_dict)
            fun_massive_list.append(phone_results)
        except:
            print(f'Reverse Phone lookup did not work')
            print(new_dict)
            print(phone_results)
            break
    try: 
        new_df = pd.DataFrame(results_dict_list)
    except: 
        return(fun_massive_list, {})
    try:
        with open(f'{filename}_data.txt', 'w') as outfile:
            json.dump(fun_massive_list, outfile)
    except:
        pass
    return(fun_massive_list, new_df)

## Read and test the connected numbers csv and export results to new csv

In [67]:
# Look up every number in Books.csv and export the extracted fields.
connected_short = pd.read_csv('Books.csv')
# test_numbers takes the prefix for its raw-JSON dump as second argument;
# the raw responses go to ekata_connected_data.txt.
long_list, connected_df = test_numbers(connected_short, 'ekata_connected')
connected_df.to_csv('ekata_connected.csv')

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35


## Testing a short list of disconnected numbers

In [74]:
# Load the short list of disconnected numbers and preview its first rows.
disconnected_short = pd.read_csv('disconnected_short.csv')
disconnected_short.head()

Unnamed: 0,Address,City,Date,Name,Notes,OFFICE_TELEPHONE,PhoneType,Provider,QualityScore,State,Zipcode,PHYSICIAN_FIRST_NAME,PHYSICIAN_LAST_NAME,OFFICE_ADDRESS_LINE_2,OFFICE_ADDRESS_CITY,OFFICE_ADDRESS_STATE,OFFICE_ADDRESS_ZIP,COMMENTS
0,116 NORTHPORT AVE STE 112,BELFAST,7/21/2009,WALDO COUNTY GENERAL HOSPITAL,"IsMailable,IsPorted",2079303000.0,BUSINESS,LINCOLNVILLE COMM,LOW,ME,4915.0,MATTHEW,MOLISON,119 NORTHPORT AVE,BELFAST,ME,4915.0,NOT IN SERVICE
1,210 PROFESSIONAL PARK DR SE STE 12,BLACKSBURG,,NEUROLOGY SERVICES OF SWVA INC,"IsMailable,IsPossibleDisconnected,INF",5409610000.0,BUSINESS,VERIZON VIRGINIA INC,LOW,VA,24060.0,STEVEN,NACK,210 PROFESSIONAL PARK DR SE STE 12,BLACKSBURG,VA,24060.0,NOT IN SERVICE
2,,LOS ANGELES,,LA CTY OF,IsMailable,3232268000.0,BUSINESS,PACIFIC BELL,LOW,CA,90089.0,STEPHEN,NGUYEN,1190 VETERANS BLVD,REDWOOD CITY,CA,94063.0,NOT IN SERVICE
3,,PLANT CITY,9/10/2015,CONTINUCARE,"IsConnected,IsPorted",8137542000.0,RESIDENTIAL,TELEPORT COMM AM-FL,MED,FL,33563.0,CHINYERE,NNADI,228 W ALEXANDER ST,PLANT CITY,FL,33563.0,NOT IN SERVICE
4,1825 MAPLE RD,BUFFALO,7/26/2019,ALLYN M NORMAN DO PHYSICIANS GENERAL,"IsMailable,IsConnected,IsPorted",7162045000.0,BUSINESS,BANDWIDTH.COM - NY,HIGH,NY,14221.0,ALLYN,NORMAN,1825 MAPLE RD STE 100,WILLIAMSVILLE,NY,14221.0,NOT IN SERVICE


Test the numbers:

In [76]:
# Look up only the first five rows. test_numbers takes the prefix for its
# raw-JSON dump as second argument; the dump goes to disconnected_short_data.txt.
long_list, disconnected_df = test_numbers(disconnected_short.head(), 'disconnected_short')

1
2
3
4
5


Print the dataframe:

In [77]:
# Display the extracted lookup fields for the sample numbers.
disconnected_df

Unnamed: 0,Address,AlternatePhone,Carrier,City,Commercial,Date,Error,Industry,LineType,Name,OFFICE_TELEPHONE,PhoneType,State,Valid
0,116 Northport Ave Apt 214,12073389290.0,Lincolnville Communications,Belfast,True,2016-11-18,,Ambulatory Health Care Services,Landline,County General Hospital,2079302639,Business,ME,True
1,210 Professional Park Dr SE Ste 12,,Verizon,Blacksburg,True,2016-11-15,,Ambulatory Health Care Services,Landline,Steven Douglas Nack DO,5409610410,Business,VA,True
2,1200 N State St Ste CT-A7d,,Pacific Bell,Los Angeles,True,2019-02-28,,Ambulatory Health Care Services,Landline,"Chen, Erica Cua MD",3232267556,Business,CA,True
3,228 W Alexander St,18137545480.0,Teleport Communications America,Plant City,True,2016-11-17,,Ambulatory Health Care Services,Landline,"Molodecki, Anthony M PA",8137542251,Business,FL,True
4,1825 Maple Rd Apt 100,,Bandwidth.com CLEC,Buffalo,True,2017-05-06,,Ambulatory Health Care Services,NonFixedVOIP,Adam Jason Norman MD,7162044532,Business,NY,True


Export to csv:

In [78]:
# Save the sample results. to_csv writes the DataFrame index as the first,
# unnamed column by default, which shows up as 'Unnamed: 0' when re-read.
disconnected_df.to_csv('shorty.csv')

## Read and test the remaining disconnected numbers

In [80]:
new_disconnected_df = pd.read_csv('disconnected_for_ekata.csv')
# Fixed seed so the same 35 rows are drawn on every run; otherwise the
# lookups and exported results below cannot be reproduced.
disconnected_phone_df = new_disconnected_df.sample(n=35, random_state=42)
disconnected_phone_df

Unnamed: 0,Address,City,Date,Name,Notes,OFFICE_TELEPHONE,PhoneType,Provider,QualityScore,State,Zipcode,PHYSICIAN_FIRST_NAME,PHYSICIAN_LAST_NAME,OFFICE_ADDRESS_LINE_2,OFFICE_ADDRESS_CITY,OFFICE_ADDRESS_STATE,OFFICE_ADDRESS_ZIP,COMMENTS,Unnamed: 18
15,,,3/27/2013,DEPAUL M HOPE,"IsConnected,IsPorted",5857773511,RESIDENTIAL,TIME WARNER CABL-NY,MED,,,KIMBERLY,ERWAY,150 MOUNT HOPE AVE,ROCHESTER,NY,14620.0,NOT IN SERVICE,
14,,BETHLEHEM,7/30/2015,S GIRLY,"IsConnected,IsPorted",6104199092,RESIDENTIAL,RCN TELECOM SVCS PA,MED,PA,18015.0,CATHERINE,DURISHIN,2425 13TH ST NW,CANTON,OH,44708.0,NOT IN SERVICE,
0,,SAINT PETERSBURG,,L BHATTACHARJEE,"IsPossibleDisconnected,INF",7273020000,RESIDENTIAL,FRONTIER COMM OF FL,LOW,FL,33710.0,LAKSHMISRI,BHATTACHARJEE,9555 SEMINOLE BLVD STE 104,SEMINOLE,FL,33772.0,NOT IN SERVICE,
23,,SAN DIEGO,,G SRVS,"IsPossibleDisconnected,INF",8585739902,BUSINESS,PACIFIC BELL,LOW,CA,92111.0,AIREEN,GUTIERREZ,10173 FAIRHILL DR,SPRING VALLEY,CA,91977.0,NOT IN SERVICE,
48,152653 PO BOX,SAN DIEGO,3/27/2004,JUSTIN A ROBERTS,"IsMailable,IsPorted,IsWireless",6194027300,RESIDENTIAL,"T-MOBILE USA, INC.",LOW,CA,92195.0,PATRICIA,PISINGER,971 LANE AVE,CHULA VISTA,CA,91914.0,NOT IN SERVICE,
18,,GIBSONTON,,GSA DEPT OF VA,,8139727629,BUSINESS,FRONTIER COMM OF FL,LOW,FL,33534.0,SURINDER,GILL,5637 MARINE PKWY,NEW PRT RCHY,FL,34652.0,NOT IN SERVICE,
79,131 FULTON AVE,HEMPSTEAD,4/30/2014,ALLAN YOUNG MD PHYSICIANS GENERAL,"IsMailable,IsConnected,IsPorted",5164839020,BUSINESS,CABLEVSN LGHTPATH NY,HIGH,NY,11550.0,ALLAN,YOUNG,165 N VILLAGE AVE STE 5,ROCKVILLE CTR,NY,11570.0,NOT IN SERVICE,
36,619 19TH ST S,BIRMINGHAM,,UAB,"IsMailable,IsConnected",2059393361,BUSINESS,BELLSOUTH SO CNTL,HIGH,AL,35233.0,ALICIA,LEADFORD,2000 CHURCH ST,NASHVILLE,TN,37236.0,NOT IN SERVICE,
40,3955 EAGLE CREEK PKWY,INDIANAPOLIS,,MASSICOTTE STEPHEN J MD PC,"IsMailable,IsPossibleDisconnected,INF",3172808410,BUSINESS,AMERITECH INDIANA,LOW,IN,46254.0,STEPHEN,MASSICOTTE,3955 EAGLE CREEK PKWY STE C,INDIANAPOLIS,IN,46254.0,NOT IN SERVICE,
78,200 MAIN ST STE 350,PAWTUCKET,,ANESTHESIA CARE INC,"IsMailable,IsPossibleDisconnected,INF",4017267300,BUSINESS,VERIZON NEW ENGLAND,LOW,RI,2860.0,SUSAN,WUTHRICH,200 MAIN ST STE 350,PAWTUCKET,RI,2860.0,NOT IN SERVICE,


Test the sampled numbers:

In [81]:
# Look up the 35 sampled numbers. test_numbers takes the prefix for its
# raw-JSON dump as second argument; the dump goes to ekata_disconnected_data.txt.
# NOTE: this rebinds new_disconnected_df from the frame read out of
# disconnected_for_ekata.csv to the lookup results.
dis_long_list, new_disconnected_df = test_numbers(disconnected_phone_df, 'ekata_disconnected')

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35


Export to csv

In [83]:
# Save the lookup results for the sampled numbers (the index is written as the first column).
new_disconnected_df.to_csv('ekata_disconnected.csv')

## New API Calls

In [13]:
# Read OFFICE_TELEPHONE as text. Phone numbers are identifiers, not
# quantities, and the lookup results store them as strings, so the column
# must be str to serve as a merge key against those results.
disconnected = pd.read_csv('EKATA_MODEL_DISCONNECTED.csv', dtype={'OFFICE_TELEPHONE': str})
connected = pd.read_csv('EKATA_MODEL_CONNECTED.csv', dtype={'OFFICE_TELEPHONE': str})

In [29]:
# Look up every row of `connected` from position 5 onward; the raw responses
# are also dumped to connected_data.txt.
# NOTE(review): the first five rows are presumably the source of
# short_connected_df / short_connected_list used further down, but no visible
# cell creates them — confirm.
connected_list, connected_df =  test_numbers(connected[5:], 'connected')

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93


In [31]:
# Export the lookup results for the connected numbers.
connected_df.to_csv('connected_ekata_september_19.csv')
# NOTE(review): short_connected_df is not assigned in any cell visible in this
# notebook, so this line raises NameError on a fresh kernel — restore the cell
# that created it.
short_connected_df.to_csv('short_connected_ekata_september_19.csv')

In [32]:
# Look up every number in `disconnected`; the raw responses are also dumped
# to disconnected_data.txt.
disconnected_list, disconnected_df =  test_numbers(disconnected, 'disconnected')

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101


In [34]:
# Export the lookup results for the disconnected numbers (the index is written as the first column).
disconnected_df.to_csv('disconnected_ekata_september_19.csv')

In [42]:
# Join the lookup results back onto the source rows by phone number.
# The lookup frames hold OFFICE_TELEPHONE as str, so the source column is
# cast to str here, on a copy. That keeps the merge independent of cell
# execution order: pandas refuses to merge a str key with a numeric key.
disconnected_all = pd.merge(
    disconnected_df,
    disconnected.astype({'OFFICE_TELEPHONE': str}),
    on='OFFICE_TELEPHONE',
)
connected_all = pd.merge(
    connected_df,
    connected.astype({'OFFICE_TELEPHONE': str}),
    on='OFFICE_TELEPHONE',
)

In [40]:
# Cast the phone column of both source frames to str, in place, so it has the
# same type as the OFFICE_TELEPHONE strings stored in the lookup results.
# NOTE(review): the execution counts show this cell ran (In [40]) before the
# merge cell above it (In [42]); on a top-to-bottom run it executes after the
# merge — confirm the intended order.
disconnected['OFFICE_TELEPHONE'] = disconnected['OFFICE_TELEPHONE'].astype('str')
connected['OFFICE_TELEPHONE'] = connected['OFFICE_TELEPHONE'].astype('str')

In [44]:
# Replace every missing value in both merged tables with the literal string "None".
disconnected_all, connected_all = (
    merged.fillna("None") for merged in (disconnected_all, connected_all)
)

In [112]:
# Write both merged tables to CSV without the index column.
disconnected_all.to_csv('disconnected_ekata_all_september_19.csv',index=False)
connected_all.to_csv('connected_ekata_all_september_19.csv',index=False)

In [107]:
def extract_zips(raw_results):
    """Return the postal code of the first current address of each raw lookup result.

    A result without a usable ``current_addresses[0]['postal_code']`` yields
    the string 'None', so the output has exactly one item per input result.
    """
    zip_codes = []
    for number in raw_results:
        try:
            zip_codes.append(number['current_addresses'][0]['postal_code'])
        except (KeyError, IndexError, TypeError):
            # Missing key, empty address list, or a null in place of a dict/list.
            zip_codes.append('None')
    return zip_codes


# `zips` and `zips2` are used by later cells but were not assigned anywhere.
# NOTE(review): their sources are inferred — `zips` starts with the postal
# code of disconnected_list[0], and `zips2` is attached to connected_all — confirm.
zips = extract_zips(disconnected_list)
zips2 = extract_zips(connected_list)
zips3 = extract_zips(short_connected_list)

In [53]:
# Spot check: postal code of the first current address in the first raw disconnected result.
disconnected_list[0]['current_addresses'][0]['postal_code']

'21220'

In [56]:
# Display the postal codes held in `zips`; an entry is the string 'None' where no postal code was available.
zips

['21220',
 '89110',
 'P0G 1G0',
 '19713',
 '92335',
 '85018',
 '10025',
 '76210',
 '02467',
 '84094',
 '44460',
 '20892',
 '48126',
 '78404',
 '20817',
 '21045',
 '92870',
 '44028',
 '91302',
 '28314',
 '28467',
 '92057',
 '78102',
 '18201',
 '97229',
 '79119',
 '33021',
 '47802',
 '30606',
 '38103',
 '95815',
 '94061',
 '07102',
 '83605',
 '93465',
 '07920',
 '98683',
 '98007',
 '31312',
 '85016',
 '54701',
 '92618',
 '10025',
 '93215',
 '02135',
 '92311',
 '46041',
 '55804',
 '27710',
 'None',
 'None',
 '10025',
 '02895',
 '38132',
 '94015',
 '46307',
 '07102',
 '36608',
 '94112',
 '19301',
 '02908',
 '20876',
 '00926',
 '84124',
 '92806',
 '37412',
 '91367',
 '37764',
 '32907',
 '37130',
 '02124',
 '92405',
 '03103',
 '60936',
 '48314',
 '80219',
 '21046',
 '85234',
 '98006',
 '10003',
 '00970',
 '29605',
 '96813',
 '48075',
 '00969',
 '47203',
 '60564',
 '44708',
 '92335',
 '73036',
 '01604',
 '27514',
 '37650',
 '10025',
 '50701',
 '30041',
 '60169',
 '73071',
 '38343',
 '40536',


In [92]:
def _associated_people_record(number):
    """Build one table row from a raw lookup result: its phone number and associated people's names."""
    # A result with no 'associated_people' entry (or a null one) counts as
    # having no associated people instead of aborting the whole cell.
    peeps = number.get('associated_people') or []
    try:
        # Drop the first two characters (the '+1' prefix of e.g. '+14106869019').
        phone = number['phone_number'][2:]
    except (KeyError, TypeError):
        phone = 'wut'
    return {
        'OFFICE_TELEPHONE': phone,
        # Names are listed last-to-first, i.e. in reverse of the response order.
        'Associated_people': [person['name'] for person in reversed(peeps)],
    }


associated_people_list = [_associated_people_record(number) for number in connected_list]
pd.DataFrame(associated_people_list)

Unnamed: 0,Associated_people,OFFICE_TELEPHONE
0,[Dr. Tina Jones Brown Sr.],7704382942
1,"[Jessica Moran, Jennifer Anne Haydu, Linda Die...",9085222232
2,[],2156623606
3,[],3104236143
4,[],6147884440
5,[],4097723410
6,"[Ms. Amy Danielle Huddleston, Ms. Terri Lee Br...",4052309435
7,"[Aliyu Ahmadu Ojarigi, Mr. David M Oster, Sand...",6126263000
8,"[Mr. Leeroy Roy Mccurley, Dr. Ray Friedman Aro...",9722635272
9,[],4695579627


In [88]:
# Spot check: number of associated people on the raw connected result at index 23.
len(connected_list[23]['associated_people'])

1

In [91]:
# Display the raw JSON responses for the disconnected numbers.
# NOTE(review): this renders every response, including names and addresses;
# consider showing a single entry (e.g. disconnected_list[0]) and clearing the
# output before sharing the notebook.
disconnected_list

[{'id': 'Phone.b2376fef-a2e0-4b08-cfe3-bc7128b7e993',
  'phone_number': '+14106869019',
  'is_valid': True,
  'country_calling_code': '1',
  'line_type': 'Landline',
  'carrier': 'Verizon',
  'is_prepaid': False,
  'is_commercial': True,
  'belongs_to': {'id': 'Business.2ef5a367-8756-41f0-8f20-82e6be23db4f',
   'name': 'Middle River Family Practice',
   'firstname': None,
   'middlename': None,
   'lastname': None,
   'alternate_names': [],
   'age_range': None,
   'gender': None,
   'type': 'Business',
   'link_to_phone_start_date': '2016-11-20',
   'industry': ['Ambulatory Health Care Services']},
  'current_addresses': [{'id': 'Location.1857bbb0-c06f-41f2-b61a-bc064e5b2c55',
    'location_type': 'Address',
    'street_line_1': '107 Beacon Rd',
    'street_line_2': None,
    'city': 'Middle River',
    'postal_code': '21220',
    'zip4': '3504',
    'state_code': 'MD',
    'country_code': 'US',
    'lat_long': {'latitude': 39.336388,
     'longitude': -76.456691,
     'accuracy': 'Ro

In [98]:
# Set the 'Zipcode' column of connected_all (in place) from zips2; zips2 must
# have exactly one entry per row of connected_all or pandas raises ValueError.
connected_all['Zipcode']=zips2

In [100]:
# Display only: fillna returns a new frame and the result is not assigned,
# so connected_all itself is not changed by this cell.
connected_all.fillna('None')

Unnamed: 0,Address,AlternatePhone,Carrier,City,Commercial,Date,Error,Industry,LineType,Name,...,Valid,PHYSICIAN_FIRST_NAME,PHYSICIAN_LAST_NAME,OFFICE_ADDRESS_LINE_2,OFFICE_ADDRESS_CITY,OFFICE_ADDRESS_STATE,OFFICE_ADDRESS_ZIP,PHYSICIAN_ME_NUMBER,OFFICE_ADDRESS_LINE_1,Zipcode
0,3969 S Cobb Dr SE Ste 201,,Bandwidth SMSEnabled,Smyrna,True,2016-11-17,,Ambulatory Health Care Services,NonFixedVOIP,Cumberland Women's Health Center,...,True,TINA,JONES,3200 HIGHLANDS PKWY SE STE 250,SMYRNA,GA,30082,3841781987,CUMBERLAND WOMEN'S HLTH CTR PC,30080
1,99 Beauvoir Ave,,MCI Metro ATS,Summit,True,2018-08-28,,Ambulatory Health Care Services,Landline,"Logang, Linda N.P.",...,True,JILL,RATHYEN,99 BEAUVOIR AVE,SUMMIT,NJ,7901,2301030595,DEPARTMENT OF EMERGENCY MEDICINE,07901
2,3400 Spruce St,,Verizon,Philadelphia,True,2018-08-28,,Ambulatory Health Care Services,Landline,"Mikhail, Fadi W MD",...,True,MICHAEL,GELFAND,3400 SPRUCE ST,PHILADELPHIA,PA,19104,3520060269,DEPARTMENT OF NEUROLOGY,19104
3,8700 Beverly Blvd,,Twilio,West Hollywood,True,2019-02-28,,Ambulatory Health Care Services,NonFixedVOIP,"Rezaie, Ali MD",...,True,MARK,PIMENTEL,8700 BEVERLY BLVD STE 7,W HOLLYWOOD,CA,90048,6201920190,DIV OF GASTROENTEROLOGY,90048
4,3663 Ridge Mill Dr Ste 100,+16147884459,Level 3 Communications,Hilliard,True,2018-08-28,,Ambulatory Health Care Services,NonFixedVOIP,"Kapoor, Shruti G MD",...,True,SHRUTI,KAPOOR,5100 W BROAD ST,COLUMBUS,OH,43228,2307060208,DOCTORS HOSPITAL,43026
5,301 University Blvd,+14097723394,AT&T,Galveston,True,2017-09-09,,Ambulatory Health Care Services,Landline,"Zahiruddin, Adil S MD",...,True,SANJIV,SUR,301 UNIVERSITY BLVD,GALVESTON,TX,77555,49574790025,INT MED/ALLERGY/IMMUNOLOGY,77555
6,816 NW 46th St,+18035484350,Coxcom,Oklahoma City,False,1998-08-01,,,FixedVOIP,Mr. Robert R German,...,True,THOMAS,JANSSEN,9600 BROADWAY EXT,OKLAHOMA CITY,OK,73114,3901790859,MCBRIDE ORTHOPEDIC HOSPITAL,73118
7,717 Delaware St SE Ste 353,,AT&T,Minneapolis,True,2017-05-06,,Ambulatory Health Care Services,Landline,Mirna Georges Boumitri MD,...,True,SUJATHA,TATA,4709 UPPER TER,MINNEAPOLIS,MN,55435,49565030140,MD INTERNAL MEDICINE,55414
8,4560 Lake Ridge Pkwy Ste 200,,Level 3 Communications,Grand Prairie,True,2019-02-28,,Ambulatory Health Care Services,NonFixedVOIP,Methodist Family Health Center-South Grand Pra...,...,True,PAMELA,SANTONE,4560 LAKE RIDGE PKWY STE 200,GRAND PRAIRIE,TX,75052,4878970948,METHODIST FAMILY HEALTH CENTER,75052
9,4333 N Josey Ln Ste 202,+12147310050,ICG Telecom Group,Carrollton,True,2016-11-20,,Ambulatory Health Care Services,NonFixedVOIP,North Texas Kidney Disease,...,True,ANJANA,JAGALUR,4333 N JOSEY LN STE 202,CARROLLTON,TX,75010,49639980170,N TEXAS KIDNEY DISEASE ASSOCS,75010


In [105]:
# Preview the short and main connected results stacked together. The result
# is not assigned, and each frame keeps its own row labels, so labels repeat.
pd.concat([short_connected_df,connected_df])

Unnamed: 0,Address,AlternatePhone,Carrier,City,Commercial,Date,Error,Industry,LineType,Name,OFFICE_TELEPHONE,PhoneType,State,Valid
0,93 Pond St,,Verizon,Sharon,True,2016-11-19,,Ambulatory Health Care Services,Landline,Heather Dawn Boxerman MD,7817849212,Business,MA,True
1,300 Longwood Ave,,Verizon,Boston,True,2019-02-28,,Ambulatory Health Care Services,Landline,"Gramza, Claire E",6173557737,Business,MA,True
2,1 Choctaw Way,+19185677046,Bandwidth.com CLEC,Talihina,True,2017-08-09,,Hospitals,NonFixedVOIP,Choctaw Nation,9185677000,Business,OK,True
3,,,Teleport Communications America,Cleveland,False,,,,Landline,Cleveland C The,2164446968,Person,OH,True
4,4910 Ritter Rd,,Bandwidth.com CLEC,Mechanicsburg,True,2016-11-17,,Ambulatory Health Care Services,NonFixedVOIP,Concentra Medical Center,7177951819,Business,PA,True
0,3969 S Cobb Dr SE Ste 201,,Bandwidth SMSEnabled,Smyrna,True,2016-11-17,,Ambulatory Health Care Services,NonFixedVOIP,Cumberland Women's Health Center,7704382942,Business,GA,True
1,99 Beauvoir Ave,,MCI Metro ATS,Summit,True,2018-08-28,,Ambulatory Health Care Services,Landline,"Logang, Linda N.P.",9085222232,Business,NJ,True
2,3400 Spruce St,,Verizon,Philadelphia,True,2018-08-28,,Ambulatory Health Care Services,Landline,"Mikhail, Fadi W MD",2156623606,Business,PA,True
3,8700 Beverly Blvd,,Twilio,West Hollywood,True,2019-02-28,,Ambulatory Health Care Services,NonFixedVOIP,"Rezaie, Ali MD",3104236143,Business,CA,True
4,3663 Ridge Mill Dr Ste 100,+16147884459,Level 3 Communications,Hilliard,True,2018-08-28,,Ambulatory Health Care Services,NonFixedVOIP,"Kapoor, Shruti G MD",6147884440,Business,OH,True


In [106]:
# Display the lookup results for the connected numbers.
connected_df

Unnamed: 0,Address,AlternatePhone,Carrier,City,Commercial,Date,Error,Industry,LineType,Name,OFFICE_TELEPHONE,PhoneType,State,Valid
0,3969 S Cobb Dr SE Ste 201,,Bandwidth SMSEnabled,Smyrna,True,2016-11-17,,Ambulatory Health Care Services,NonFixedVOIP,Cumberland Women's Health Center,7704382942,Business,GA,True
1,99 Beauvoir Ave,,MCI Metro ATS,Summit,True,2018-08-28,,Ambulatory Health Care Services,Landline,"Logang, Linda N.P.",9085222232,Business,NJ,True
2,3400 Spruce St,,Verizon,Philadelphia,True,2018-08-28,,Ambulatory Health Care Services,Landline,"Mikhail, Fadi W MD",2156623606,Business,PA,True
3,8700 Beverly Blvd,,Twilio,West Hollywood,True,2019-02-28,,Ambulatory Health Care Services,NonFixedVOIP,"Rezaie, Ali MD",3104236143,Business,CA,True
4,3663 Ridge Mill Dr Ste 100,+16147884459,Level 3 Communications,Hilliard,True,2018-08-28,,Ambulatory Health Care Services,NonFixedVOIP,"Kapoor, Shruti G MD",6147884440,Business,OH,True
5,301 University Blvd,+14097723394,AT&T,Galveston,True,2017-09-09,,Ambulatory Health Care Services,Landline,"Zahiruddin, Adil S MD",4097723410,Business,TX,True
6,816 NW 46th St,+18035484350,Coxcom,Oklahoma City,False,1998-08-01,,,FixedVOIP,Mr. Robert R German,4052309435,Person,OK,True
7,717 Delaware St SE Ste 353,,AT&T,Minneapolis,True,2017-05-06,,Ambulatory Health Care Services,Landline,Mirna Georges Boumitri MD,6126263000,Business,MN,True
8,4560 Lake Ridge Pkwy Ste 200,,Level 3 Communications,Grand Prairie,True,2019-02-28,,Ambulatory Health Care Services,NonFixedVOIP,Methodist Family Health Center-South Grand Pra...,9722635272,Business,TX,True
9,4333 N Josey Ln Ste 202,+12147310050,ICG Telecom Group,Carrollton,True,2016-11-20,,Ambulatory Health Care Services,NonFixedVOIP,North Texas Kidney Disease,4695579627,Business,TX,True


In [108]:
# Join the short connected results onto the source rows by phone number,
# then attach the postal codes collected in zips3 as the 'Zipcode' column.
SHORT = (
    short_connected_df
    .merge(connected, on='OFFICE_TELEPHONE')
    .assign(Zipcode=zips3)
)

In [111]:
# Put the SHORT rows in front of the existing connected_all rows.
# NOTE(review): the result is assigned back to connected_all, so re-running
# this cell adds the SHORT rows again. The execution counts also show it ran
# (In [111]) just before the CSV export cell (In [112]) that appears earlier
# in the notebook — confirm the intended order.
connected_all = pd.concat([SHORT,connected_all])