In [1]:
# Setup (run once): pip install googletrans==4.0.0rc1 geopy pandas

In [2]:
from googletrans import Translator
import pandas as pd
import time

In [3]:
# Load the hotel list from the CSV in the working directory.
sample = pd.read_csv('sample.csv')
# Preview the first two rows to check that the file parsed as expected.
sample.head(2)

Unnamed: 0,酒店中文名称 Property Name in Chinese,酒店中文地址 Address in Chinese,Longitude,Latitude
0,桔子水晶太原南站晋阳街酒店,山西省太原市小店区荣军北街99号,37.78401,112.57727
1,桔子临汾五洲国际广场酒店,山西省临汾市开发区河汾路8号,36.10251,111.51095


In [4]:
# Show the column labels; the bilingual labels are used verbatim as keys in
# the translation cells below.
sample.columns

Index(['酒店中文名称 Property Name in Chinese', '酒店中文地址 Address in Chinese',
       'Longitude', 'Latitude'],
      dtype='object')

In [5]:
# Create the googletrans `Translator` instance that every translation call
# below goes through.
translator = Translator()

In [6]:
# Translate a Chinese text (src='zh-cn') to English; no `dest` is given, so the
# library's default target language is used.
translation = translator.translate("山西",src='zh-cn')

In [7]:
# Report the translation as "<original> (<source lang>) --> <translated> (<target lang>)".
print("{} ({}) --> {} ({})".format(
    *(getattr(translation, field) for field in ("origin", "src", "text", "dest"))
))

山西 (zh-cn) --> Shanxi (en)


# Bulk Translation

In [8]:
# Translate every Chinese address to English. The list keeps the row order of
# `sample`, so it can be attached to the frame as a column later on.
en_address = [
    translator.translate(address_zh, dest='en').text
    for address_zh in sample['酒店中文地址 Address in Chinese']
]

In [9]:
# Sanity check: number of translated addresses (should equal the row count).
len(en_address)

5

In [10]:
# Translate every Chinese hotel name to English. The list keeps the row order
# of `sample`, so it can be attached to the frame as a column later on.
en_hotel_name = [
    translator.translate(name_zh, dest='en').text
    for name_zh in sample['酒店中文名称 Property Name in Chinese']
]

In [11]:
# Sanity check for the hotel-name translations built in the previous cell:
# the count should equal the row count (and the address count) before both
# lists are attached to `sample` as columns.
len(en_hotel_name)

5

In [12]:
# Attach both translated lists as new columns: hotel name first, then address.
sample = sample.assign(en_hotel_name=en_hotel_name, en_address=en_address)

In [13]:
# Inspect the last row to confirm the two English columns were added.
sample.tail(1)

Unnamed: 0,酒店中文名称 Property Name in Chinese,酒店中文地址 Address in Chinese,Longitude,Latitude,en_hotel_name,en_address
4,桔子西安钟鼓楼酒店,西安市碑林区南大街16号,34.25347,108.94665,Orange Xi'an Zhong Drum Tower,"No. 16 South Street, Beilin District, Xi'an"


In [14]:
# Persist the translated table. No `index` argument is passed, so pandas also
# writes the row index as an extra, unnamed first column.
sample.to_csv('sample_translated.csv')

# Reverse Geocode Zipcode

In [15]:
from geopy.geocoders import Nominatim

In [16]:
# Pair the two coordinate columns into one tuple per row.
# NOTE(review): judging by the previewed rows, the CSV headers look swapped:
# 'Longitude' holds values such as 37.78, while 'Latitude' holds 112.58, which
# is outside the valid latitude range (+/-90). The tuple is therefore
# effectively (latitude, longitude), matching the `lat_lng` name — confirm
# against the data source before "fixing" the column order here.
sample['lat_lng'] = sample[["Longitude","Latitude"]].apply(tuple, axis=1)

In [17]:
# Check the new `lat_lng` column on the first row.
sample.head(1)

Unnamed: 0,酒店中文名称 Property Name in Chinese,酒店中文地址 Address in Chinese,Longitude,Latitude,en_hotel_name,en_address,lat_lng
0,桔子水晶太原南站晋阳街酒店,山西省太原市小店区荣军北街99号,37.78401,112.57727,Orange Crystal Taiyuan South Railway Station J...,"No. 99, Rongjun North Street, Xiaodian Distric...","(37.78401, 112.57727)"


In [18]:
# Nominatim's usage policy asks for a User-Agent that identifies the
# application; a generic browser name such as "Chrome" may be throttled or
# blocked. geopy's default timeout is only 1 second, which the public server
# often exceeds, so a longer timeout avoids spurious lookup failures.
geolocator = Nominatim(user_agent="hotel-address-zipcode-notebook", timeout=10)

In [19]:
# Reverse-geocode every (lat, lng) pair into a postcode and a full address.
# The three lists stay index-aligned with `sample` so they can be attached as
# columns afterwards; failed lookups are recorded with the 'error' sentinel.
zipcodes=[]
addresses=[]
input_list=[]
for coords in sample['lat_lng']:
    # Throttle: the public Nominatim service allows at most one request per second.
    time.sleep(1)
    input_list.append(coords)
    try:
        location = geolocator.reverse(f"{coords[0]},{coords[1]}")
    except Exception as exc:
        # Catch `Exception` rather than everything so Ctrl-C still interrupts
        # the loop. Report the reason, then continue so that one bad row does
        # not abort the whole batch.
        print(f"Reverse geocoding failed for {coords}: {exc!r}")
        location = None
    if location is None:
        # Covers both a failed request and a lookup that returned no result.
        zipcodes.append('error')
        addresses.append('error')
        continue
    # A result without a postcode still carries a usable address, so only the
    # missing field is flagged instead of discarding the whole row.
    zipcodes.append(location.raw.get('address', {}).get('postcode', 'error'))
    addresses.append(location.address)

In [20]:
# Attach the reverse-geocoding results as three new columns, in this order:
# the queried coordinates, the postcode and the full address.
sample = sample.assign(
    input_list=input_list,
    zipcodes=zipcodes,
    addresses=addresses,
)

In [21]:
# Review the last five rows, including rows whose lookup produced 'error'.
sample.tail(5)

Unnamed: 0,酒店中文名称 Property Name in Chinese,酒店中文地址 Address in Chinese,Longitude,Latitude,en_hotel_name,en_address,lat_lng,input_list,zipcodes,addresses
0,桔子水晶太原南站晋阳街酒店,山西省太原市小店区荣军北街99号,37.78401,112.57727,Orange Crystal Taiyuan South Railway Station J...,"No. 99, Rongjun North Street, Xiaodian Distric...","(37.78401, 112.57727)","(37.78401, 112.57727)",030012,"黄陵街道, 小店区, 太原市, 山西省, 030012, 中国"
1,桔子临汾五洲国际广场酒店,山西省临汾市开发区河汾路8号,36.10251,111.51095,Orange Linyi Wuzhou International Plaza Hotel,"No. 8, Hedong Road, Linyi City, Linyi City, Sh...","(36.10251, 111.51095)","(36.10251, 111.51095)",041000,"河汾路, 滨河街道, 尧都区, 临汾市, 山西省, 041000, 中国"
2,桔子廊坊建设北路酒店,廊坊市广阳区建设北路77号,39.53777,116.71313,Orange Langfang Jianshe North Road Hotel,"No. 77, Jianshe North Road, Guangyang District...","(39.53777, 116.71313)","(39.53777, 116.71313)",error,error
3,桔子上海淮海中路十号名邸酒店,上海市徐汇区 宝庆路10号7号楼,31.21082,121.44972,Orange Shanghai Huaihai Middle Road No. 10 Fam...,"Building No. 10, Baofeng Road, Xuhui District,...","(31.21082, 121.44972)","(31.21082, 121.44972)",200032,"10, 太原路, 小木桥, 徐汇区, 上海市, 200032, 中国"
4,桔子西安钟鼓楼酒店,西安市碑林区南大街16号,34.25347,108.94665,Orange Xi'an Zhong Drum Tower,"No. 16 South Street, Beilin District, Xi'an","(34.25347, 108.94665)","(34.25347, 108.94665)",710001,"顺城南路, 柏树林, 碑林区, 西安市, 陕西省, 710001, 中国"


In [23]:
# Persist the final table (translations plus reverse-geocoding columns). No
# `index` argument is passed, so pandas also writes the row index as an extra,
# unnamed first column.
sample.to_csv('sample_translated_and_zipcode.csv')