Skip to content

Commit 5e5ce96

Browse files
committed
clean the phone number
1 parent f696b4d commit 5e5ce96

File tree

1 file changed

+9 -4 lines changed

1 file changed

+9 -4 lines changed

writing_clean_data.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
start = 0
77
file_path = f'yelp-{city}-clean.txt'
88

9-
while start < 60:
9+
while start < 990:
1010
print(start)
1111
url = base_url.format(city, start)
1212
response = requests.get(url)
@@ -19,11 +19,12 @@
1919
for biz in businesses:
2020
first_line = ""
2121
second_line = ""
22+
phone_number = ""
2223
try:
2324
title = biz.find('a', {'class': 'biz-name'}).text
2425
address = biz.find('address').contents
2526
# print(address)
26-
phone = biz.find('span', {'class': 'biz-phone'}).text
27+
phone = biz.find('span', {'class': 'biz-phone'}).contents
2728
region = biz.find('span', {'class': 'neighborhood-str-list'}).contents
2829
count += 1
2930
for item in address:
@@ -36,6 +37,11 @@
3637
first_line += item.getText() + " "
3738
else:
3839
second_line += item.strip(" \n\t\r") + " "
40+
for item in phone:
41+
if "br" in item:
42+
phone_number += item.getText() + " "
43+
else:
44+
phone_number += item.strip(" \n\t\r") + " "
3945

4046
except Exception as e:
4147
print(e)
@@ -46,7 +52,7 @@
4652
phone = None
4753
region = None
4854

49-
detail = f"{title}\n{second_line}\n{phone}\n"
55+
detail = f"{title}\n{second_line}\n{phone_number}\n"
5056
print(detail)
5157

5258
try:
@@ -57,4 +63,3 @@
5763
logs.close()
5864

5965
start += 30
60-

0 commit comments

Comments
 (0)