# Facebook & Website Mining (Silver 2)

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
from Apify_Scrapper_Functions import Get_Phone_Number_From_Facebook, Get_Phone_Number_From_Website

### Import scraped data

In [2]:
# Load the Silver stage-1 parquet file into the working frame.
Scrapped_Data = pd.read_parquet(
    "./Staging/Silver/Silver_data_1.parquet",
    engine="fastparquet",
)

# Sanity report: is UEN a unique key, and how large is the frame?
print(Scrapped_Data["UEN"].is_unique, Scrapped_Data.shape, sep="\n")

True
(100, 16)


### Generate PIC Source Column

In [3]:
# Add three empty provenance columns, then project the frame down to the
# columns this notebook works with (in a fixed order).
pic_source_columns = ["PIC Source 1", "PIC Source 2", "PIC Source 3"]
for pic_column in pic_source_columns:
    Scrapped_Data[pic_column] = None

contact_and_address_columns = [
    "UEN",
    "Phones",
    "Emails",
    "Website",
    "Facebook",
    "LinkedIn",
    "Instagram",
    "TikTok",
    "operational_street",
    "operational_unit",
    "operational_postal_code",
    "operational_address",
]
Scrapped_Data = Scrapped_Data[contact_and_address_columns + pic_source_columns]

Scrapped_Data

Unnamed: 0,UEN,Phones,Emails,Website,Facebook,LinkedIn,Instagram,TikTok,operational_street,operational_unit,operational_postal_code,operational_address,PIC Source 1,PIC Source 2,PIC Source 3
0,52895287X,[+6564836187],,https://hungrygowhere.com/singapore/chng_li_ch...,,,,,10 ANG MO KIO INDUSTRIAL PARK 2A AMK AUTOPOINT,01-21,568047,10 ANG MO KIO INDUSTRIAL PARK 2A AMK AUTOPOINT...,,,
1,53414661M,,,https://catzpatisserie.cococart.co/,[https://www.facebook.com/CaTzPatisserie/],,[https://www.instagram.com/catzpatisserie/],,25 PHOENIX RISE HUA MEI GARDENS,,668227,25 PHOENIX RISE HUA MEI GARDENS Singapore 668227,,,
2,201505039Z,[+6562211336],[sup@wantonsg.com],https://wantonsg.com/,[https://www.facebook.com/wanton.sg/],,[https://www.instagram.com/wanton.sg/?hl=en],,458 RACE COURSE ROAD,,218699,458 RACE COURSE ROAD Singapore 218699,,,
3,53399916B,,,,,,,,200 JALAN SULTAN TEXTILE CENTRE,02-04,199018,200 JALAN SULTAN TEXTILE CENTRE 02-04 Singapor...,,,
4,202429107W,,,,,,,,348 JALAN BOON LAY,01-07B,619529,348 JALAN BOON LAY 01-07B Singapore 619529,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,T06LL0510H,,,,,,,,510 UPPER SERANGOON ROAD SUNSHINE LODGE,04-04,534530,510 UPPER SERANGOON ROAD SUNSHINE LODGE 04-04 ...,,,
96,202011894W,[+6591679396],,https://www.mycareersfuture.gov.sg/job/admin/o...,,,,,5 PEREIRA ROAD ASIAWIDE INDUSTRIAL BUILDING,04-01,368025,5 PEREIRA ROAD ASIAWIDE INDUSTRIAL BUILDING 04...,,,
97,53478000L,,,https://chickenpie.sg/,,,,,21 COMPASSVALE STREET COMPASSVALE PRIMARY SCHOOL,,545091,21 COMPASSVALE STREET COMPASSVALE PRIMARY SCHO...,,,
98,201205374H,,,,,,,,101 KITCHENER ROAD JALAN BESAR PLAZA,02-05,208511,101 KITCHENER ROAD JALAN BESAR PLAZA 02-05 Sin...,,,


### Set PIC Source 1 to "RecordOwl" for rows that already have a phone number

In [4]:
# Rows whose Phones cell is non-null get "RecordOwl" recorded as PIC Source 1.
has_recordowl_phone = Scrapped_Data["Phones"].notna()
Scrapped_Data.loc[has_recordowl_phone, "PIC Source 1"] = "RecordOwl"
Scrapped_Data

Unnamed: 0,UEN,Phones,Emails,Website,Facebook,LinkedIn,Instagram,TikTok,operational_street,operational_unit,operational_postal_code,operational_address,PIC Source 1,PIC Source 2,PIC Source 3
0,52895287X,[+6564836187],,https://hungrygowhere.com/singapore/chng_li_ch...,,,,,10 ANG MO KIO INDUSTRIAL PARK 2A AMK AUTOPOINT,01-21,568047,10 ANG MO KIO INDUSTRIAL PARK 2A AMK AUTOPOINT...,RecordOwl,,
1,53414661M,,,https://catzpatisserie.cococart.co/,[https://www.facebook.com/CaTzPatisserie/],,[https://www.instagram.com/catzpatisserie/],,25 PHOENIX RISE HUA MEI GARDENS,,668227,25 PHOENIX RISE HUA MEI GARDENS Singapore 668227,,,
2,201505039Z,[+6562211336],[sup@wantonsg.com],https://wantonsg.com/,[https://www.facebook.com/wanton.sg/],,[https://www.instagram.com/wanton.sg/?hl=en],,458 RACE COURSE ROAD,,218699,458 RACE COURSE ROAD Singapore 218699,RecordOwl,,
3,53399916B,,,,,,,,200 JALAN SULTAN TEXTILE CENTRE,02-04,199018,200 JALAN SULTAN TEXTILE CENTRE 02-04 Singapor...,,,
4,202429107W,,,,,,,,348 JALAN BOON LAY,01-07B,619529,348 JALAN BOON LAY 01-07B Singapore 619529,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,T06LL0510H,,,,,,,,510 UPPER SERANGOON ROAD SUNSHINE LODGE,04-04,534530,510 UPPER SERANGOON ROAD SUNSHINE LODGE 04-04 ...,,,
96,202011894W,[+6591679396],,https://www.mycareersfuture.gov.sg/job/admin/o...,,,,,5 PEREIRA ROAD ASIAWIDE INDUSTRIAL BUILDING,04-01,368025,5 PEREIRA ROAD ASIAWIDE INDUSTRIAL BUILDING 04...,RecordOwl,,
97,53478000L,,,https://chickenpie.sg/,,,,,21 COMPASSVALE STREET COMPASSVALE PRIMARY SCHOOL,,545091,21 COMPASSVALE STREET COMPASSVALE PRIMARY SCHO...,,,
98,201205374H,,,,,,,,101 KITCHENER ROAD JALAN BESAR PLAZA,02-05,208511,101 KITCHENER ROAD JALAN BESAR PLAZA 02-05 Sin...,,,


### Separate W/O Phone Numbers

In [5]:
# Split the frame on whether the Phones cell holds any value.
has_phone = Scrapped_Data["Phones"].notna()
df_with_phones = Scrapped_Data[has_phone]
df_without_phones = Scrapped_Data[~has_phone]

# Flag every row whose phone value also appears on another row
# (keep=False marks all members of a duplicate group, not just the repeats).
# The comparison is done on the string form of each cell so that list-like
# Phones values, which are unhashable, cannot make duplicated() raise; this
# matches the `.apply(str)` uniqueness checks used later in the notebook.
is_shared_phone = df_with_phones["Phones"].apply(str).duplicated(keep=False)

# Rows whose phone value is unique across the frame.
df_With_Phones_1 = df_with_phones[~is_shared_phone]

# Rows that share a phone value with at least one other row.
df_Duplicate_Phones_1 = df_with_phones[is_shared_phone]

# Print shapes
print("Total having Phone numbers:", df_with_phones.shape)
print("Total without Phone numbers:", df_without_phones.shape)
print("Unique phone rows (df_With_Phones_1):", df_With_Phones_1.shape)
print("Duplicate phone rows (df_Duplicate_Phones_1):", df_Duplicate_Phones_1.shape)


Total having Phone numbers: (39, 15)
Total without Phone numbers: (61, 15)
Unique phone rows (df_With_Phones_1): (37, 15)
Duplicate phone rows (df_Duplicate_Phones_1): (2, 15)


### Merge df_without_phones with df_Duplicate_Phones_1

In [6]:
# Stack the duplicate-phone rows on top of the no-phone rows, renumber the
# index, and keep only the first row seen for each UEN.
frames_to_recheck = [df_Duplicate_Phones_1, df_without_phones]
df_without_phones_and_Duplicates = pd.concat(
    frames_to_recheck, ignore_index=True
).drop_duplicates(subset=["UEN"])

df_without_phones_and_Duplicates.shape

(63, 15)

In [7]:
# Final split: rows with a unique phone vs. rows still to be processed.
split_frames = (df_With_Phones_1, df_without_phones_and_Duplicates)

# Report dimensions first, then UEN uniqueness, for both frames.
for split_frame in split_frames:
    print(split_frame.shape)

for split_frame in split_frames:
    print(split_frame["UEN"].is_unique)

# Clear the provenance tag on every row of the to-be-processed frame.
# NOTE(review): only "PIC Source 1" is reset here; the Phones value of the
# duplicate-phone rows is left in place — confirm that this is intended.
df_without_phones_and_Duplicates["PIC Source 1"] = None

(37, 15)
(63, 15)
True
True


### Separate W/O Facebook from df_without_phones_and_Duplicates

In [8]:
# Partition the to-be-processed rows by whether a Facebook value is present
# (present = not null and not an empty string).
facebook_values = df_without_phones_and_Duplicates["Facebook"]
has_facebook = facebook_values.notna() & (facebook_values != "")

df_with_facebook = df_without_phones_and_Duplicates[has_facebook]
df_without_facebook = df_without_phones_and_Duplicates[~has_facebook]

# Report the size of each partition.
print(f"With Facebook: {df_with_facebook.shape}")
print(f"Without Facebook: {df_without_facebook.shape}")

With Facebook: (9, 15)
Without Facebook: (54, 15)


In [9]:
# Confirm UEN is still a unique key among the rows that have a Facebook value.
print(df_with_facebook["UEN"].is_unique)


True


In [10]:
# Show (rows, columns) of the frame about to be sent to the Facebook scraper.
df_with_facebook.shape

(9, 15)

### Extract Phone Number of Facebook Pages

In [11]:

# Run the Facebook scraping helper (imported from Apify_Scrapper_Functions) on
# the rows that have a Facebook value. It returns two frames, unpacked here.
# NOTE(review): going by the variable names, the first holds rows where a phone
# was found and the second the rows where none was — confirm against the helper.
facebook_phone_df, facebook_no_phone_df = Get_Phone_Number_From_Facebook(df_with_facebook)


Valid Facebook URLs: 9, Filtered out (profiles/invalid): 0
Processing 9 Facebook pages...
Batch 1/1...
  Original URLs: 9, Unique URLs: 9


[36m[apify.facebook-page-contact-information runId:v495A8zZBIw8xARuV][0m -> Status: RUNNING, Message: 
[36m[apify.facebook-page-contact-information runId:v495A8zZBIw8xARuV][0m -> 2026-01-27T07:39:01.001Z ACTOR: Pulling container image of build VRed9P5z8GOu5TP7V from registry.
[36m[apify.facebook-page-contact-information runId:v495A8zZBIw8xARuV][0m -> 2026-01-27T07:39:01.006Z ACTOR: Creating container.
[36m[apify.facebook-page-contact-information runId:v495A8zZBIw8xARuV][0m -> 2026-01-27T07:39:01.066Z ACTOR: Starting container.
[36m[apify.facebook-page-contact-information runId:v495A8zZBIw8xARuV][0m -> 2026-01-27T07:39:01.067Z ACTOR: Running under "LIMITED_PERMISSIONS" permission level.
[36m[apify.facebook-page-contact-information runId:v495A8zZBIw8xARuV][0m -> 2026-01-27T07:39:02.033Z [32mINFO[39m  System info[90m {"apifyVersion":"3.5.1","apifyClientVersion":"2.19.0","crawleeVersion":"3.15.2","osType":"Linux","nodeVersion":"v20.19.6"}[39m
[36m[apify.facebook-page-conta

  Retrieved 9 results from Apify
  Sample item keys: ['url', 'error', 'errorDescription']
  DEBUG: URL https://www.facebook.com/ahmalormee/... | raw_phone=None
  DEBUG: URL https://www.facebook.com/CaTzPatisserie/... | raw_phone=None
  DEBUG: URL https://www.facebook.com/funtoast... | raw_phone=+65 6816 2849
  DEBUG: URL https://www.facebook.com/Bedok85/... | raw_phone=None
  DEBUG: URL https://www.facebook.com/jiaksongsg/... | raw_phone=None
  DEBUG: URL https://m.facebook.com/lianhuatfishsoup/... | raw_phone=None
  DEBUG: URL https://www.facebook.com/haruplate... | raw_phone=None
  DEBUG: URL https://m.facebook.com/100076205071703... | raw_phone=None
Done! Found 1/9 phone numbers.
Summary: 1 with phones, 8 without phones (includes 0 filtered URLs)


In [12]:
# Uniqueness / shape report for the two frames returned by the Facebook scraper.
# Phones is compared on its string form (presumably because cells can hold
# list-like values — confirm), UEN is compared directly.
print("with fb")
print("----------------")
print(facebook_phone_df["Phones"].apply(str).is_unique)
print(facebook_phone_df["UEN"].is_unique)
print(facebook_phone_df.shape)

# Fixed: the newline escape was written as "/n", which printed literally
# instead of inserting a blank line before the second section.
print("\n without fb")
print("----------------")
print(facebook_no_phone_df["Phones"].apply(str).is_unique)
print(facebook_no_phone_df["UEN"].is_unique)
print(facebook_no_phone_df.shape)

with fb
----------------
True
True
(1, 15)
/n without fb
----------------
False
True
(8, 15)


### Adding Source to facebook_phone_df and merging with df_With_Phones_1


In [14]:
# Show (rows, columns) of the first frame returned by the Facebook scraper.
facebook_phone_df.shape

(1, 15)

### Assign PIC Source 1 as Facebook

In [15]:
# Assign PIC Source 1 as "Facebook" for every row of facebook_phone_df whose Phones cell is non-null
facebook_phone_df.loc[facebook_phone_df["Phones"].notna(), "PIC Source 1"] = "Facebook"


### 

### Merge df_With_Phones_1 with facebook_phone_df = with unique value phone number

In [21]:
# Append the Facebook-sourced rows to the unique-phone rows and renumber the index.
df_With_Phones_2 = pd.concat(
    [df_With_Phones_1, facebook_phone_df],
    ignore_index=True,
)

# UEN must remain a unique key after the merge; also report the new size.
print(df_With_Phones_2["UEN"].is_unique, df_With_Phones_2.shape, sep="\n")

True
(38, 15)


### Merging back facebook_no_phone_df with df_without_facebook = no valid number

In [23]:
# Recombine the rows that never had a Facebook value with the rows in the
# scraper's second returned frame, renumbering the index.
df_without_phones_2 = pd.concat(
    [df_without_facebook, facebook_no_phone_df],
    ignore_index=True,
)

# UEN must remain a unique key after the merge; also report the new size.
print(df_without_phones_2["UEN"].is_unique, df_without_phones_2.shape, sep="\n")

True
(62, 15)


### Quality Check df_With_Phones_2

In [24]:
# For each key column, count null cells and cells equal to the empty string.
for checked_column in ("Phones", "PIC Source 1"):
    column_values = df_With_Phones_2[checked_column]
    print(f"{checked_column} - NA count:", column_values.isna().sum())
    print(f"{checked_column} - Empty string count:", (column_values == "").sum())

Phones - NA count: 0
Phones - Empty string count: 0
PIC Source 1 - NA count: 0
PIC Source 1 - Empty string count: 0


### Quality Check df_without_phones_2

In [25]:
# Any row in the no-phone frame that still carries a PIC Source 1 tag is
# selected here; the shape is printed and the rows are displayed.
rows_with_source_tag = df_without_phones_2[df_without_phones_2["PIC Source 1"].notna()]

print("Rows where PIC Source 1 is not NA:")
print(rows_with_source_tag.shape)
print()
rows_with_source_tag

Rows where PIC Source 1 is not NA:
(0, 15)



Unnamed: 0,UEN,Phones,Emails,Website,Facebook,LinkedIn,Instagram,TikTok,operational_street,operational_unit,operational_postal_code,operational_address,PIC Source 1,PIC Source 2,PIC Source 3


### Separate W/O Websites

In [None]:
# Partition the remaining no-phone rows by whether a Website value is present
# (present = not null and not an empty string).
website_values = df_without_phones_2["Website"]
has_website = website_values.notna() & (website_values != "")

df_with_websites = df_without_phones_2[has_website]
df_without_websites = df_without_phones_2[~has_website]

# Report the size of each partition next to the size of the input frame.
print(f"With Websites: {df_with_websites.shape}")
print(f"Without Websites: {df_without_websites.shape}")
print(f"Total from df_without_phones_2: {df_without_phones_2.shape}")

With Websites: (10, 15)
Without Websites: (52, 15)
Total from df_without_phones_2: (62, 15)


### Extract Phone Numbers from Websites

In [27]:
# Run the website scraping helper on the rows that have a Website value.
# Get_Phone_Number_From_Website is already imported in the notebook's first
# cell, so the repeated mid-notebook import has been dropped.
scrapped_from_websites = Get_Phone_Number_From_Website(df_with_websites)

🌐 WEBSITE PHONE NUMBER SCRAPER - COST-OPTIMIZED
📊 Configuration:
   • Batch size: 500 websites (increased for efficiency)
   • Concurrency: 3 browsers (parallel processing)
   • Page timeout: 15s (reduced for speed)
   • Function timeout: 30s (optimized)
   • Retries: 1 (minimize compute waste)
   • Browser: Chromium (lightweight)
   • Proxy: DATACENTER (cost-optimized)
   • Strategy: Contact page → Homepage fallback

🔑 Validating Apify API token...
✅ API Key valid - User: epos-pte-ltd
   • Plan: SCALE
   • Credits remaining: Check your dashboard at https://console.apify.com/billing

📋 Total rows in RecordOwl_Leads: 10
📋 Rows with valid websites: 10

🔍 STEP 1: VERIFYING WEBSITE ACCESSIBILITY
Testing HTTP/HTTPS connectivity for all websites...
  ✅ https://www.myfood.sg/
  ⏭️  https://www.mycareersfuture.gov.sg/job/food-and-beverage/general-manager-sv-services-engineering-8ee823de39cbc5a13ba01da9eea8b197 - Skipped (contains 'mycareersfuture

In [28]:
# Keep the first scraped row per UEN.
scrapped_from_websites_copy = scrapped_from_websites.drop_duplicates(subset=["UEN"], keep="first")

# A row counts as having a phone when its Phones cell is non-null and its
# string form is neither "None" nor the empty string.
scraped_phones = scrapped_from_websites_copy["Phones"]
scraped_phones_as_text = scraped_phones.apply(str)
mask_has_phone = scraped_phones.notna() & ~scraped_phones_as_text.isin(["None", ""])

# Independent copies of each side of the split.
website_number = scrapped_from_websites_copy[mask_has_phone].copy()
no_website_number = scrapped_from_websites_copy[~mask_has_phone].copy()


In [29]:
# Assign PIC Source 1 as "Website" for every row of website_number whose Phones cell is non-null
website_number.loc[website_number["Phones"].notna(), "PIC Source 1"] = "Website"


In [30]:
# Show (rows, columns) of the de-duplicated website scrape result.
scrapped_from_websites_copy.shape

(10, 20)

In [31]:
# Show (rows, columns) of the website rows classified as having a phone.
website_number.shape


(0, 20)

In [32]:
# Show (rows, columns) of the website rows classified as having no phone.
no_website_number.shape

(10, 20)

### Merge df_With_Phones_2 with website_number

In [33]:
# Show (rows, columns) of the with-phone frame before the website rows are appended.
df_With_Phones_2.shape

(38, 15)

In [34]:
# Show (rows, columns) of the website-sourced rows about to be appended.
website_number.shape

(0, 20)

In [43]:
# Append the website-sourced rows to the with-phone frame and renumber the
# index. Columns present in only one input are kept in the result.
df_With_Phones_3 = pd.concat(
    [df_With_Phones_2, website_number],
    ignore_index=True,
)

# UEN must remain a unique key after the merge; also report the new size.
print(df_With_Phones_3["UEN"].is_unique, df_With_Phones_3.shape, sep="\n")

True
(38, 20)


### Merge df_without_websites with no_website_number

In [36]:
# Show (rows, columns) of the no-phone rows that have no Website value.
df_without_websites.shape

(52, 15)

In [37]:
# Show (rows, columns) of the scraped website rows that yielded no phone.
no_website_number.shape

(10, 20)

In [None]:
# Recombine the rows without a Website value with the scraped rows that
# yielded no phone, renumbering the index. Columns present in only one input
# are kept in the result.
df_without_phones_3 = pd.concat(
    [df_without_websites, no_website_number],
    ignore_index=True,
)

# UEN must remain a unique key after the merge; also report the new size.
print(df_without_phones_3["UEN"].is_unique, df_without_phones_3.shape, sep="\n")

True
(62, 20)


### Quality Check df_With_Phones_3

In [44]:
# For each key column, count null cells and cells equal to the empty string.
for checked_column in ("Phones", "PIC Source 1"):
    column_values = df_With_Phones_3[checked_column]
    print(f"{checked_column} - NA count:", column_values.isna().sum())
    print(f"{checked_column} - Empty string count:", (column_values == "").sum())

Phones - NA count: 0
Phones - Empty string count: 0
PIC Source 1 - NA count: 0
PIC Source 1 - Empty string count: 0


In [45]:
# Display whether UEN is a unique key in the final with-phone frame.
df_With_Phones_3["UEN"].is_unique

True

### Quality Check df_without_phones_3

In [41]:
# For each key column, count null cells and cells equal to the empty string.
for checked_column in ("Phones", "PIC Source 1"):
    column_values = df_without_phones_3[checked_column]
    print(f"{checked_column} - NA count:", column_values.isna().sum())
    print(f"{checked_column} - Empty string count:", (column_values == "").sum())

Phones - NA count: 61
Phones - Empty string count: 0
PIC Source 1 - NA count: 62
PIC Source 1 - Empty string count: 0


In [49]:
# Display whether UEN is a unique key in the final no-phone frame.
df_without_phones_3["UEN"].is_unique

True

In [54]:
# Display the final with-phone frame for visual inspection.
df_With_Phones_3

Unnamed: 0,UEN,Phones,Emails,Website,Facebook,LinkedIn,Instagram,TikTok,operational_street,operational_unit,operational_postal_code,operational_address,PIC Source 1,PIC Source 2,PIC Source 3,Website_Scrape_Status,Website_Scrape_Error,Website_Phones,Website_Contact_Page,Website_Page_Type
0,52895287X,[+6564836187],,https://hungrygowhere.com/singapore/chng_li_ch...,,,,,10 ANG MO KIO INDUSTRIAL PARK 2A AMK AUTOPOINT,01-21,568047,10 ANG MO KIO INDUSTRIAL PARK 2A AMK AUTOPOINT...,RecordOwl,,,,,,,
1,201505039Z,[+6562211336],[sup@wantonsg.com],https://wantonsg.com/,[https://www.facebook.com/wanton.sg/],,[https://www.instagram.com/wanton.sg/?hl=en],,458 RACE COURSE ROAD,,218699,458 RACE COURSE ROAD Singapore 218699,RecordOwl,,,,,,,
2,201212824G,[+6562550155],,,,,,,1 SOPHIA ROAD ONE SOPHIA,01-52,228149,1 SOPHIA ROAD ONE SOPHIA 01-52 Singapore 228149,RecordOwl,,,,,,,
3,201334572W,[+6596882525],[8082fnbpl@gmail.com],,[https://m.facebook.com/8082KOPITIAM/about/],,[https://www.instagram.com/explore/locations/5...,,21 BENOI SECTOR MAPLETREE BENOI LOGISTICS HUB,01-05,629853,21 BENOI SECTOR MAPLETREE BENOI LOGISTICS HUB ...,RecordOwl,,,,,,,
4,201621449E,[+6562659515],,https://kimlygroup.sg/,,,,,"13, WOODLANDS LINK",,738725,"13, WOODLANDS LINK Singapore 738725",RecordOwl,,,,,,,
5,201818844M,[+6585288528],[phovietnam.sg@gmail.com],https://phovietnam.sg/,[https://www.facebook.com/phovietnam.sg/],,,,161B PUNGGOL CENTRAL,18-97,822161,161B PUNGGOL CENTRAL 18-97 Singapore 822161,RecordOwl,,,,,,,
6,53383118X,[+6580705986],,,[https://www.facebook.com/sinkeefamouschickenr...,,,,14 MEDICAL DRIVE,01-03,117599,14 MEDICAL DRIVE 01-03 Singapore 117599,RecordOwl,,,,,,,
7,T17LL0782C,[+6587801998],,,,,[https://www.instagram.com/sg.jinhojiak/?hl=en],,71 NANYANG DRIVE NTU INNOVATION CENTRE,03-23,638075,71 NANYANG DRIVE NTU INNOVATION CENTRE 03-23 S...,RecordOwl,,,,,,,
8,202007378D,[+6568072250],,https://www.mycareersfuture.gov.sg/job/food-an...,,,,,51 UBI AVENUE 1 PAYA UBI INDUSTRIAL PARK,01-17,408933,51 UBI AVENUE 1 PAYA UBI INDUSTRIAL PARK 01-17...,RecordOwl,,,,,,,
9,202346258K,[+6596963939],,,[https://www.facebook.com/RITZSKITCHEN/],,,,713 JURONG WEST STREET 71 NANYANG SAPPHIRE,B1-37,640713,713 JURONG WEST STREET 71 NANYANG SAPPHIRE B1-...,RecordOwl,,,,,,,


In [53]:
# Display the final no-phone frame for visual inspection.
df_without_phones_3

Unnamed: 0,UEN,Phones,Emails,Website,Facebook,LinkedIn,Instagram,TikTok,operational_street,operational_unit,operational_postal_code,operational_address,PIC Source 1,PIC Source 2,PIC Source 3,Website_Scrape_Status,Website_Scrape_Error,Website_Phones,Website_Contact_Page,Website_Page_Type
0,53104036D,[+6561000029],[yummeefoods@foodline.sg],,,,,,3 YUNG SHENG ROAD,03-125,618499,3 YUNG SHENG ROAD 03-125 Singapore 618499,,,,,,,,
1,53399916B,,,,,,,,200 JALAN SULTAN TEXTILE CENTRE,02-04,199018,200 JALAN SULTAN TEXTILE CENTRE 02-04 Singapor...,,,,,,,,
2,202429107W,,,,,,,,348 JALAN BOON LAY,01-07B,619529,348 JALAN BOON LAY 01-07B Singapore 619529,,,,,,,,
3,202135973C,,,,,,,,24A SENOKO SOUTH ROAD SELECT GROUP BUILDING,,758099,24A SENOKO SOUTH ROAD SELECT GROUP BUILDING Si...,,,,,,,,
4,202540224W,,,,,,,,53 GEYLANG BAHRU GEYLANG BAHRU RIVERPOINT,06-3599,330053,53 GEYLANG BAHRU GEYLANG BAHRU RIVERPOINT 06-3...,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,202417398C,,,https://www.joohwafood.com.sg/,,,,,46 HOLLAND DRIVE,01-359,270046,46 HOLLAND DRIVE 01-359 Singapore 270046,,,,error,Error during scraping: ApifyApiError: Input is...,,,
58,53478000L,,,https://chickenpie.sg/,,,,,21 COMPASSVALE STREET COMPASSVALE PRIMARY SCHOOL,,545091,21 COMPASSVALE STREET COMPASSVALE PRIMARY SCHO...,,,,error,Error during scraping: ApifyApiError: Input is...,,,
59,T16LL2399D,,[ahmalormee@foodline.sg],https://misstamchiak.com/ah-ma-lor-mee,https://www.facebook.com/ahmalormee/,,[https://www.instagram.com/ahmalormee/?hl=en],,427 BUKIT PANJANG RING ROAD,08-699,670427,427 BUKIT PANJANG RING ROAD 08-699 Singapore 6...,,,,error,Error during scraping: ApifyApiError: Input is...,,,
60,53414661M,,,https://catzpatisserie.cococart.co/,https://www.facebook.com/CaTzPatisserie/,,[https://www.instagram.com/catzpatisserie/],,25 PHOENIX RISE HUA MEI GARDENS,,668227,25 PHOENIX RISE HUA MEI GARDENS Singapore 668227,,,,error,Error during scraping: ApifyApiError: Input is...,,,


### Saving

In [50]:
# Persist the with-phone frame to the Silver staging folder, without the index.
df_With_Phones_3.to_parquet(
    "./Staging/Silver/Silver_data_2_Phone.parquet",
    engine="fastparquet",
    index=False,
)

In [51]:
# Persist the no-phone frame to the Silver staging folder, without the index.
df_without_phones_3.to_parquet(
    "./Staging/Silver/Silver_data_2_No_Phone.parquet",
    engine="fastparquet",
    index=False,
)