In [13]:
import os
import re
import smtplib
import time
from datetime import datetime
from email import encoders, utils
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from time import sleep

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

In [14]:
# Tender-name search terms (slope, collapse/landslide, restoration); passed to
# get_tender_by_kw, which types each one into the "tenderName" field.
keywords = ["邊坡", "崩塌", "復建"]
# Organisation-name search terms; passed to get_tender_by_org, which types each
# one into the "orgName" field.
# NOTE(review): the second entry ends with a trailing space — confirm whether
# that is intentional or a typo that could affect matching on the site.
orgkws = ["水土保持局臺北分局", "羅東林區管理處 ", "第四區養護工程處", "宜蘭縣"]

In [15]:
def get_tender_by_kw(kws, loc, driver_path=r"D:\PyProjects\projcrawler\phantomjs-2.1.1-windows\bin\phantomjs.exe"):
    """Search the PCC tender site once per keyword and collect the result rows.

    For every keyword the basic-search form is reloaded, the keyword is typed
    into the "tenderName" field, ``loc`` is typed into the "orgName" field, two
    radio buttons are selected, and the form is submitted with RETURN.

    Args:
        kws: iterable of strings to search for in the tender name.
        loc: string typed into the "orgName" field for every search.
        driver_path: path to the PhantomJS executable. Defaults to the
            location the notebook was originally written against.

    Returns:
        A list of the ``<tr onmouseover="overcss(this);">`` elements parsed by
        BeautifulSoup from all result pages, de-duplicated with ``set()`` (so
        the order of the returned list is arbitrary).
    """
    search_url = "https://web.pcc.gov.tw/tps/pss/tender.do?method=goSearch&searchMode=common&searchType=basic"
    driver = webdriver.PhantomJS(executable_path=driver_path)
    tender_list = []
    try:
        for k in kws:
            # Reload the form for every keyword so each search starts clean.
            driver.get(search_url)
            query = driver.find_element_by_name("tenderName")
            query.clear()
            query.send_keys(k)
            # NOTE(review): presumably the procurement-category radio button —
            # confirm which category "#radProctrgCate1" selects on the form.
            driver.find_element_by_css_selector("#radProctrgCate1").click()
            # NOTE(review): presumably restricts results by tender date range —
            # confirm against the form.
            driver.find_element_by_css_selector("#rangeTenderDateRadio").click()
            driver.find_element_by_name("orgName").send_keys(loc)
            query.send_keys(Keys.RETURN)
            sleep(1)  # fixed wait for the results page to come back
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            tender_list += soup.find_all("tr", onmouseover="overcss(this);")
    finally:
        # Always shut the headless browser down; otherwise every call leaves a
        # PhantomJS process running, even when a search raises.
        driver.quit()
    return list(set(tender_list))

def get_tender_by_org(orgs, driver_path=r"D:\PyProjects\projcrawler\phantomjs-2.1.1-windows\bin\phantomjs.exe"):
    """Search the PCC tender site once per organisation name and collect the result rows.

    For every entry the basic-search form is reloaded, the entry is typed into
    the "orgName" field, two radio buttons are selected, and the form is
    submitted with RETURN.

    Args:
        orgs: iterable of strings to search for in the organisation name.
        driver_path: path to the PhantomJS executable. Defaults to the
            location the notebook was originally written against.

    Returns:
        A list of the ``<tr onmouseover="overcss(this);">`` elements parsed by
        BeautifulSoup from all result pages, de-duplicated with ``set()`` (so
        the order of the returned list is arbitrary).
    """
    search_url = "https://web.pcc.gov.tw/tps/pss/tender.do?method=goSearch&searchMode=common&searchType=basic"
    driver = webdriver.PhantomJS(executable_path=driver_path)
    tender_list = []
    try:
        for o in orgs:
            # Reload the form for every organisation so each search starts clean.
            driver.get(search_url)
            query = driver.find_element_by_name("orgName")
            query.clear()
            query.send_keys(o)
            # NOTE(review): presumably the procurement-category radio button —
            # confirm which category "#radProctrgCate1" selects on the form.
            driver.find_element_by_css_selector("#radProctrgCate1").click()
            # NOTE(review): presumably restricts results by tender date range —
            # confirm against the form.
            driver.find_element_by_css_selector("#rangeTenderDateRadio").click()
            query.send_keys(Keys.RETURN)
            sleep(1)  # fixed wait for the results page to come back
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            tender_list += soup.find_all("tr", onmouseover="overcss(this);")
    finally:
        # Always shut the headless browser down; otherwise every call leaves a
        # PhantomJS process running, even when a search raises.
        driver.quit()
    return list(set(tender_list))

def get_tender_info(tender):
    """Turn one result-table row into a flat dict of strings.

    ``tender`` must offer ``find_all("td")``; cells 1-8 are read through their
    ``.text``. All whitespace is removed from every field except the combined
    case-number/name field (cell 2), where runs of whitespace collapse to a
    single space. The link is taken from the first ``<a>`` inside cell 2, with
    ".." in its href replaced by the site prefix.

    Returns:
        dict with the keys 機關名稱, 標案案號&標案名稱, 傳輸次數, 招標方式,
        採購性質, 公告日期, 截止投標, 預算金額 and url, in that order.
    """
    cells = tender.find_all("td")

    def squash(cell):
        # Drop every whitespace character from the cell text.
        return "".join(cell.text.split())

    info = {}
    info["機關名稱"] = squash(cells[1])
    info["標案案號&標案名稱"] = " ".join(cells[2].text.split())
    trailing_fields = (
        ("傳輸次數", 3),
        ("招標方式", 4),
        ("採購性質", 5),
        ("公告日期", 6),
        ("截止投標", 7),
        ("預算金額", 8),
    )
    for label, position in trailing_fields:
        info[label] = squash(cells[position])
    relative_link = cells[2].find('a').get('href')
    info["url"] = relative_link.replace("..", "https://web.pcc.gov.tw/tps")
    return info

In [16]:
# Run the keyword search once per organisation-name filter and pool the rows.
t_ks = []
for loc in ("臺北", "新北", "桃園", "新竹", "宜蘭"):
    t_ks.extend(get_tender_by_kw(keywords, loc))

# Run the organisation-name search over the configured list.
t_orgs = get_tender_by_org(orgkws)

# Merge both result sets, dropping duplicates (resulting order is arbitrary).
t_list = list(set(t_ks + t_orgs))



In [17]:
# Row counts: keyword searches, organisation searches, merged de-duplicated list.
len(t_ks), len(t_orgs), len(t_list)

(23, 38, 61)

In [18]:
# Preview the parsed record for the first collected row.
get_tender_info(t_list[0])

{'機關名稱': '新北市坪林區公所',
 '標案案號&標案名稱': '111011201 111年度坪林區天然災害搶災搶險及復建工程開口契約',
 '傳輸次數': '03',
 '招標方式': '公開招標',
 '採購性質': '工程類',
 '公告日期': '111/02/10',
 '截止投標': '111/02/16',
 '預算金額': '7,564,367',
 'url': 'https://web.pcc.gov.tw/tps/tpam/main/tps/tpam/tpam_tender_detail.do?searchMode=common&scope=F&primaryKey=53720251'}

In [19]:
# Today's date (YYYY/MM/DD) for the heading of the e-mail body.
now = datetime.now().strftime("%Y/%m/%d")
# "<h>" is not an HTML element, so it would render as plain inline text;
# use a real heading tag instead.
text = f"<h2>{now}等標期內標案</h2>"

In [20]:
# Parse every collected row into a dict and build the table in one step;
# concatenating inside the loop re-copied the whole frame on each iteration.
tender_records = [get_tender_info(t) for t in t_list]
tender_df = (
    pd.DataFrame(tender_records)
    # Order by bid deadline. The values are strings such as '111/02/16', so
    # this is a text sort.
    # NOTE(review): text order matches date order only while every year has
    # the same number of digits — confirm that holds for the data.
    .sort_values("截止投標", ignore_index=True)
    # Drop rows that are identical in every column, keeping the last one.
    .drop_duplicates(keep='last', ignore_index=True)
)

In [21]:
# Turn each detail URL into a clickable link. The href value is quoted because
# the URLs contain "=" and "&", which are not valid in an unquoted HTML
# attribute value.
# NOTE: this cell mutates tender_df and text in place; re-running it without
# re-running the cells that build them wraps the links twice and appends the
# table twice.
tender_df['url'] = '<a href="' + tender_df['url'] + '"><div>連結</div></a>'
# escape=False keeps the anchor markup intact instead of escaping it to text.
text += tender_df.to_html(escape=False).replace("\n", "")

In [22]:
# Recipient addresses; passed to smtp.sendmail as the list of recipients.
# NOTE(review): personal addresses are hard-coded in the notebook — consider
# loading them from configuration before sharing this file.
mailList = ['chongjing3370@gmail.com', 'tusty9292@gmail.com', 'fish892555@gmail.com']

In [23]:
# Sender address handed to smtp.sendmail in the send cell.
# NOTE(review): this differs from the From header and from the account used to
# log in — confirm which address is intended.
me = 'tusty9292@gmail.com'

# MIMEMultipart() already writes its own multipart Content-Type header. The
# former `mail['Content-Type'] = "text/calendar; ..."` assignment did not
# replace it but appended a second, contradictory header, so it is removed.
mail = MIMEMultipart()
mail['Subject'] = "政府採購公告網 " + time.strftime("%Y/%m/%d-%H%M")
mail['From'] = 'chongjing3370@gmail.com'
mail['Date'] = utils.formatdate(localtime=True)
mail['Message-ID'] = utils.make_msgid()
# NOTE(review): no To header is set, so recipients are only given at send
# time — confirm that hiding the recipient list is intended.

# HTML body; the charset is given to the constructor, so no separate
# set_charset() call is needed.
body = MIMEText(text, 'html', "utf-8")
mail.attach(body)

In [24]:
# Read the Gmail app password from the environment instead of storing it in
# the notebook. Looking it up before connecting makes a missing variable fail
# early with a KeyError.
# NOTE(review): the password that used to be hard-coded here should be treated
# as leaked and revoked.
gmail_password = os.environ["GMAIL_APP_PASSWORD"]

with smtplib.SMTP(host="smtp.gmail.com", port=587) as smtp:  # connect to the SMTP server
    try:
        smtp.ehlo()  # identify ourselves to the SMTP server
        smtp.starttls()  # switch the connection to encrypted transport
        smtp.login('chongjing3370@gmail.com', gmail_password)  # log in to the sender's Gmail account
        smtp.sendmail(me, mailList, mail.as_string())  # send the message
        print("Complete!")
    except Exception as e:
        # Best effort: report the failure instead of raising out of the cell.
        print("Error message: ", e)

Complete!
