In [None]:
import csv
import datetime
import os
import re
import smtplib
import timeit
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import psycopg2

In [None]:
# Settings used by the notification e-mails sent at job start and job end.
smtp_server = "smtp.gmail.com"
sender_email = "rise.python.automail@gmail.com"  # Enter your address
receiver_email = "chongdi0505@gmail.com"  # Enter receiver address
# SECURITY: prefer supplying the password through the GEOCODER_SMTP_PASSWORD
# environment variable. The literal fallback only keeps existing runs working;
# a password that has been saved in a notebook should be rotated and the
# fallback removed.
password = os.environ.get("GEOCODER_SMTP_PASSWORD", "rise654321")

In [None]:
# Column names written as the first row of the geocoded CSV output,
# grouped as: ticket fields, geocoder result fields, remaining input fields.
output_header = (
    "Ticket Date Time Header County "
    "Rating Longitude Latitude St_number St_name Neighborhood City State Zip "
    "Excav_company Inter1 Inter2"
).split()

In [None]:
def email_notification(subject, message):
    """Send a plain-text notification e-mail.

    The connection details come from the module-level settings
    ``smtp_server``, ``sender_email``, ``receiver_email`` and ``password``.

    Args:
        subject: Text placed in the Subject header.
        message: Plain-text body of the e-mail.

    Raises:
        Whatever ``smtplib`` raises when connecting, authenticating or
        sending fails; the connection is closed before the error propagates.
    """
    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = receiver_email
    msg['Subject'] = subject

    # Attach the message to the MIMEMultipart object
    msg.attach(MIMEText(message, 'plain'))

    # The context manager sends QUIT and closes the socket even when
    # starttls/login/sendmail raises, so a failed send does not leak the
    # connection.
    with smtplib.SMTP(smtp_server, 587) as server:
        server.starttls()
        server.login(sender_email, password)
        # sendmail expects the message serialized to a string.
        server.sendmail(sender_email, receiver_email, msg.as_string())

In [None]:
# Geocoding job: read '~'-delimited ticket lines, geocode each address through
# the PostGIS geocode() function, and write three files next to each other:
# a CSV of geocoded entries, a "badlines" file of entries that could not be
# processed, and a progress log. An e-mail is sent when the job starts and
# when it completes or terminates.
input_path = "H:/geocoding/reduced_input/"
output_path = "H:/geocoding/reduced_input/geocode_out/"
input_file = "emergency_2015-02-09-21_13_46.txt"

# Rough per-entry geocoding cost; only used for the estimate in the start e-mail.
SECONDS_PER_ENTRY = 4.0

# The address is supplied as a bound parameter so quotes or other special
# characters in the input cannot break or alter the statement.
GEOCODE_SQL = (
    "SELECT g.rating, ST_X(g.geomout), ST_Y(g.geomout), (addy).address, "
    "(addy).streetname, (addy).location, (addy).stateabbrev, (addy).zip "
    "FROM geocode(%s, 1) AS g;"
)


def _mail_stamp():
    """Return the current time as 'H:M:S m-d-Y' (unpadded) for e-mail bodies."""
    now = datetime.datetime.now()
    return "%d:%d:%d %d-%d-%d" % (
        now.hour, now.minute, now.second, now.month, now.day, now.year)


def _log_stamp():
    """Return the current time as 'm-d-Y H:M:S' (unpadded) for log lines."""
    now = datetime.datetime.now()
    return "%d-%d-%d %d:%d:%d" % (
        now.month, now.day, now.year, now.hour, now.minute, now.second)


def _build_output_row(rec, geocoded):
    """Combine one input record and its geocoder result into a CSV row.

    Raises IndexError when the input record is missing a field.
    """
    date_time = rec[1].split(' ')
    # NOTE(review): output_header lists 17 columns (both "Neighborhood" and
    # "City") while this row carries 16 values, so the columns after St_name
    # do not line up with the header. Confirm which header column the
    # geocoder's `location` value belongs to.
    return [
        rec[0],          # ticket
        date_time[0],    # date
        date_time[1],    # time
        rec[2],          # header
        rec[3],          # county
    ] + [str(value) for value in geocoded] + [
        rec[5],          # excav_comp
        rec[8],          # inter1
        rec[9].strip(),  # inter2
    ]


conn = None
processed_count = 0    # entries handled so far (geocoded or rejected)
bad_count = 0          # entries written to the badlines file
current_ticket = None  # ticket of the entry being processed, for the failure e-mail
job_succeeded = False

try:
    conn = psycopg2.connect("dbname='geocoder' user='postgres' host='localhost' password='postgres'")
    print("Database connected!")

    cur = conn.cursor()

    # All four files are closed by the with-statement, including on failure.
    with open(input_path + input_file, 'r') as f_in, \
            open(output_path + input_file.replace('.txt', '.csv'), 'w') as f_out, \
            open(output_path + input_file.replace('emergency', 'badlines'), 'w') as f_bad, \
            open(output_path + input_file.replace('.txt', '.log'), 'w') as f_log:

        # csv.writer quotes fields that contain commas or quotes, so such
        # values no longer shift the columns of the output file.
        writer = csv.writer(f_out, lineterminator='\n')
        writer.writerow(output_header)

        # Lines containing "Ticket" are header lines; blank lines carry no entry.
        entry_lines = [line for line in f_in.readlines()
                       if 'Ticket' not in line and line.strip()]

        # in hour
        estimated_processing_time = len(entry_lines) * SECONDS_PER_ENTRY / 3600

        start_subject = "Geocoding Started"
        # The indentation inside the text is kept so the e-mail body layout
        # stays the same as before.
        start_message = (
            "    Job started at %s\n"
            "    \n"
            "    Input file: %s\n"
            "    Input location: %s\n"
            "    Output location: %s\n"
            "    \n"
            "    Total input entries: %d\n"
            "    Estimated processing time: %.1f hours (%ds/entry)\n"
            "    "
        ) % (
            _mail_stamp(),
            input_file,
            input_path,
            output_path,
            len(entry_lines),
            estimated_processing_time,
            SECONDS_PER_ENTRY,
        )

        email_notification(start_subject, start_message)

        for line in entry_lines:
            rec = line.split('~')
            current_ticket = rec[0]

            try:
                # street number, street name, city name
                address = "%s %s, %s, NJ" % (rec[6], rec[7], rec[4])
                cur.execute(GEOCODE_SQL, (address,))
                matches = cur.fetchall()
                # Build the whole row before writing anything so a missing
                # field cannot leave a truncated row in the CSV.
                out_row = _build_output_row(rec, matches[0])
            except IndexError:
                # Either the line is missing a field or the geocoder returned
                # no match. Database errors are not caught here and abort
                # the job.
                f_bad.write("%s" % line)
                bad_count += 1
                f_log.write("%s: Entry %s badinput line!\n" % (_log_stamp(), rec[0]))
            else:
                writer.writerow(out_row)
                f_log.write("%s: Entry %s complete!\n" % (_log_stamp(), rec[0]))

            processed_count += 1

        # Reported as a percentage; guarded so an empty input does not divide by zero.
        if processed_count:
            uncoded_rate = 100.0 * bad_count / processed_count
        else:
            uncoded_rate = 0.0

        f_log.write("All jobs are complete!\nTotal input = %d\nUngeocoded rate = %.4f%%" % (
            processed_count, uncoded_rate))

except BaseException as exc:
    # Also covers a kernel interrupt, so a manually stopped job is reported too.
    terminate_subject = "Geocoding Terminated"
    terminate_message = (
        "        Job terminated at %s\n"
        "        \n"
        "        Current entry ID: %s\n"
        "        Total processed entries: %d\n"
        "        Error: %r\n"
        "        "
    ) % (
        _mail_stamp(),
        current_ticket,
        processed_count,
        exc,
    )
    try:
        email_notification(terminate_subject, terminate_message)
    except Exception as mail_error:
        # The original failure is the one worth surfacing; just note this one.
        print("Could not send termination e-mail: %r" % (mail_error,))
    raise

else:
    job_succeeded = True
    terminate_subject = "Geocoding Complete"
    terminate_message = (
        "        Job complete at %s\n"
        "        \n"
        "        Total processed entries: %d\n"
        "        Ungeocoded entries: %d\n"
        "        "
    ) % (
        _mail_stamp(),
        processed_count,
        bad_count,
    )
    email_notification(terminate_subject, terminate_message)

finally:
    if conn is not None:
        conn.close()
    if job_succeeded:
        print("All jobs are complete! Database disconnected!")
    else:
        print("Database disconnected!")