# Automating Data Collection for Units needing Freight Quotes

In [61]:
# imports

# gsheets connection
import gspread_dataframe as gd
import gspread as gs
from google.colab import auth
from google.auth import default

# data handling
from datetime import date
import pandas as pd

# email handling
import os
from getpass import getpass
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
import smtplib
from io import StringIO, BytesIO
from email import encoders


# Connect to Gsheet

In [44]:
# Authenticate in Colab, then build the gspread client `gc` that
# pandas_to_sheets() uses to open workbooks.
auth.authenticate_user() # prompts google sign in
creds, _ = default() # get default credentials from auth; the second returned value is discarded

gc = gs.authorize(creds) # pass credentials to gspread

# Collect data using gspread_dataframe

In [45]:
# Sheet URLs
# VI is the workbook read via pandas_to_sheets(VI, "PR") further down.
# LM is not referenced in the cells visible in this notebook.
VI = 'https://docs.google.com/spreadsheets/d/1bKwL32NDnkvZQdiba4pqfvvGDkqh9CBO3akwBV-uJEc/edit#gid=27' # Production Data, Input Sheet
LM = 'https://docs.google.com/spreadsheets/d/1vP8FzyoRfl1bnO4kSQa6t2xg5RdN4w8u0RAbd6rPzEM/edit#gid=0' # Shipping Data, Output Sheet for Cali Shipping Operations

In [46]:
def pandas_to_sheets(url, ws_name, df=None, mode='r', usecols=None, header=1):
    """Read from, overwrite, or append to a Google Sheets worksheet.

    Uses the module-level gspread client ``gc`` and ``gspread_dataframe``
    (``gd``).

    Parameters
    ----------
    url : str
        URL of the Google Sheets workbook.
    ws_name : str
        Name of the worksheet (tab) inside the workbook.
    df : DataFrame, optional
        Data to write. Required when ``mode`` is ``'w'`` or ``'a'``.
    mode : str
        ``'w'`` - clear the worksheet and write ``df`` with a header row.
        ``'a'`` - keep existing data and write ``df`` (no header) below it.
        Anything else (default ``'r'``) - read the worksheet.
    usecols : list of int, optional
        Columns to keep when reading, passed to ``gd.get_as_dataframe``.
        Defaults to the columns the shipping robot needs
        (presumably positional indices - confirm against the sheet layout).
    header : int
        Header row passed to ``gd.get_as_dataframe`` when reading.

    Returns
    -------
    True after a write or append; a DataFrame when reading.

    Raises
    ------
    ValueError
        If ``mode`` is ``'w'`` or ``'a'`` and ``df`` is None.
    """
    # Validate BEFORE touching the sheet: in write mode the worksheet is
    # cleared first, so discovering a missing df afterwards would wipe the
    # existing data and then fail.
    if mode in ('w', 'a') and df is None:
        raise ValueError(f"mode={mode!r} requires a DataFrame to be passed as df")

    ws = gc.open_by_url(url).worksheet(ws_name)  # open worksheet by GSheet URL and worksheet name

    # clear and write new data to worksheet
    if mode == 'w':
        ws.clear()
        gd.set_with_dataframe(worksheet=ws, dataframe=df, include_index=False,
                              include_column_header=True, resize=True)
        return True

    # append new data to existing data in worksheet
    if mode == 'a':
        ws.add_rows(df.shape[0])  # make room for the incoming rows
        first_free_row = len(ws.get_all_values()) + 1
        gd.set_with_dataframe(worksheet=ws, dataframe=df, include_index=False,
                              include_column_header=False, row=first_free_row,
                              resize=False)
        return True

    # get data from worksheet as df, by default including only the columns
    # necessary for the shipping robot
    if usecols is None:
        usecols = [2, 3, 4, 5, 9, 15, 16, 20, 22]
    return gd.get_as_dataframe(worksheet=ws, usecols=usecols, header=header)

In [47]:
# Read the "PR" worksheet of the production workbook and replace every
# missing cell with an empty string.
q_df = pandas_to_sheets(VI, "PR").fillna("")

# Filter Production Data for Units to Quote

In [48]:
# Select the units that still need a freight quote:
#   * Shipping Arranger starts with VALEW or 1954 MFG,
#   * Delivery Cost is empty or "TBD",
#   * a VIN is present (at least one character followed by four digits),
#   * Customer does not contain "Slot".
# na=False keeps every mask strictly boolean even if a cell is not a string.
arranger_ok = q_df['Shipping Arranger'].str.match('VALEW|1954 MFG', na=False)

# The previous pattern "|TBD" matched EVERY string (its empty alternative
# always matches at position 0), so text costs were never filtered out.
# Compare the whole, stripped cell against the two "no cost yet" values.
cost_missing = (q_df['Delivery Cost'].fillna("").astype(str).str.strip()
                .isin(["", "TBD"]))

vin_present = q_df['VIN #'].str.match(r".+\d{4}", na=False)
slot_row = q_df['Customer'].str.contains('Slot', na=False)

q_df = q_df[arranger_ok & cost_missing & vin_present & ~slot_row].copy()

# Group by delivery address and body type

In [49]:
# One row per (customer, body type, shipping address) with the number of
# VINs in that group reported in a "Count" column.
group_keys = ['Customer', 'Body Type', 'Shipping Address']
to_quote = q_df.groupby(group_keys)['VIN #'].count().reset_index(name='Count')

Unnamed: 0,Customer,Body Type,Shipping Address,Count
0,"ARI / UNITED - JACKSON, TN",10' DUMP,TBD,1
1,"ARI / UNITED - MOBILE, AL",10' DUMP,"4477 LAUGHLIN DRIVE S MOBILE, AL 36693",1
2,"ARI / UNITED - ORLANDO, FL",10' DUMP,"11909 S ORANGE BLOSSOM TR ORLANDO, FL 32837",1
3,ARI / UNITED - TBD,2000 TANK,TBD,2
4,"BOTTOM LINE EQUIPMENT - BAYTOWN, TX",2000 TANK,"11331 I-10 EAST #3, BAYTWON, TX 77523",4
5,"CASHMAN EQUIPMENT - HENDERSON, NV",4000 TANK,"3300 SAINT ROSE PARKWAY, HENDERSON NV 89052",2
6,"DANNA FARMS - PLUMAS LAKE, CA ®",1000 GAL FUEL & LUBE *CUSTOM PAINT*,"1001 FEATHER RIVER BLVD, PLUMAS LAKE, CA 95961",1
7,"HOLT OF CA - ELK GROVE, CA",2000 TANK,"9062 UNION PARK WAY, ELK GROVE CA 95624",4
8,"KIRBY SMITH - DALLAS, TX",2000 TANK,"8505 S CENTRAL EXPWY, DALLAS TX",2
9,"PAY DIRT CONSTRUCTION - VISALIA, CA",WATER TOWER,"31198 ROAD 132, VISALIA, CA 93292",1


# Email the to_quote df

In [71]:
def email_df(sender, recipients, subject, df, password):
  """Email a DataFrame as an HTML table plus an .xlsx attachment via Gmail.

  Parameters
  ----------
  sender : str
      Address used for the From header, the SMTP envelope and the login.
  recipients : str or iterable of str
      One address, a comma-separated string of addresses, or an iterable
      whose items are addresses / comma-separated strings.
  subject : str
      Subject line of the message.
  df : DataFrame
      Data to send; rendered with ``df.to_html()`` for the body and
      ``df.to_excel()`` for the attachment (index included in both).
  password : str
      Password used with ``sender`` to log in to smtp.gmail.com.

  Returns
  -------
  None. Prints "DF EMAILED SUCCESSFULLY" once the message has been handed
  to the server.
  """
  # Normalise recipients into ONE flat list of clean addresses. The previous
  # code produced a list of lists (one inner list per entry), which is not
  # what smtplib.sendmail expects.
  if isinstance(recipients, str):
    recipients = [recipients]
  emaillist = [
      address.strip()
      for entry in recipients
      for address in str(entry).split(',')
      if address.strip()
  ]

  # create message, add subject, sender and visible recipient list
  msg = MIMEMultipart()
  msg['Subject'] = subject
  msg['From'] = sender
  msg['To'] = ', '.join(emaillist)

  # format df as an html table and use it as the message body
  html = """\
  <html>
    <head></head>
    <body>
      {0}
    </body>
  </html>
  """.format(df.to_html())
  msg.attach(MIMEText(html, 'html'))

  # Build the workbook in memory instead of writing and then deleting
  # "attachment.xlsx" in the working directory: nothing is left behind if
  # sending fails, and no existing file of that name can be clobbered.
  workbook = BytesIO()
  df.to_excel(workbook)
  # MIMEApplication base64-encodes the payload itself.
  msg_attach = MIMEApplication(workbook.getvalue(), _subtype="octet-stream")
  msg_attach.add_header("Content-Disposition", "attachment",
                        filename="attachment.xlsx")
  msg.attach(msg_attach)

  # connect to email server and send email; the context manager closes the
  # connection even when login or sendmail raises
  with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
    server.login(sender, password)
    server.sendmail(sender, emaillist, msg.as_string())

  print("DF EMAILED SUCCESSFULLY")

In [73]:
# Build today's subject line and send the to_quote table.
today = date.today().strftime("%m/%d/%Y")
subject = f"To Quote {today}"

# SECURITY: the mail password must never be stored in the notebook. It is read
# from the GMAIL_APP_PASSWORD environment variable, falling back to an
# interactive prompt. The app password that used to be written on this line
# has been exposed and should be revoked.
password = os.environ.get("GMAIL_APP_PASSWORD") or getpass("Gmail app password: ")

sender = '1954jacobf@gmail.com'
recipients = ['jacob@1954mfg.com', 'jacob.forbus@gmail.com', 'christalfaith@outlook.com']

email_df(sender, recipients, subject, to_quote, password)

DF EMAILED SUCCESSFULLY
