In [49]:
import config
import gspread
import json
import openai
import os

import numpy as np
import pandas as pd

# Authorize gspread
gc, authorized_user = gspread.oauth_from_dict(config.gspread_secret_key_personal, config.gspread_auth_key_personal)

# Read Input from directory

directory = r"C:\Users\Hooman Deghani\Python\Data Analysis\Outreach - Skyscraper\Output\Current"

## List all CSV files in the directory

urls = [os.path.join(directory, filename) for filename in os.listdir(directory) if filename.endswith('.xlsx')]

if len(urls) > 1:
    df = pd.concat([pd.read_excel(url) for url in urls], ignore_index=True)

else: 
    df = pd.read_excel(urls[0])

# Create a variable for the title of the project
for filename in os.listdir(directory):
    if filename.endswith('.xlsx'):
        # Capture the cryptic name
        artifact_name = filename
        
        # Extract the essence
        title = artifact_name.split('.')[0]

# Load records.json into a python dictionary titled records
with open(r"C:\Users\Hooman Deghani\Python\Data Analysis\Outreach - Skyscraper\Records.json", "r") as records_json:
    records = json.load(records_json)

# Check if an entry in records exists with title
if title not in records:
    records[title] = {
        'New': 'True',
        'Last': ''
    }

records

{'fthb': {'New': 'False',
  'Last': 'https://futurestartup.com/2020/03/21/coronavirus-the-complete-working-from-home-guide/'},
 'wfh': {'New': 'False',
  'Last': 'https://futurestartup.com/2020/03/21/coronavirus-the-complete-working-from-home-guide/'}}

In [41]:
# Clean up the data #

# Make df json compliant
df = df.applymap(str)
df.replace("nan", "", inplace=True)
df = df.applymap(str.strip)

# Remove rows where email is empty
Recipient_null = df.loc[:, 'Recipient'] == ""
df = df.loc[~Recipient_null]

# Reset the numerical index
df = df.reset_index()

In [42]:
# TODO:Fill referring topic with gpt3.5


In [44]:
# Prepare the batch #

# Create the dic variable
dic = records.get(title)

# if dataframe is new, batch = df.loc[0:30,:]
if dic.get("New") == "True":
    batch = df.loc[0:30,:]

# if dataframe is old, batch = the next 30 after "Last"
if dic.get("New") == "False":

    # Get the index label of the last email sent
    filt = df.loc[:, 'Referring page URL'] == records.get(title).get("Last")
    last = df.loc[filt].index[0]

    # batch = 30 after last
    batch = df.loc[(last + 1): (last + 31)]

batch

Unnamed: 0,level_0,index,Referring page URL,Domain rating,Target URL,Anchor,Root URL,First Name,Last Name,Recipient,Status,Replied,Converted,Email Sent,Referring Topic
0,0,0,https://resources.owllabs.com/blog/anywhere-of...,80.0,https://blog.hubspot.com/marketing/productivit...,Sam Mallikarjunan,https://owllabs.com,Bob,Simonton,bob.simonton@owllabs.com,,,,,
1,1,2,https://www.articulatemarketing.com/blog/best-...,69.0,https://blog.hubspot.com/marketing/productivit...,How to work from home: 20 tips from people who...,https://articulatemarketing.com,Clare,Dodd,clare.dodd@articulatemarketing.com,,,,,
2,2,4,https://www.catskills.com/covid-19/,36.0,https://blog.hubspot.com/marketing/productivit...,20 Tips for working at home,https://catskills.com,Sir/Madam,,office@catskills.com,,,,,
3,3,5,https://businessofanimation.com/creating-a-con...,14.0,https://blog.hubspot.com/marketing/productivit...,productivity,https://businessofanimation.com,Sir/Madam,,boa@businessofanimation.com,,,,,
4,4,6,https://www.nyit.edu/human_resources/remote_wo...,74.0,https://blog.hubspot.com/marketing/productivit...,How to Work From Home: 20 Tips From People Who...,https://nyit.edu,Sebastien,Marion,smarion@nyit.edu,,,,,
5,5,7,https://mopinion.com/resources-online-tools-fo...,71.0,https://blog.hubspot.com/marketing/productivit...,here,https://mopinion.com,Sir/Madam,,saskia@mopinion.com,,,,,
6,6,8,https://facilethings.com/blog/en/easy-time-man...,61.0,https://blog.hubspot.com/marketing/productivit...,routine like you’re going to the office,https://facilethings.com,Sir/Madam,,email@facilethings.com,,,,,
7,7,9,https://softwarekeep.com/blog/remote-work-tips...,69.0,https://blog.hubspot.com/marketing/productivit...,work-from-home,https://softwarekeep.com,Sir/Madam,,sue@softwarekeep.com,,,,,
8,8,10,https://salespanel.io/blog/marketing/hubspot-b...,63.0,https://blog.hubspot.com/marketing/productivit...,blog post,https://salespanel.io,Sir/Madam,,support@salespanel.io,,,,,
9,9,11,https://uwaterloo.ca/co-operative-education/su...,87.0,https://blog.hubspot.com/marketing/productivit...,How to Work From Home: 20 Tips From People Who...,https://uwaterloo.ca,Meghan,Ashdown,mashdown@uwaterloo.ca,,,,,


In [45]:
spreadsheet_title = "Outreach"
worksheet_title = f"Last:{batch.loc[batch.tail(1).index[0], 'First Name']}{batch.loc[batch.tail(1).index[0], 'Last Name']}"

spreadsheet = gc.open(spreadsheet_title)
worksheet = spreadsheet.add_worksheet(title=worksheet_title, rows=batch.shape[0], cols=batch.shape[1])
worksheet.update([batch.columns.tolist()] + batch.values.tolist())



{'spreadsheetId': '1DWBVxn5l2ww1kWQDMqDicRd_cDSXFWpdEVT_z2c9bk4',
 'updatedRange': "'Last:RuhulKader'!A1:O32",
 'updatedRows': 32,
 'updatedColumns': 15,
 'updatedCells': 480}

In [48]:
# Update records.json #

# Update records with the new variables
records[title]["New"] = "False"
records[title]["Last"] = batch.loc[batch.tail(1).index[0], 'Referring page URL']

# Push records to records.json
with open(r"C:\Users\Hooman Deghani\Python\Data Analysis\Outreach - Skyscraper\Records.json", "w") as records_json:
    json.dump(records, records_json)