In [1]:
import os
import time
from datetime import datetime, timedelta

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

### LOGIN

In [None]:
# Portal address and credentials are read from environment variables when they are
# set, so real secrets do not have to be saved inside the notebook. The literals are
# the original placeholders and remain the fallback values.
portal_url = os.environ.get("CALLS_PORTAL_URL", "https://linkfromyourwebsite.com")  # input website link
portal_user = os.environ.get("CALLS_PORTAL_USER", "username")  # input username
portal_password = os.environ.get("CALLS_PORTAL_PASSWORD", "password")  # input password

driver = webdriver.Chrome()
driver.get(portal_url)

# Wait (up to 10 s) for the login form instead of assuming it is already rendered.
page_wait = WebDriverWait(driver, 10)
username_field = page_wait.until(EC.presence_of_element_located((By.NAME, "login")))
password_field = page_wait.until(EC.presence_of_element_located((By.NAME, "pass")))

# Insert credentials
username_field.send_keys(portal_user)
password_field.send_keys(portal_password)

# Submit the form by pressing Enter in the password field
password_field.send_keys(Keys.RETURN)

# Instead of a fixed 5 s sleep, wait until the date filter used by the next step
# is present; this returns as soon as the page is ready and raises
# TimeoutException if the login did not lead to that page.
page_wait.until(EC.presence_of_element_located((By.ID, "start_date")))

### Date Filter

In [None]:
# The report is filtered to the previous day (d-1), which the website expects as
# explicit start/end timestamps. Both bounds come from a single clock reading so
# they always refer to the same day, even if the cell runs across midnight.
yesterday = datetime.now() - timedelta(days=1)
start_datetime = yesterday.strftime("%d/%m/%Y 00:00:00")
end_datetime = yesterday.strftime("%d/%m/%Y 23:59:59")

# Find the date filter fields, waiting up to 10 s for them to appear
filter_wait = WebDriverWait(driver, 10)
start_date_field = filter_wait.until(EC.presence_of_element_located((By.ID, "start_date")))
end_date_field = filter_wait.until(EC.presence_of_element_located((By.ID, "end_date")))

# Replace whatever the fields currently contain with yesterday's range
start_date_field.clear()
start_date_field.send_keys(start_datetime)
end_date_field.clear()
end_date_field.send_keys(end_datetime)

# Click the button to apply the filter once it is clickable
search_button = filter_wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "btn-primary")))
search_button.click()

### Download Table

In [None]:
# Click the CSV export button and close the browser afterwards. The try/finally
# guarantees the browser session is shut down even when the button never becomes
# clickable (TimeoutException) or the click fails.
try:
    download_button = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.ID, "button_csv"))
    )
    download_button.click()

    # Fixed pause before quitting — presumably to let the download finish; the
    # file itself is not checked here.
    time.sleep(4)
finally:
    driver.quit()

### Incremental update

In [None]:
import pandas as pd
import glob
import os

In [None]:
# Directory where the browser saves the exported CSV
diretorio_downloads = r"C:\Users\Bruno\Downloads"

# Collect every file in that directory whose name matches "calls_report_*.csv"
arquivos_csv = glob.glob(os.path.join(diretorio_downloads, "calls_report_*.csv"))

# Fail with an exception rather than exit(): inside a notebook exit() asks the
# kernel to shut down, which discards all state. Raising stops the run at this
# cell and keeps the kernel available for inspection.
if not arquivos_csv:
    raise FileNotFoundError(
        f"No files found with the specified pattern in {diretorio_downloads}"
    )

# Select the most recent file according to os.path.getctime
arquivo_csv = max(arquivos_csv, key=os.path.getctime)
print(f"File detected: {arquivo_csv}")

In [None]:
# Workbook holding the accumulated calls; it is read here and overwritten below.
file_path_xlsx = r"C:\Users\Bruno\Documents\ligacoes\ligacoes_2024_25.xlsx"

calls_table = pd.read_excel(file_path_xlsx)  # rows already stored in the workbook
new_data = pd.read_csv(arquivo_csv)          # rows from the CSV detected earlier

# Stack stored and new rows; when an ID occurs more than once, only its last
# occurrence is kept.
combined_rows = pd.concat([calls_table, new_data])
unique_rows = combined_rows.drop_duplicates(subset=['ID'], keep='last')

# The final row is always discarded.
# NOTE(review): presumably the export ends with a footer/summary line — confirm,
# otherwise this drops a real call.
df = unique_rows.iloc[:-1]

# Overwrite the workbook with the merged table (no index column)
df.to_excel(file_path_xlsx, index=False)
print(f"Updated Excel file saved in {file_path_xlsx}")


### Transformations

In [None]:
# Convert column types: identifier-like columns become text, HORA becomes a
# timestamp and DURAÇÃO becomes a whole number of seconds.
for text_column in ('ID', 'Nº A', 'Nº B', 'RAMAL', 'SENTIDO'):
    df[text_column] = df[text_column].astype(str)

# HORA is parsed with an explicit day/month/year format
df["HORA"] = pd.to_datetime(df["HORA"], format='%d/%m/%Y %H:%M:%S')

# DURAÇÃO is parsed as a timedelta and then expressed in integer seconds
call_durations = pd.to_timedelta(df['DURAÇÃO'])
df['DURAÇÃO'] = call_durations.dt.total_seconds().astype(int)

df.dtypes

ID                  object
HORA        datetime64[ns]
EVENTO              object
Nº A                object
Nº B                object
RAMAL               object
SENTIDO             object
DURAÇÃO              int64
FILA                object
GRAVAÇÃO            object
dtype: object

In [None]:
# Build the export table: drop the unused columns, lower-case the headers and
# give the call columns their final names.
df_new = df.drop(columns=['FILA','GRAVAÇÃO'])

df_new.columns = df_new.columns.str.lower()

df_new = df_new.rename(columns={'nº a':'origem_lig','nº b':'dest_lig','hora':'data_lig','duração':'tempo_chamada'})

# Strip a leading '55' / '+55' from the destination number
# (presumably the country code — confirm no local numbers start with 55).
df_new['dest_lig'] = df_new['dest_lig'].str.replace(r'^\+?55', '', regex=True)

# Remove a trailing '.0' (when present) from 'ramal', 'origem_lig' and 'dest_lig'.
# 'ramal' previously had its last two characters cut unconditionally, which also
# truncated values that did not end in '.0' (e.g. 'nan' -> 'n'); it now uses the
# same anchored pattern as the other two columns.
for number_column in ('ramal', 'origem_lig', 'dest_lig'):
    df_new[number_column] = (
        df_new[number_column].astype(str).str.replace(r'\.0$', '', regex=True)
    )

In [None]:
# Write the transformed calls table to a CSV (relative path, no index column)
output_csv = "tb_ligacoes.csv"
df_new.to_csv(output_csv, index=False)