In [7]:
import json
import sys
import unicodedata
import pandas as pd
import yaml
from datetime import datetime, timedelta
from google.oauth2 import service_account
from googleapiclient.discovery import build

def remove_accents(input_str):
    """Return ``input_str`` with its combining marks stripped.

    The text is first decomposed with NFKD normalization, then every
    character that ``unicodedata.combining`` classifies as a combining
    mark is discarded. The result stays in decomposed (NFKD) form.
    """
    decomposed = unicodedata.normalize('NFKD', input_str)
    # combining() returns 0 for base characters and non-zero for marks.
    base_chars = filter(lambda ch: unicodedata.combining(ch) == 0, decomposed)
    return "".join(base_chars)

def convert_submitted_at_to_brt(date_str):
    """Shift a ``MM/DD/YYYY HH:MM:SS`` timestamp back three hours and reformat it.

    The input is parsed as a naive datetime, three hours are subtracted
    (the fixed UTC-3 offset used here for BRT), and the result is
    returned as ``YYYY-MM-DD HH:MM:SS BRT``. If a ``ValueError`` is raised
    (e.g. the text does not match the expected format), the input is
    returned unchanged.
    """
    try:
        parsed_utc = datetime.strptime(date_str, '%m/%d/%Y %H:%M:%S')
        return (parsed_utc - timedelta(hours=3)).strftime('%Y-%m-%d %H:%M:%S BRT')
    except ValueError:
        # Leave unparseable values as they are rather than failing the row.
        return date_str

def process_row(row, date_column):
    """Return an accent-free copy of ``row`` with the date column converted.

    Args:
        row: Mapping-like object exposing ``items()`` that yields
            ``(column name, cell text)`` pairs.
        date_column: Name of the column whose value is passed through
            ``convert_submitted_at_to_brt``. It may be given with or
            without accents.

    Returns:
        A dict whose keys and values have had their accents removed and
        whose date column value has been converted.
    """
    # Keys are compared after accent removal, so the configured column name
    # must be normalised the same way; otherwise a name containing accents
    # could never match and the conversion would be silently skipped.
    if isinstance(date_column, str):
        target_column = remove_accents(date_column)
    else:
        target_column = date_column

    cleaned_row = {}
    for key, value in row.items():
        clean_key = remove_accents(key)
        clean_value = remove_accents(value)
        if clean_key == target_column:
            clean_value = convert_submitted_at_to_brt(clean_value)
        cleaned_row[clean_key] = clean_value
    return cleaned_row

def authenticate_google_sheets(credentials_file):
    """Create a Google Sheets v4 API client from a service-account key file.

    The credentials are loaded from ``credentials_file`` and requested with
    the ``spreadsheets.readonly`` scope only. Returns the client object
    produced by ``build``.
    """
    read_only_scopes = ['https://www.googleapis.com/auth/spreadsheets.readonly']
    credentials = service_account.Credentials.from_service_account_file(
        credentials_file,
        scopes=read_only_scopes,
    )
    return build('sheets', 'v4', credentials=credentials)

def read_google_sheet(service, sheet_id, range_name):
    """Read a range from a Google Sheet into a DataFrame.

    The first returned row is used as the column header and the remaining
    rows become the data.

    Args:
        service: Sheets API client exposing
            ``spreadsheets().values().get(...).execute()``.
        sheet_id: Spreadsheet ID passed as ``spreadsheetId``.
        range_name: Range passed as ``range``.

    Returns:
        A ``pandas.DataFrame`` with one column per header cell.

    Raises:
        ValueError: If the range contains no values, or if a data row has
            more cells than the header.
    """
    request = service.spreadsheets().values().get(
        spreadsheetId=sheet_id, range=range_name
    )
    values = request.execute().get('values', [])

    if not values:
        raise ValueError("No data found in the Google Sheet.")

    header, *data = values
    width = len(header)

    rows = []
    for row in data:
        if len(row) > width:
            # Extra cells have no column to go into, so this is a real mismatch.
            raise ValueError("Data row length does not match header length.")
        # The Sheets API drops trailing empty cells, so a row that ends in
        # blank columns arrives shorter than the header. Pad it with empty
        # strings instead of rejecting the whole sheet.
        rows.append(list(row) + [''] * (width - len(row)))

    return pd.DataFrame(rows, columns=header)

def csv_to_json(config):
    """Export a Google Sheet range to a JSON file.

    Each row is passed through ``process_row`` (accent removal plus date
    conversion of ``date_column``) and the resulting list of dicts is
    written as indented, non-ASCII-escaped JSON.

    Args:
        config: Mapping with the keys ``credentials_file``, ``sheet_id``,
            ``range_name``, ``output_json_file`` and ``date_column``.

    Raises:
        KeyError: If one of the required keys is missing from ``config``.
            Errors raised during the conversion itself are not propagated;
            they are reported on stderr.
    """
    credentials_file = config['credentials_file']
    sheet_id = config['sheet_id']
    range_name = config['range_name']
    output_json_file = config['output_json_file']
    date_column = config['date_column']

    try:
        service = authenticate_google_sheets(credentials_file)
        df = read_google_sheet(service, sheet_id, range_name)

        data_list = [process_row(row, date_column) for _, row in df.iterrows()]
        # Serialise before opening the output so a serialisation failure
        # does not leave a truncated file behind.
        json_data = json.dumps(data_list, indent=4, ensure_ascii=False)

        with open(output_json_file, 'w', encoding='utf-8') as json_file:
            json_file.write(json_data)

        print(f"Successfully converted data from Google Sheet {sheet_id} to {output_json_file}")

    except FileNotFoundError as fnf:
        # The missing path may be the credentials file or the output
        # location; report the one the exception names, and fall back to the
        # credentials file when the exception carries no filename.
        missing_path = fnf.filename if fnf.filename is not None else credentials_file
        print(f"Error: The file {missing_path} was not found.", file=sys.stderr)
    except IOError:
        print("Error: An I/O error occurred while processing the file.", file=sys.stderr)
    except ValueError as ve:
        print(f"Error: {ve}", file=sys.stderr)
    except Exception as e:
        # Top-level boundary of the script: report instead of crashing.
        print(f"An unexpected error occurred: {e}", file=sys.stderr)

# Load the conversion settings from the YAML configuration file.
# csv_to_json reads the keys 'credentials_file', 'sheet_id', 'range_name',
# 'output_json_file' and 'date_column' from the loaded mapping.
with open('google_sheets_config.yaml', 'r', encoding='utf-8') as config_file:
    config = yaml.safe_load(config_file)

# Run the conversion. csv_to_json catches errors raised while fetching,
# processing or writing the data and prints them to stderr; a missing
# configuration key still raises KeyError because the keys are read before
# its try block.
csv_to_json(config)

Successfully converted data from Google Sheet 1H-jdGeRTTj1vongidNJSsthWCTZXqYUylVo6xpMw0sI to Canada - vistoria - safra 23_24.json
