set up

In [4]:
import os
import sys
import json
import datetime
import psycopg2
import configparser
import traceback


# Load config file
config = configparser.ConfigParser()
config.read('../config/config.conf')
configs = config['source']
path = config['path']
temp_dir = f"..{path['temp']}"

sys.path.append(config['path']['root'])

from utils import (validation as validate,
                   parsing as parse,
                   logging)

from importlib import reload
reload(validate)
reload(logging)
reload(parse)

# Load configuration file (json format) for source database 
with open(configs['config_file']) as json_file:
    source = json.load(json_file)

Test connection

transform

In [5]:
def process_file(filename, cnx):
    header_skipped = False  # Initialize the flag outside the loop
    with open(filename, 'r') as f:
        for line in f:
            if not header_skipped:  # Skip the header row
                header_skipped = True
                continue
            process_line(line, cnx)


def process_line(line, cnxe):
    row = line.strip().split(',')
    err_value = None  # Initialize to track the value causing an error
    try:
        if not validate.layout(row):
            err_value = row
            print(f"Validation failed: Layout validation failed for row {err_value}")
            raise ValueError('Layout validation failed')

        if row[0].lower() != 'email' and not validate.email(row[0]):
            err_value = row[0]
            print(f"Validation failed: Invalid email format for '{err_value}'")
            raise ValueError('Invalid email format')

        # Date fields are at indices 4 (Fecha envio) and 5 (Fecha open), and index 8 (Fecha click)
        date_indices = [4, 5, 8]
        for i in date_indices:
            if row[i] != '-' and not validate.date(row[i]):
                err_value = row[i]
                print(f"Validation failed: Invalid date format for '{err_value}' at position {i}")
                raise ValueError('Invalid date format')

        insert_data(row, cnx)  # If all validations pass, insert the data
    except Exception as e:
        err_val = err_value or row
        print(f"Exception occurred: {e}."
            f"Traceback: {traceback.format_exc()}"
            f"Error value: {err_value}")
        logging.cnx_error(filename, cnx, err_value, e, traceback.format_exc())


def insert_data(row, cnx):
    visitor = parse.visitor(row)
    statistics = parse.statistics(row)
    with cnx.cursor() as cursor:
        insert_visitor(visitor, cursor)
        insert_statistics(statistics, cursor)
        cnx.commit()


def insert_visitor(visitor, cursor):
    visitor_insert = """
    INSERT INTO visitor (email, fechaPrimeraVisita, fechaUltimaVisita, visitasTotales, visitasAnioActual, visitasMesActual)
    VALUES (%s, %s, %s, %s, %s, %s)
    """
    cursor.execute(visitor_insert,(
        visitor['email'],
        visitor['fechaPrimeraVisita'],
        visitor['fechaUltimaVisita'],
        visitor['visitasTotales'],
        visitor['visitasAnioActual'],
        visitor['visitasMesActual']
        )
    )


def insert_statistics(statistics, cursor):
    statistics_insert = """
    INSERT INTO statistics (email, jyv, Badmail, Baja, Fecha_envio, Fecha_open, Opens, Opens_virales, Fecha_click, Clicks, Clicks_virales, Links, IPs, Navegadores, Plataformas)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    cursor.execute(statistics_insert, (
        statistics['email'],
        statistics['dynamic'],
        statistics['Badmail'],
        statistics['Baja'],
        statistics['Fecha envio'],
        statistics['Fecha open'],
        statistics['Opens'],
        statistics['Opens virales'],
        statistics['Fecha click'],
        statistics['Clicks'],
        statistics['Clicks virales'],
        statistics['Links'],
        statistics['IPs'],
        statistics['Navegadores'],
        statistics['Plataformas']
    )
)

In [3]:
# Connect to Postgres database
cnx = psycopg2.connect(
    database=source['database'],
    user=source['user'],
    password=source['password'],
    host=source.get('host', 'localhost'),  # 'localhost' as default
    port=source.get('port', '5432')  # '5432' is postgres' default
)


# Usage
temp_dir = f"..{path['temp']}"
for filename in os.listdir(temp_dir):
    if filename.endswith('.txt'):
        full_path = os.path.join(temp_dir, filename)
        process_file(full_path, cnx)
        os.remove(full_path)


# Close Postgres connection
cnx.close()

Exception occurred: 'dynamic_col'.Traceback: Traceback (most recent call last):
  File "/tmp/ipykernel_66689/3781525638.py", line 33, in process_line
    insert_data(row, cnx)  # If all validations pass, insert the data
    ^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_66689/3781525638.py", line 47, in insert_data
    insert_statistics(statistics, cursor)
  File "/tmp/ipykernel_66689/3781525638.py", line 74, in insert_statistics
    statistics['dynamic_col'],
    ~~~~~~~~~~^^^^^^^^^^^^^^^
KeyError: 'dynamic_col'
Error value: None


TypeError: 'NoneType' object is not iterable