### Mini práctica
1. Toma tu script de clients.csv que hiciste en el subtema anterior.
2. Automatízalo para que se ejecute cada minuto y guarde el resultado en un archivo nuevo llamado `clean_clients.csv`.
3. Agrega un `print('ETL running at:', current_time)` para verificar cuándo corre.


In [None]:
# Libraries
import pandas as pd
import numpy as np
import schedule
import time
from datetime import datetime
from email_validator import validate_email, EmailNotValidError
import re

In [None]:
# check email function (one way)
def is_valid_mail(email):
    """Return whether ``email`` is accepted by ``validate_email``.

    Args:
        email: Candidate address. Values that are not ``str`` or ``bytes``
            (for example ``None`` or the ``NaN`` pandas uses for an empty
            CSV cell) are reported as invalid without calling the validator.

    Returns:
        bool: ``True`` when ``validate_email`` returns without raising,
        ``False`` when it raises ``EmailNotValidError`` or when the value is
        not text.
    """
    # Only EmailNotValidError is handled below, so anything that is not text
    # is rejected up front rather than handed to the validator.
    # NOTE(review): validate_email is assumed to accept str/bytes only -
    # confirm against the installed email_validator version.
    if not isinstance(email, (str, bytes)):
        return False
    try:
        validate_email(email)
    except EmailNotValidError:
        return False
    return True

In [None]:
# check email function
def is_valid_email_regex(email):
    """Return whether ``email`` looks like ``local@domain.tld``.

    The check is purely syntactic: one or more of ``a-z A-Z 0-9 . _ % + -``
    before the ``@``, then one or more of ``a-z A-Z 0-9 . -``, a dot, and a
    final label of at least two ASCII letters.

    Args:
        email: Candidate value. Anything that is not a ``str`` (``None``,
            ``NaN``, numbers, list-likes, ...) is reported as invalid.

    Returns:
        bool: ``True`` only when the whole string matches the pattern.
    """
    # Type check first: a str can never be a missing value, and testing the
    # type before anything else keeps list-like inputs from raising.
    if not isinstance(email, str):
        return False
    pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    # fullmatch must consume the entire string, so an address followed by a
    # trailing newline is rejected (match() with '$' would accept it).
    return re.fullmatch(pattern, email) is not None

In [None]:
# ETL
def etl_job():
    """Run one extract-transform-load pass from clients.csv to clean_clients.csv.

    Steps:
        * Print the start time (``HH:MM:SS``) so each scheduled run is visible.
        * Extract: read ``clients.csv`` from the current working directory.
        * Transform:
            - ``name`` is title-cased.
            - ``age`` values that are missing or not greater than zero are
              replaced by the rounded mean of the valid ages; the column is
              then cast to ``int``. If no row has a valid age, ``0`` is used
              as the replacement.
            - ``email`` values rejected by ``is_valid_email_regex`` are
              replaced by the literal ``'invalid_email'``.
        * Load: write the result to ``clean_clients.csv`` without the
          DataFrame index, so the output has only the data columns.

    Returns:
        None. The function works through its side effects (print + file write).
    """
    current_time = datetime.now().strftime('%H:%M:%S')
    print(f'ETL Running at: {current_time}')

    # Extract
    df = pd.read_csv('clients.csv')

    # Transform
    df['name'] = df['name'].str.title()

    # NaN > 0 evaluates to False, so this single mask excludes both missing
    # and non-positive ages.
    has_valid_age = df['age'] > 0
    mean_valid_age = df.loc[has_valid_age, 'age'].mean()
    # With no valid age the mean is NaN and cannot be rounded to an int;
    # fall back to 0 so the job still finishes.
    replacement_age = 0 if pd.isna(mean_valid_age) else round(float(mean_valid_age))
    df['age'] = np.where(has_valid_age, df['age'], replacement_age)
    df['age'] = df['age'].astype(int)

    has_valid_email = df['email'].apply(is_valid_email_regex)
    df['email'] = np.where(has_valid_email, df['email'], 'invalid_email')

    # Load
    # index=False keeps the row index out of the file; otherwise it is
    # written as an extra unnamed first column.
    df.to_csv('clean_clients.csv', index=False)

In [None]:
# Programming the script
# `schedule` keeps registered jobs on a module-level default scheduler, so
# re-running this cell would add a second identical job and the ETL would run
# more than once per minute. Tag the job and drop any earlier job with the
# same tag before registering it again.
ETL_JOB_TAG = 'etl_job'
schedule.clear(ETL_JOB_TAG)
schedule.every(1).minutes.do(etl_job).tag(ETL_JOB_TAG)

# Poll once per second and run whatever is due. This loop never returns;
# interrupt the kernel/process to stop it.
while True:
    schedule.run_pending()
    time.sleep(1)
    