In [3]:
import os
import requests
from datetime import date, timedelta, datetime
import pandas as pd
import numpy as np
import psycopg2
import logging
import glob
import json
import sys
from pgdb import PGDataBase

In [14]:
CONFIG_FILE = '/Users/tatiana/Документы/Анализ данных/FINAL PROJECT/config.json'


class ConfigLoader:
    """Load a JSON configuration file once, at construction time.

    After construction ``config`` holds the parsed JSON document, or ``None``
    when the file could not be read or parsed (the reason is logged).
    """

    def __init__(self, config_path=CONFIG_FILE):
        """Remember *config_path* and immediately load it into ``self.config``."""
        self.config_path = config_path
        self.config = self.load_config()

    def load_config(self):
        """Read and parse the JSON file at ``self.config_path``.

        Returns:
            The parsed JSON value, or ``None`` if the file is missing,
            unreadable or not valid JSON. Errors are logged, never raised.
        """
        try:
            # Decode explicitly as UTF-8 so the result does not depend on the
            # platform's default locale encoding.
            with open(self.config_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            # Report the path that was actually tried, not the module default.
            logging.error(f"Configuration file not found: {self.config_path}")
            return None
        except Exception as e:
            logging.error(f"An unexpected error occurred loading config: {e}")
            return None

In [15]:
class LoggerManager:
    """
    Manages logging setup and cleanup.

    On construction, points the root logger at ``<log_dir>/<YYYY-MM-DD>.log``
    (today's date) and deletes dated ``*.log`` files in ``log_dir`` that are
    older than ``keep_days`` days.
    """

    def __init__(self, log_dir='logs', keep_days=7):
        """Store the settings and configure logging immediately.

        Args:
            log_dir: Directory that holds the dated log files; created if missing.
            keep_days: Maximum age, in days, of a dated log file before removal.
        """
        self.log_dir = log_dir
        self.keep_days = keep_days
        self._setup_logging()

    def _setup_logging(self):
        """Replace the root logger's handlers with a file handler for today."""
        os.makedirs(self.log_dir, exist_ok=True)

        current_date_str = datetime.now().strftime('%Y-%m-%d')
        log_file_path = os.path.join(self.log_dir, f'{current_date_str}.log')

        # basicConfig is a no-op while the root logger still has handlers, so
        # drop them first. Close each one as well: otherwise every re-run
        # leaves the previous log file handle open.
        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)
            handler.close()

        logging.basicConfig(
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            level=logging.INFO,
            handlers=[logging.FileHandler(log_file_path, 'a', 'utf-8')],
        )

        # Run cleanup only after the file handler is installed so that its
        # warnings and errors are recorded in today's log file.
        self._cleanup_old_logs()

    def _cleanup_old_logs(self):
        """Delete ``*.log`` files whose date-based name is older than ``keep_days``.

        Files whose name (up to the first dot) is not a ``YYYY-MM-DD`` date are
        left in place and reported with a warning. Deletion errors are logged
        and do not stop the loop.
        """
        now = datetime.now()
        for file in glob.glob(os.path.join(self.log_dir, '*.log')):
            file_date_str = os.path.basename(file).split('.')[0]
            try:
                file_date = datetime.strptime(file_date_str, '%Y-%m-%d')
                if now - file_date > timedelta(days=self.keep_days):
                    os.remove(file)
            except ValueError:
                logging.warning(f"Skipping log file with unexpected name format: {file}")
            except Exception as e:
                logging.error(f"Error deleting log file {file}: {e}")

In [16]:
class APIClient:
    """Small HTTP GET helper that returns API payloads as record lists or DataFrames."""

    def __init__(self, timeout=60):
        """Store the *timeout* value forwarded to every ``requests.get`` call."""
        self.timeout = timeout

    def fetch_raw(self, full_url, params=None):
        """GET *full_url* and return the list of records found in the JSON body.

        A top-level JSON list is returned as is; a JSON object contributes the
        list stored under its ``"data"`` key. Any other payload, and any
        request or decoding failure (which is logged), yields an empty list.
        """
        logging.info(f"API request: {full_url} | params={params}")
        try:
            response = requests.get(full_url, params=params, timeout=self.timeout)
            logging.info(f"API response: status={response.status_code}, content-type={response.headers.get('Content-Type')}")
            response.raise_for_status()
            payload = response.json()
            if isinstance(payload, list):
                return payload
            if isinstance(payload, dict):
                nested = payload.get("data")
                if isinstance(nested, list):
                    return nested
            # None, scalars and objects without a list under "data" all end up here.
            return []
        except requests.RequestException as e:
            logging.error(f"fetch_raw: {e}")
        except Exception as e:
            logging.error(f"fetch_raw unexpected: {e}")
        return []

    def fetch_df(self, full_url, params=None):
        """Fetch records and return them as a DataFrame.

        When both ``purchase_datetime`` and
        ``purchase_time_as_seconds_from_midnight`` columns are present they are
        replaced by a single ``purchase_datetime_full`` column (parsed date plus
        the seconds offset). Otherwise the frame is returned unchanged.
        """
        frame = pd.DataFrame(self.fetch_raw(full_url, params))
        if frame.empty:
            return frame

        date_col = "purchase_datetime"
        sec_col = "purchase_time_as_seconds_from_midnight"
        if not all(name in frame.columns for name in (date_col, sec_col)):
            return frame

        # Unparseable dates become NaT instead of raising.
        day = pd.to_datetime(frame[date_col], errors="coerce")
        offset = pd.to_timedelta(frame[sec_col], unit="s")
        frame["purchase_datetime_full"] = day + offset
        return frame.drop(columns=[date_col, sec_col, "_purchase_time"], errors="ignore")


ОСНОВНОЙ КОД

In [None]:

# Previous calendar day as YYYY-MM-DD; sent to the API as the `date` query parameter.
prev_day = (date.today() - timedelta(days=1)).strftime("%Y-%m-%d")

# Logging: configure the root logger and prune old log files.
logger_manager = LoggerManager(log_dir='logs', keep_days=7)
logging.info("Log file cleanup finished.")

# Read settings from the config file; both sections are required below.
config_loader = ConfigLoader(CONFIG_FILE)
config = config_loader.config
if config is None or \
   not config.get('database') or \
   not config.get('api_endpoint'):
    logging.error("Failed to load essential configuration sections. Exiting.")
    sys.exit(1)
api_url = config['api_endpoint'].get('url')
if not api_url:
    # Without a URL the request below can only fail; stop with a clear message.
    logging.error("Configuration is missing api_endpoint.url. Exiting.")
    sys.exit(1)

# Download the previous day's data from the API into a DataFrame.
client = APIClient(timeout=60)
params = {'date': prev_day}
df = client.fetch_df(api_url, params=params)
logging.info(f"DataFrame shape: {df.shape}")

# Load into the database.
cols = ["client_id", "gender", "product_id", "quantity", "price_per_item", "discount_per_item", "total_price", "purchase_datetime_full"]

# fetch_df returns an empty frame both when the API has no rows and when the
# request failed, so stop here instead of hitting a KeyError further down.
if df.empty:
    logging.warning(f"No data received for {prev_day}; nothing to load.")
    sys.exit(0)

missing_cols = [c for c in cols if c not in df.columns]
if missing_cols:
    logging.error(f"API data is missing expected columns: {missing_cols}. Exiting.")
    sys.exit(1)

df = df.copy()
df["purchase_datetime_full"] = pd.to_datetime(df["purchase_datetime_full"], errors="coerce")
# Rows whose timestamp could not be parsed are dropped rather than inserted.
df = df.dropna(subset=["purchase_datetime_full"])

# One tuple per row, in the same order as the placeholders in `sql`.
params_seq = [
    (
        str(r.client_id),
        str(r.gender),
        str(r.product_id),
        int(r.quantity),
        float(r.price_per_item),
        float(r.discount_per_item),
        float(r.total_price),
        r.purchase_datetime_full.to_pydatetime(),
    )
    for r in df[cols].itertuples(index=False)
]

sql = """
INSERT INTO public.sales
(client_id, gender, product_id, quantity, price_per_item, discount_per_item, total_price, date_time)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
"""

db_params = config['database']
inserted = 0
try:
    with PGDataBase(
        host=db_params['host'],
        database=db_params['dbname'],
        user=db_params['user'],
        password=db_params['password'],
    ) as database:
        # NOTE(review): presumably post_many runs `sql` once per tuple and
        # raises on failure — confirm against pgdb.PGDataBase.
        database.post_many(sql, params_seq)
        inserted = len(params_seq)

    logging.info(f"В БД успешно загружено {inserted} строк.")
except Exception as e:
    logging.error(f"Ошибка при вставке данных: {e}")

