# In [50]:
import pandas as pd
import os
import logging
import shutil
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
from sshtunnel import SSHTunnelForwarder

# Constants
# Folder scanned for new turnover-list workbooks, and the archive folder that
# processed workbooks are moved to.
FOLDER_PATH_IN = 'C:\\Users\\dmandree\\Downloads\\TL_new'
FOLDER_PATH_OUT = 'C:\\Users\\dmandree\\Downloads\\TL_arch'

# Only rows whose 'Company' value appears in this list are loaded.
companies = ['Guess Kazakhstan', 'Guess CIS']

# SECURITY NOTE: credentials should not live in source control. Each secret
# below can be supplied through an environment variable; the literals are kept
# only as backward-compatible defaults and should be rotated and removed.
DB_PARAMS = {
    'database': 'postgres',
    'user': os.environ.get('TL_DB_USER', 'postgres'),
    'password': os.environ.get('TL_DB_PASSWORD', '1296'),
    'host': 'localhost'
}

SSH_TUNNEL_PARAMS = {
    'ssh_address_or_host': (os.environ.get('TL_SSH_HOST', '79.174.86.163'), 22),
    'ssh_username': os.environ.get('TL_SSH_USER', 'root'),
    'ssh_password': os.environ.get('TL_SSH_PASSWORD', 'S0SJcmYwL0ZsmUId'),
    # PostgreSQL as seen from the SSH host, and the local end of the tunnel.
    'remote_bind_address': ('127.0.0.1', 5432),
    'local_bind_address': ('127.0.0.1', 8001)
}

# Logging
logging.basicConfig(level=logging.INFO)

# Function to read Excel files
def read_excel_files(folder_path_in, folder_path_out):
    """Read the 'TurnoverList' sheet of every Excel workbook in a folder.

    Entries that are not Excel workbooks (sub-folders, other file types and
    Excel's ``~$`` lock files) are ignored and left in place. Workbooks are
    moved to ``folder_path_out`` only after *all* of them have been read, so a
    read failure part-way through does not leave earlier workbooks archived
    without their data having been returned.

    Args:
        folder_path_in: Folder containing the workbooks to import.
        folder_path_out: Archive folder that processed workbooks are moved to.

    Returns:
        One DataFrame with the rows of all workbooks concatenated, or ``None``
        when no workbook was read.
    """
    excel_suffixes = ('.xls', '.xlsx', '.xlsm', '.xlsb')
    dfs = []
    processed = []
    # Sorted so the row order of the result does not depend on the arbitrary
    # order in which os.listdir() returns directory entries.
    for file in sorted(os.listdir(folder_path_in)):
        file_path = os.path.join(folder_path_in, file)
        if not os.path.isfile(file_path):
            continue
        if file.startswith('~$') or not file.lower().endswith(excel_suffixes):
            continue
        with pd.ExcelFile(file_path) as xls:
            dfs.append(pd.read_excel(xls, sheet_name='TurnoverList'))
        processed.append((file_path, file))

    # Moving the files after processing
    for file_path, file in processed:
        move_processed_file(file_path, folder_path_out, file)

    # Check if the list is not empty
    if not dfs:
        print("No Excel files found or no data read from the files.")
        return None
    return pd.concat(dfs, ignore_index=True)

# Function to move file to archive folder
def move_processed_file(file_path, folder_path_out, file):
    """Move a processed workbook into the archive folder, replacing any copy.

    Problems are reported with ``print`` instead of being raised, so one file
    that cannot be archived does not abort the caller.

    Args:
        file_path: Current full path of the workbook.
        folder_path_out: Archive folder; it is created if it does not exist.
        file: File name of the workbook inside the archive folder.
    """
    new_path = os.path.join(folder_path_out, file)
    if os.path.exists(new_path):
        try:
            os.remove(new_path)
        except PermissionError:
            print("The file is in use by another process and cannot be deleted.")
            # The move below could not succeed while the old copy is locked.
            return
    try:
        # Without an existing folder, shutil.move() would either fail or rename
        # the workbook *to* the folder path instead of moving it into it.
        os.makedirs(folder_path_out, exist_ok=True)
        shutil.move(file_path, new_path)
    except (IOError, shutil.Error) as e:
        print(f"Error while moving file '{file_path}': {e}")

# Function to process data
def process_data(df, allowed_companies=None):
    """Filter the rows to the allowed companies and normalise the columns.

    The input frame is left untouched; a new frame is returned.

    Args:
        df: Raw data with at least a 'Company' column and, normally, a 'Day'
            column.
        allowed_companies: Company names to keep. Defaults to the module-level
            ``companies`` list.

    Returns:
        A new DataFrame containing only the allowed companies, with 'Day'
        reduced to ``datetime.date`` values and all column names lower-cased.

    Raises:
        KeyError: If the 'Company' column is missing.
    """
    if allowed_companies is None:
        allowed_companies = companies

    # Filter first and take an explicit copy: the assignments below then never
    # write into the caller's frame (or into a view of it), and the date
    # conversion only runs on rows that are actually kept.
    result = df.loc[df['Company'].isin(allowed_companies)].copy()
    try:
        result["Day"] = pd.to_datetime(result["Day"]).dt.date
    except KeyError:
        print("Column 'Day' not found in dataframe. Skipping this step.")
    result.columns = result.columns.str.lower()
    return result

# Function to collect the unique dates of a dataframe
def create_outer_df(df):
    """Return a one-column frame ('key') holding the distinct 'day' values.

    Values appear in order of first occurrence in ``df['day']``.
    """
    return pd.DataFrame(df['day'].unique(), columns=['key'])

# Function for creating an SSH tunnel
def create_ssh_tunnel():
    """Construct an SSHTunnelForwarder from the module's SSH_TUNNEL_PARAMS.

    The forwarder is only constructed here; the caller decides when it is
    opened and closed (``main`` uses it as a context manager).
    """
    return SSHTunnelForwarder(**SSH_TUNNEL_PARAMS)

# Function to connect to a database
def create_db_engine(ssh_tunnel):
    """Create a SQLAlchemy engine that reaches PostgreSQL through the tunnel.

    Args:
        ssh_tunnel: Tunnel object whose ``local_bind_port`` is the local port
            forwarded to the database.

    Returns:
        A SQLAlchemy engine built from ``DB_PARAMS`` and the tunnel's port.
    """
    # Local import: the file's top-level imports do not bring URL into scope.
    from sqlalchemy.engine import URL

    # Stored in DB_PARAMS as before, in case other code reads the port there.
    DB_PARAMS['port'] = ssh_tunnel.local_bind_port

    # URL.create() escapes each component, so a user name or password that
    # contains '@', ':' or '/' no longer produces a broken connection string.
    url = URL.create(
        drivername='postgresql',
        username=DB_PARAMS['user'],
        password=DB_PARAMS['password'],
        host=DB_PARAMS['host'],
        port=DB_PARAMS['port'],
        database=DB_PARAMS['database'],
    )
    return create_engine(url)

# Function to get date intersections
def get_intersections(engine, df):
    """Return the days of ``df`` that already have rows in the ``sales`` table.

    The previous implementation discarded the query result and compared the
    frame's days with themselves, so every day in ``df`` was always reported.

    Args:
        engine: SQLAlchemy engine (or connection) used to query ``sales``.
        df: Processed data with a 'day' column.

    Returns:
        A list of the distinct 'day' values of ``df`` that are also present in
        ``sales.day``; empty when there is no overlap.
    """
    query = text('select DISTINCT day as key from sales')
    db_keys = pd.read_sql(query, engine)['key']

    # Both sides are reduced to plain dates before comparing, so the result
    # does not depend on whether the database column comes back as date
    # objects or as timestamps.
    db_days = set(pd.to_datetime(db_keys.dropna()).dt.date)
    file_days = pd.Series(df['day'].dropna().unique(), dtype=object)
    if file_days.empty or not db_days:
        return []

    already_loaded = pd.to_datetime(file_days).dt.date.isin(list(db_days))
    return file_days[already_loaded].tolist()
    
# Function to remove intersections from the database
def delete_intersections(session, intersection_df):
    """Delete all ``sales`` rows whose day is in ``intersection_df``.

    Args:
        session: SQLAlchemy session used to run and commit the DELETE.
        intersection_df: List of day values to delete; may be empty.

    Returns:
        ``True`` when there was nothing to delete or the delete was committed,
        ``False`` when it failed. On failure the session is rolled back and
        the error is printed rather than raised.
    """
    if not intersection_df:
        return True

    delete_query = text('DELETE FROM sales WHERE day = ANY(:keys)')
    try:
        session.execute(delete_query, {'keys': intersection_df})
        session.commit()
    except Exception as e:
        # Roll back so the session is usable again and no partial delete is
        # left pending inside the failed transaction.
        session.rollback()
        print(f"Error while deleting records: {e}")
        return False
    return True

# Function to load data to database
def load_data_to_db(df, engine):
    """Append ``df`` to the ``sales`` table.

    Args:
        df: Data to append; column names must match the table's columns.
        engine: SQLAlchemy engine for the target database.

    Returns:
        ``True`` when the rows were written and committed, ``False`` when the
        load failed. Errors are printed rather than raised.
    """
    try:
        # engine.begin() opens an explicit transaction that is committed when
        # the block exits normally and rolled back on error, instead of
        # relying on implicit commit behaviour of a bare connection.
        with engine.begin() as conn:
            df.to_sql("sales", conn, if_exists="append", index=False)
    except Exception as e:
        print(f"Error while loading data to the database: {e}")
        return False
    return True

# Main function
def main():
    """Import new workbooks and replace the matching days in ``sales``.

    Steps: read and archive the workbooks, filter and normalise the rows, open
    the SSH tunnel, delete the days that are already in the database, then
    append the new rows.

    NOTE(review): the delete and the insert run in separate transactions, and
    the workbooks are archived before the database load; a failed load
    therefore needs the workbooks to be moved back and re-run.
    """
    # Read Excel files
    df = read_excel_files(FOLDER_PATH_IN, FOLDER_PATH_OUT)
    if df is None:
        # Nothing was read (the reader has already reported it).
        return

    # Process data
    df = process_data(df)
    if df.empty:
        print("No rows left after filtering. Nothing to load.")
        return

    # Create SSH tunnel
    with create_ssh_tunnel() as ssh_tunnel:

        # Create database engine
        engine = create_db_engine(ssh_tunnel)
        try:
            # Create session
            Session = sessionmaker(bind=engine)

            with Session() as session:

                # Create intersections
                intersection_df = get_intersections(engine, df)

                # Remove intersections from the database. An explicit False
                # means the delete failed; appending anyway would duplicate
                # the rows that are still in the table.
                if delete_intersections(session, intersection_df) is False:
                    print("Existing records could not be deleted. Load aborted.")
                    return

                # Load data to database
                load_data_to_db(df, engine)
        finally:
            # Close pooled connections before the tunnel they run through is
            # shut down.
            engine.dispose()

# Run the import only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

# Output:
# INFO:paramiko.transport:Connected (version 2.0, client OpenSSH_8.9p1)
# INFO:paramiko.transport:Authentication (password) successful!
