In [2]:
import ftplib
import zipfile
import io
import pandas as pd
import numpy as np
from typing import Dict, List
from IPython.display import display
from clickhouse_driver import Client
import sshtunnel
from sshtunnel import SSHTunnelForwarder
from clickhouse_connect import get_client
import logging
from typing import Dict
import os

#Настройки отображения
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [3]:
# Соединение с FTP-папкой и получение списка файлов
def connect_and_list_ftp_files(host: str, user: str, password: str, folder: str = '') -> tuple:
    """Подключение к FTP и получение списка ZIP-файлов"""
    ftp = ftplib.FTP(host)
    ftp.login(user, password)
    
    if folder:
        ftp.cwd(folder)
    
    zip_files = [f for f in ftp.nlst() if f.lower().endswith('.zip')]
    return ftp, zip_files

# Загрузка файлов
def load_all_csv_from_zip_chunked(ftp: ftplib.FTP, zip_filename: str, chunksize=10000, max_files=None):
    """Версия с обработкой таймаутов"""
    def reset_connection():
        nonlocal ftp
        print("Переподключение к FTP...")
        ftp.close()
        ftp = ftplib.FTP(host, timeout=300)
        ftp.login(user, password)
        if folder:
            ftp.cwd(folder)
    
    bio = io.BytesIO()
    try:
        ftp.retrbinary(f'RETR {zip_filename}', bio.write)
    except ftplib.error_temp as e:
        if "421 Timeout" in str(e):
            reset_connection()
            bio = io.BytesIO()
            ftp.retrbinary(f'RETR {zip_filename}', bio.write)
        else:
            raise
    
    bio.seek(0)
    dfs = []
    
    with zipfile.ZipFile(bio) as zipf:
        csv_files = [f for f in zipf.namelist() if f.lower().endswith('.csv')]
        
        for i, csv_filename in enumerate(csv_files):
            if max_files and i >= max_files:
                break
                
            try:
                # Поддержание соединения
                if i % 5 == 0:
                    ftp.voidcmd("NOOP")
                
                with zipf.open(csv_filename) as csv_file:
                    chunks = []
                    for chunk in pd.read_csv(csv_file, chunksize=chunksize, sep=';'):
                        chunks.append(chunk)
                    
                    if chunks:
                        df = pd.concat(chunks)
                        dfs.append(df)
            except ftplib.error_temp as e:
                if "421 Timeout" in str(e):
                    reset_connection()
                    continue
                raise
    
    return dfs


In [None]:
if __name__ == "__main__":
    # Подключение и список файлов
    ftp, available_files = connect_and_list_ftp_files(
        host='IP Вашего хоста',
        user='Логин юзера FTP папки',
        password='Пароль от юзера FTP папки',
        folder=''
    )
    
    print("\nДоступные ZIP-архивы:")
    for i, filename in enumerate(available_files):
        print(f"{i}. {filename}")
    


Доступные ZIP-архивы:
0. GTIN организации.zip
1. Движения.zip
2. Контрагенты.zip
3. Места деятельности.zip
4. Оборачиваемость товара по месяцу производства.zip
5. Оборачиваемость товара по периоду реализации.zip
6. Отслеживаемые GTIN.zip
7. Продажи.zip
8. Средние цены.zip


In [4]:
all_dfs = []  # Список всех датафреймов
df_mapping = {}  # Соответствие имени df_N → исходному файлу
    
for zip_idx, zip_filename in enumerate(available_files):
        print(f"\nЗагрузка {zip_filename}...")
        dfs_from_zip = load_all_csv_from_zip_chunked(ftp, zip_filename)
        
        for csv_idx, df in enumerate(dfs_from_zip):
            df_name = f"df_{len(all_dfs)}"  # Имя вида df_0, df_1, ...
            globals()[df_name] = df  # Сохраняем в глобальную область видимости
            all_dfs.append(df)
            
            # Запоминаем соответствие df_N → файл
            df_mapping[df_name] = {
                'zip': zip_filename,
                'csv': f"CSV_{csv_idx} из {zip_filename}",
                'shape': df.shape
            }
    
    # 3. Вывод отчета
print("\nРезультат загрузки:")
for df_name, info in df_mapping.items():
        print(f"{df_name} ← {info['csv']} (строк: {info['shape'][0]}, столбцов: {info['shape'][1]})")
    
    # Закрытие соединения
ftp.quit()


Загрузка GTIN организации.zip...

Загрузка Движения.zip...

Загрузка Контрагенты.zip...

Загрузка Места деятельности.zip...

Загрузка Оборачиваемость товара по месяцу производства.zip...

Загрузка Оборачиваемость товара по периоду реализации.zip...

Загрузка Отслеживаемые GTIN.zip...

Загрузка Продажи.zip...

Загрузка Средние цены.zip...

Результат загрузки:
df_0 ← CSV_0 из GTIN организации.zip (строк: 61, столбцов: 7)
df_1 ← CSV_0 из Движения.zip (строк: 4287872, столбцов: 22)
df_2 ← CSV_0 из Контрагенты.zip (строк: 109981, столбцов: 5)
df_3 ← CSV_0 из Места деятельности.zip (строк: 88256, столбцов: 42)
df_4 ← CSV_0 из Оборачиваемость товара по месяцу производства.zip (строк: 366, столбцов: 11)
df_5 ← CSV_0 из Оборачиваемость товара по периоду реализации.zip (строк: 2337, столбцов: 13)
df_6 ← CSV_0 из Отслеживаемые GTIN.zip (строк: 61, столбцов: 10)
df_7 ← CSV_0 из Продажи.zip (строк: 18385526, столбцов: 36)
df_8 ← CSV_0 из Средние цены.zip (строк: 214640, столбцов: 23)


'221 Goodbye.'

In [None]:
# Переименование DataFrame для лучшего понимания контекста
df_gtin_organization = df_0
df_movement = df_1
df_contragents = df_2
df_location_sales = df_3
df_turnover_month = df_4
df_turnover_period = df_5
df_gtin_monitoring = df_6
df_sales = df_7
df_averange_prices = df_8

named_dfs = {
    'GTIN организации': df_0, 
    'Движения': df_1, 
    'Контрагенты': df_2,
    'Места деятельности':df_3,
    'Оборачиваемость товара по месяцу производства':df_4,
    'Оборачиваемость товара по периоду реализации':df_5,
    'Отслеживаемые GTIN':df_6,
    'Продажи':df_7,
    'Средние цены':df_8,
}

# Вывод основоной информации о каждом DataFrame
for name, df in named_dfs.items():
    print(f"\nДатафрейм: {name}")
    display(pd.DataFrame({
        'Column': df.columns,
        'Type': df.dtypes,
        'Missing %': (df.isnull().mean()*100).round(1),
        'Unique Values': df.nunique()
    }))


Датафрейм: GTIN организации


Unnamed: 0,Column,Type,Missing %,Unique Values
gtin,gtin,int64,0.0,61
brand,brand,object,0.0,7
name,name,object,0.0,31
category,category,object,1.6,3
custom_name,custom_name,float64,100.0,0
custom_id,custom_id,float64,100.0,0
organization_name,organization_name,object,0.0,1



Датафрейм: Движения


Unnamed: 0,Column,Type,Missing %,Unique Values
tg,tg,object,0.0,1
producer_inn,producer_inn,int64,0.0,1
producer_name,producer_name,object,0.0,1
current_inn,current_inn,object,0.6,31987
current_name,current_name,object,1.1,23291
next_inn,next_inn,object,71.3,160355
next_name,next_name,object,72.6,90754
gtin,gtin,int64,0.0,22
oper_type,oper_type,object,0.0,11
product_name,product_name,object,0.0,22



Датафрейм: Контрагенты


Unnamed: 0,Column,Type,Missing %,Unique Values
inn,inn,object,0.0,109981
name,name,object,9.2,93018
custom_type,custom_type,float64,100.0,0
type,type,float64,100.0,0
partner_group,partner_group,object,0.0,3



Датафрейм: Места деятельности


Unnamed: 0,Column,Type,Missing %,Unique Values
id,id,object,0.0,88256
inn,inn,object,0.0,14552
type,type,float64,100.0,0
address,address,object,0.0,78454
postal_code,postal_code,float64,2.9,11937
federal_district,federal_district,object,0.4,8
federal_subject,federal_subject,object,0.4,85
area,area,object,73.3,1505
city,city,object,36.6,1156
city_area,city_area,object,87.9,12



Датафрейм: Оборачиваемость товара по месяцу производства


Unnamed: 0,Column,Type,Missing %,Unique Values
tg,tg,object,0.0,1
producer_inn,producer_inn,int64,0.0,1
producer_name,producer_name,object,0.0,1
gtin,gtin,int64,0.0,20
product_name,product_name,object,0.0,20
days,days,float64,0.0,366
year,year,int64,0.0,3
quarter,quarter,int64,0.0,4
month,month,int64,0.0,12
product_brand,product_brand,object,0.0,4



Датафрейм: Оборачиваемость товара по периоду реализации


Unnamed: 0,Column,Type,Missing %,Unique Values
tg,tg,object,0.0,1
producer_inn,producer_inn,int64,0.0,1
producer_name,producer_name,object,0.0,1
gtin,gtin,int64,0.0,22
product_name,product_name,object,0.0,22
days,days,float64,0.0,2334
year,year,int64,0.0,3
quarter,quarter,int64,0.0,4
month,month,int64,0.0,12
week,week,int64,0.0,52



Датафрейм: Отслеживаемые GTIN


Unnamed: 0,Column,Type,Missing %,Unique Values
enabled,enabled,bool,0.0,1
gtin,gtin,int64,0.0,61
brand,brand,object,0.0,7
name,name,object,0.0,31
category,category,object,1.6,3
custom_name,custom_name,float64,100.0,0
custom_id,custom_id,float64,100.0,0
organization_name,organization_name,object,0.0,1
date_added,date_added,object,0.0,23
date_disabled,date_disabled,float64,100.0,0



Датафрейм: Продажи


Unnamed: 0,Column,Type,Missing %,Unique Values
tg,tg,object,0.0,1
producer_inn,producer_inn,int64,0.0,1
producer_name,producer_name,object,0.0,1
inn,inn,object,0.0,14792
name,name,object,0.0,12308
federal_district,federal_district,object,0.0,8
federal_subject_code,federal_subject_code,object,0.0,171
federal_subject,federal_subject,object,0.0,86
address,address,object,0.0,70044
gtin,gtin,int64,0.0,22



Датафрейм: Средние цены


Unnamed: 0,Column,Type,Missing %,Unique Values
tg,tg,object,0.0,1
producer_inn,producer_inn,int64,0.0,1
producer_name,producer_name,object,0.0,1
federal_district,federal_district,object,0.0,8
federal_subject_code,federal_subject_code,int64,0.0,80
federal_subject,federal_subject,object,0.0,80
inn,inn,int64,0.0,1422
name,name,object,0.1,1372
gtin,gtin,int64,0.0,21
product_name,product_name,object,0.0,21


## Далее устанавливаем соединение с ClickHouse через SSH - тоннель

In [None]:
# Параметры SSH-туннеля
SSH_HOST = "IP хоста" 
SSH_PORT = 22 # стандартный вариант, но Ваш может не совпадать
SSH_USER = "Логин пользователя базы данных"
SSH_PRIVATE_KEY_PATH = "Путь к Вашему приватному ключу"
SSH_PRIVATE_KEY_PASSWORD = "Пароль от приватного ключа"

# Параметры ClickHouse
CLICKHOUSE_HOST = 'Хост ClickHouse'
CLICKHOUSE_PORT = 8123 # стандартный вариант, но Ваш может не совпадать
CLICKHOUSE_USER = 'Логин юзера ClickHouse'
CLICKHOUSE_PASSWORD = 'Пароль юзера ClickHouse'

# Локальный порт, на который будет туннелироваться ClickHouse
LOCAL_BIND_PORT = 8123 # Можно оставить 8123 или выбрать другой свободный порт, например 5555

with sshtunnel.SSHTunnelForwarder(
    (SSH_HOST, SSH_PORT),
    ssh_username=SSH_USER,
    ssh_pkey=SSH_PRIVATE_KEY_PATH,
    ssh_private_key_password=SSH_PRIVATE_KEY_PASSWORD,
    remote_bind_address=(CLICKHOUSE_HOST, CLICKHOUSE_PORT),
    local_bind_address=('127.0.0.1', LOCAL_BIND_PORT)
) as tunnel:
    print(f"SSH-туннель запущен. Локальный порт: {LOCAL_BIND_PORT}")

    try:
        # Теперь вы можете подключиться к ClickHouse через локальный порт, используя clickhouse-connect
        client = get_client(
            host='127.0.0.1',
            port=LOCAL_BIND_PORT,
            user=CLICKHOUSE_USER,
            password=CLICKHOUSE_PASSWORD,
            # secure=True # Используйте secure=True, если ClickHouse настроен на HTTPS на порту 8443
        )

        # Выполните запрос
        result = client.query('SELECT 1 as one, \'Hello, ClickHouse!\' as message')
        print(f"Результат запроса: {result.result_rows}")

        client.close()
        print("Соединение с ClickHouse закрыто.")

    except Exception as e:
        print(f"Ошибка при подключении к ClickHouse: {e}")

print("SSH-туннель закрыт.")

2025-07-30 15:35:12,116| ERROR   | Password is required for key C:\Users\m.rybakov/.ssh\id_ed25519


SSH-туннель запущен. Локальный порт: 8123
Результат запроса: [(1, 'Hello, ClickHouse!')]
Соединение с ClickHouse закрыто.
SSH-туннель закрыт.


## Когда соединение установлено, начинаем подгружать DataFrame в нашу БД

In [None]:
table_name = 'gtin_organization'

# Для лучшего соответствия ClickHouse и работы с NULLable типами в Pandas 1.0+
# (для строковых и целочисленных столбцов, которые могут содержать NaN/None)
# Хотя clickhouse-connect хорошо справляется с object и int64, явное преобразование
# в Pandas Nullable dtypes (string, Int64) может быть полезно для ясности и консистентности.
# Если есть NaN, ClickHouse будет хранить их как NULL, если тип Nullable.

df_gtin_organization['gtin'] = df_gtin_organization['gtin'].astype('Int64')
df_gtin_organization['brand'] = df_gtin_organization['brand'].astype('string')
df_gtin_organization['name'] = df_gtin_organization['name'].astype('string')
df_gtin_organization['category'] = df_gtin_organization['category'].astype('string')
df_gtin_organization['organization_name'] = df_gtin_organization['organization_name'].astype('string')
database_name = 'data_rida'

print("DataFrame для записи:")
print(df_gtin_organization.dtypes)
print("\n" + "="*50 + "\n")

with sshtunnel.SSHTunnelForwarder(
    (SSH_HOST, SSH_PORT),
    ssh_username=SSH_USER,
    ssh_pkey=SSH_PRIVATE_KEY_PATH,
    ssh_private_key_password=SSH_PRIVATE_KEY_PASSWORD,
    remote_bind_address=(CLICKHOUSE_HOST, CLICKHOUSE_PORT),
    local_bind_address=('127.0.0.1', LOCAL_BIND_PORT)
) as tunnel:
    print(f"SSH-туннель запущен. Локальный порт: {LOCAL_BIND_PORT}")

    try:
        client = get_client(
            host='127.0.0.1',
            port=LOCAL_BIND_PORT,
            user=CLICKHOUSE_USER,
            password=CLICKHOUSE_PASSWORD,
            database=database_name
        )

        create_table_sql = f'''
            CREATE TABLE IF NOT EXISTS {table_name} (
                gtin              Int64,
                brand             Nullable(String),
                name              Nullable(String),
                category          Nullable(String),
                custom_name       Nullable(Float64),
                custom_id         Nullable(Float64),
                organization_name Nullable(String)
            ) ENGINE = MergeTree()
            ORDER BY gtin -- Важно выбрать подходящий ключ сортировки
        '''
        client.command(create_table_sql)
        print(f"Таблица '{table_name}' проверена/создана в ClickHouse.")

        # Выполните запрос
        client.insert_df(f'{database_name}.{table_name}', df_gtin_organization)
        print(f"Данные из DataFrame успешно записаны в таблицу '{table_name}'.")

        # Проверка данных
        result = client.query(f"SELECT * FROM {database_name}.{table_name} LIMIT 5")
        print("\nПроверка записанных данных (первые 5 строк):")
        print(pd.DataFrame(result.result_rows, columns=result.column_names))

        client.close()
        print("Соединение с ClickHouse закрыто.")

    except Exception as e:
        print(f"Ошибка при подключении к ClickHouse: {e}")

print("SSH-туннель закрыт.")

2025-07-30 15:35:15,446| ERROR   | Password is required for key C:\Users\m.rybakov/.ssh\id_ed25519


DataFrame для записи:
gtin                          Int64
brand                string[python]
name                 string[python]
category             string[python]
custom_name                 float64
custom_id                   float64
organization_name    string[python]
dtype: object


SSH-туннель запущен. Локальный порт: 8123
Таблица 'gtin_organization' проверена/создана в ClickHouse.
Данные из DataFrame успешно записаны в таблицу 'gtin_organization'.

Проверка записанных данных (первые 5 строк):
            gtin           brand  \
0  4670010850023  BAIKAL RESERVE   
1  4670010850023  BAIKAL RESERVE   
2  4670010850023  BAIKAL RESERVE   
3  4670010850023  BAIKAL RESERVE   
4  4670010850023  BAIKAL RESERVE   

                                                name  \
0  Вода минеральная природная лечебно-столовая пи...   
1  Вода минеральная природная лечебно-столовая пи...   
2  Вода минеральная природная лечебно-столовая пи...   
3  Вода минеральная природная лечебно-столовая пи... 

In [None]:
table_name = 'movement'

df_movement['tg'] = df_movement['tg'].astype('string')
df_movement['producer_inn'] = df_movement['producer_inn'].astype('Int64')
df_movement['producer_name'] = df_movement['producer_name'].astype('string')
df_movement['current_inn'] = df_movement['current_inn'].astype('string')
df_movement['current_name'] = df_movement['current_name'].astype('string')
df_movement['next_inn'] = df_movement['next_inn'].astype('string')
df_movement['next_name'] = df_movement['next_name'].astype('string')
df_movement['gtin'] = df_movement['gtin'].astype('Int64')
df_movement['oper_type'] = df_movement['oper_type'].astype('string')
df_movement['product_name'] = df_movement['product_name'].astype('string')
df_movement['cnt'] = df_movement['cnt'].astype('Int64')
df_movement['year'] = df_movement['year'].astype('Int64')
df_movement['quarter'] = df_movement['quarter'].astype('Int64')
df_movement['month'] = df_movement['month'].astype('Int64')
df_movement['week'] = df_movement['week'].astype('Int64')
df_movement['week_year'] = df_movement['week_year'].astype('Int64')
df_movement['product_brand'] = df_movement['product_brand'].astype('string')
df_movement['gtin_group'] = df_movement['gtin_group'].astype('string')
df_movement['retail_network_current'] = df_movement['retail_network_current'].astype('string')
df_movement['managment_company_current'] = df_movement['managment_company_current'].astype('string')
df_movement['retail_network_next'] = df_movement['retail_network_next'].astype('string')
df_movement['managment_company_next'] = df_movement['managment_company_next'].astype('string')

database_name = 'data_rida'

print("DataFrame для записи:")
print(df_movement.dtypes)
print("\n" + "="*50 + "\n")

with sshtunnel.SSHTunnelForwarder(
    (SSH_HOST, SSH_PORT),
    ssh_username=SSH_USER,
    ssh_pkey=SSH_PRIVATE_KEY_PATH,
    ssh_private_key_password=SSH_PRIVATE_KEY_PASSWORD,
    remote_bind_address=(CLICKHOUSE_HOST, CLICKHOUSE_PORT),
    local_bind_address=('127.0.0.1', LOCAL_BIND_PORT)
) as tunnel:
    print(f"SSH-туннель запущен. Локальный порт: {LOCAL_BIND_PORT}")

    try:
        client = get_client(
            host='127.0.0.1',
            port=LOCAL_BIND_PORT,
            user=CLICKHOUSE_USER,
            password=CLICKHOUSE_PASSWORD,
            database=database_name
        )

        create_table_sql = f'''
            CREATE TABLE IF NOT EXISTS {table_name} (
                tg                          Nullable(String),
                producer_inn                Int64,
                producer_name               Nullable(String),
                current_inn                 Nullable(String),
                current_name                Nullable(String),
                next_inn                    Nullable(String),
                next_name                   Nullable(String),
                gtin                        Int64,
                oper_type                   Nullable(String),
                product_name                Nullable(String),
                cnt                         Int64,
                year                        Int64,
                quarter                     Int64,
                month                       Int64,
                week                        Int64,
                week_year                   Int64,
                product_brand               Nullable(String),
                gtin_group                  Nullable(String),
                retail_network_current      Nullable(String),
                managment_company_current   Nullable(String),
                retail_network_next         Nullable(String),
                managment_company_next      Nullable(String)
            ) ENGINE = MergeTree()
            ORDER BY gtin -- Важно выбрать подходящий ключ сортировки
        '''
        client.command(create_table_sql)
        print(f"Таблица '{table_name}' проверена/создана в ClickHouse.")

        # Выполните запрос
        client.insert_df(f'{database_name}.{table_name}', df_movement)
        print(f"Данные из DataFrame успешно записаны в таблицу '{table_name}'.")

        # Проверка данных
        result = client.query(f"SELECT * FROM {database_name}.{table_name} LIMIT 5")
        print("\nПроверка записанных данных (первые 5 строк):")
        print(pd.DataFrame(result.result_rows, columns=result.column_names))

        client.close()
        print("Соединение с ClickHouse закрыто.")

    except Exception as e:
        print(f"Ошибка при подключении к ClickHouse: {e}")

print("SSH-туннель закрыт.")

2025-07-30 15:35:24,639| ERROR   | Password is required for key C:\Users\m.rybakov/.ssh\id_ed25519


DataFrame для записи:
tg                           string[python]
producer_inn                          Int64
producer_name                string[python]
current_inn                  string[python]
current_name                 string[python]
next_inn                     string[python]
next_name                    string[python]
gtin                                  Int64
oper_type                    string[python]
product_name                 string[python]
cnt                                   Int64
year                                  Int64
quarter                               Int64
month                                 Int64
week                                  Int64
week_year                             Int64
product_brand                string[python]
gtin_group                   string[python]
retail_network_current       string[python]
managment_company_current    string[python]
retail_network_next          string[python]
managment_company_next       string[python]
dtype: obj

In [None]:
table_name = 'contragents'

df_contragents['inn'] = df_contragents['inn'].astype('string')
df_contragents['name'] = df_contragents['name'].astype('string')
df_contragents['partner_group'] = df_contragents['partner_group'].astype('string')

database_name = 'data_rida'

print("DataFrame для записи:")
print(df_contragents.dtypes)
print("\n" + "="*50 + "\n")

with sshtunnel.SSHTunnelForwarder(
    (SSH_HOST, SSH_PORT),
    ssh_username=SSH_USER,
    ssh_pkey=SSH_PRIVATE_KEY_PATH,
    ssh_private_key_password=SSH_PRIVATE_KEY_PASSWORD,
    remote_bind_address=(CLICKHOUSE_HOST, CLICKHOUSE_PORT),
    local_bind_address=('127.0.0.1', LOCAL_BIND_PORT)
) as tunnel:
    print(f"SSH-туннель запущен. Локальный порт: {LOCAL_BIND_PORT}")

    try:
        client = get_client(
            host='127.0.0.1',
            port=LOCAL_BIND_PORT,
            user=CLICKHOUSE_USER,
            password=CLICKHOUSE_PASSWORD,
            database=database_name
        )

        create_table_sql = f'''
            CREATE TABLE IF NOT EXISTS {table_name} (
                inn               String,
                name              Nullable(String),
                custom_type       Nullable(Float64),
                type              Nullable(Float64),
                partner_group     Nullable(String)
            ) ENGINE = MergeTree()
            ORDER BY inn -- Важно выбрать подходящий ключ сортировки
        '''
        client.command(create_table_sql)
        print(f"Таблица '{table_name}' проверена/создана в ClickHouse.")

        # Выполните запрос
        client.insert_df(f'{database_name}.{table_name}', df_contragents)
        print(f"Данные из DataFrame успешно записаны в таблицу '{table_name}'.")

        # Проверка данных
        result = client.query(f"SELECT * FROM {database_name}.{table_name} LIMIT 5")
        print("\nПроверка записанных данных (первые 5 строк):")
        print(pd.DataFrame(result.result_rows, columns=result.column_names))

        client.close()
        print("Соединение с ClickHouse закрыто.")

    except Exception as e:
        print(f"Ошибка при подключении к ClickHouse: {e}")

print("SSH-туннель закрыт.")

2025-07-30 15:37:38,366| ERROR   | Password is required for key C:\Users\m.rybakov/.ssh\id_ed25519


DataFrame для записи:
inn              string[python]
name             string[python]
custom_type             float64
type                    float64
partner_group    string[python]
dtype: object


SSH-туннель запущен. Локальный порт: 8123
Таблица 'contragents' проверена/создана в ClickHouse.
Данные из DataFrame успешно записаны в таблицу 'contragents'.

Проверка записанных данных (первые 5 строк):
  inn name custom_type  type partner_group
0   -    -        None  None        [NULL]
1   -    -        None  None        [NULL]
2   -    -        None  None        [NULL]
3   -    -        None  None        [NULL]
4   -    -        None  None        [NULL]
Соединение с ClickHouse закрыто.
SSH-туннель закрыт.


In [None]:
table_name = 'location_sales'

df_location_sales['id'] = df_location_sales['id'].astype('string')
df_location_sales['inn'] = df_location_sales['inn'].astype('string')
df_location_sales['address'] = df_location_sales['address'].astype('string')
df_location_sales['federal_district'] = df_location_sales['federal_district'].astype('string')
df_location_sales['federal_subject'] = df_location_sales['federal_subject'].astype('string')
df_location_sales['area'] = df_location_sales['area'].astype('string')
df_location_sales['city'] = df_location_sales['city'].astype('string')
df_location_sales['city_area'] = df_location_sales['city_area'].astype('string')
df_location_sales['city_district'] = df_location_sales['city_district'].astype('string')
df_location_sales['settlement'] = df_location_sales['settlement'].astype('string')
df_location_sales['street'] = df_location_sales['street'].astype('string')
df_location_sales['house'] = df_location_sales['house'].astype('string')
df_location_sales['block'] = df_location_sales['block'].astype('string')
df_location_sales['flat'] = df_location_sales['flat'].astype('string')
df_location_sales['geo_lat'] = df_location_sales['geo_lat'].astype('string')
df_location_sales['geo_lon'] = df_location_sales['geo_lon'].astype('string')
df_location_sales['region_fias_id'] = df_location_sales['region_fias_id'].astype('string')
df_location_sales['area_fias_id'] = df_location_sales['area_fias_id'].astype('string')
df_location_sales['city_fias_id'] = df_location_sales['city_fias_id'].astype('string')
df_location_sales['city_district_fias_id'] = df_location_sales['city_district_fias_id'].astype('string')
df_location_sales['settlement_fias_id'] = df_location_sales['settlement_fias_id'].astype('string')
df_location_sales['street_fias_id'] = df_location_sales['street_fias_id'].astype('string')
df_location_sales['house_fias_id'] = df_location_sales['house_fias_id'].astype('string')
df_location_sales['partner_name'] = df_location_sales['partner_name'].astype('string')
df_location_sales['partner_group'] = df_location_sales['partner_group'].astype('string')
df_location_sales['user_address_id'] = df_location_sales['user_address_id'].astype('string')

database_name = 'data_rida'

print("DataFrame для записи:")
print(df_location_sales.dtypes)
print("\n" + "="*50 + "\n")

with sshtunnel.SSHTunnelForwarder(
    (SSH_HOST, SSH_PORT),
    ssh_username=SSH_USER,
    ssh_pkey=SSH_PRIVATE_KEY_PATH,
    ssh_private_key_password=SSH_PRIVATE_KEY_PASSWORD,
    remote_bind_address=(CLICKHOUSE_HOST, CLICKHOUSE_PORT),
    local_bind_address=('127.0.0.1', LOCAL_BIND_PORT)
) as tunnel:
    print(f"SSH-туннель запущен. Локальный порт: {LOCAL_BIND_PORT}")

    try:
        client = get_client(
            host='127.0.0.1',
            port=LOCAL_BIND_PORT,
            user=CLICKHOUSE_USER,
            password=CLICKHOUSE_PASSWORD,
            database=database_name
        )

        create_table_sql = f'''
            CREATE TABLE IF NOT EXISTS {table_name} (
                id                      Nullable(String),
                inn                     String,
                type                    Nullable(Float64),
                address                 Nullable(String),
                postal_code             Nullable(Float64),
                federal_district        Nullable(String),
                federal_subject         Nullable(String),
                area                    Nullable(String),
                city                    Nullable(String),
                city_area               Nullable(String),
                city_district           Nullable(String),
                settlement              Nullable(String),
                street                  Nullable(String),
                house                   Nullable(String),
                block                   Nullable(String),
                flat                    Nullable(String),
                room                    Nullable(Float64),
                geo_lat                 Nullable(String),
                geo_lon                 Nullable(String),
                okato                   Nullable(Float64),
                region_fias_id          Nullable(String),
                region_kladr_id         Nullable(Float64),
                area_fias_id            Nullable(String),
                area_kladr_id           Nullable(Float64),
                city_fias_id            Nullable(String),
                city_kladr_id           Nullable(Float64),
                city_district_fias_id   Nullable(String),
                city_district_kladr_id  Nullable(Float64),
                settlement_fias_id      Nullable(String),
                settlement_kladr_id     Nullable(Float64),
                street_fias_id          Nullable(String),
                street_kladr_id         Nullable(Float64),
                house_fias_id           Nullable(String),
                house_kladr_id          Nullable(Float64),
                kladr_id                Nullable(Float64),
                partner_name            Nullable(String),
                partner_type            Nullable(Float64),
                partner_custom_type     Nullable(Float64),
                partner_group           Nullable(String),
                custom_id               Nullable(Float64),
                custom_type             Nullable(Float64),
                user_address_id         Nullable(String)
            ) ENGINE = MergeTree()
            ORDER BY inn -- Важно выбрать подходящий ключ сортировки
        '''
        client.command(create_table_sql)
        print(f"Таблица '{table_name}' проверена/создана в ClickHouse.")

        # Выполните запрос
        client.insert_df(f'{database_name}.{table_name}', df_location_sales)
        print(f"Данные из DataFrame успешно записаны в таблицу '{table_name}'.")

        # Проверка данных
        result = client.query(f"SELECT * FROM {database_name}.{table_name} LIMIT 5")
        print("\nПроверка записанных данных (первые 5 строк):")
        print(pd.DataFrame(result.result_rows, columns=result.column_names))

        client.close()
        print("Соединение с ClickHouse закрыто.")

    except Exception as e:
        print(f"Ошибка при подключении к ClickHouse: {e}")

print("SSH-туннель закрыт.")

2025-07-30 15:37:46,784| ERROR   | Password is required for key C:\Users\m.rybakov/.ssh\id_ed25519


DataFrame для записи:
id                        string[python]
inn                       string[python]
type                             float64
address                   string[python]
postal_code                      float64
federal_district          string[python]
federal_subject           string[python]
area                      string[python]
city                      string[python]
city_area                 string[python]
city_district             string[python]
settlement                string[python]
street                    string[python]
house                     string[python]
block                     string[python]
flat                      string[python]
room                             float64
geo_lat                   string[python]
geo_lon                   string[python]
okato                            float64
region_fias_id            string[python]
region_kladr_id                  float64
area_fias_id              string[python]
area_kladr_id                    fl

In [None]:
table_name = 'turnover_month'

df_turnover_month['tg'] = df_turnover_month['tg'].astype('string')
df_turnover_month['producer_inn'] = df_turnover_month['producer_inn'].astype('Int64')
df_turnover_month['producer_name'] = df_turnover_month['producer_name'].astype('string')
df_turnover_month['gtin'] = df_turnover_month['gtin'].astype('Int64')
df_turnover_month['product_name'] = df_turnover_month['product_name'].astype('string')
df_turnover_month['year'] = df_turnover_month['year'].astype('Int64')
df_turnover_month['quarter'] = df_turnover_month['quarter'].astype('Int64')
df_turnover_month['month'] = df_turnover_month['month'].astype('Int64')
df_turnover_month['product_brand'] = df_turnover_month['product_brand'].astype('string')
df_turnover_month['gtin_group'] = df_turnover_month['gtin_group'].astype('string')


database_name = 'data_rida'

print("DataFrame для записи:")
print(df_turnover_month.dtypes)
print("\n" + "="*50 + "\n")

with sshtunnel.SSHTunnelForwarder(
    (SSH_HOST, SSH_PORT),
    ssh_username=SSH_USER,
    ssh_pkey=SSH_PRIVATE_KEY_PATH,
    ssh_private_key_password=SSH_PRIVATE_KEY_PASSWORD,
    remote_bind_address=(CLICKHOUSE_HOST, CLICKHOUSE_PORT),
    local_bind_address=('127.0.0.1', LOCAL_BIND_PORT)
) as tunnel:
    print(f"SSH-туннель запущен. Локальный порт: {LOCAL_BIND_PORT}")

    try:
        client = get_client(
            host='127.0.0.1',
            port=LOCAL_BIND_PORT,
            user=CLICKHOUSE_USER,
            password=CLICKHOUSE_PASSWORD,
            database=database_name
        )

        create_table_sql = f'''
            CREATE TABLE IF NOT EXISTS {table_name} (
                tg                          Nullable(String),
                producer_inn                Int64,
                producer_name               Nullable(String),
                gtin                        Int64,
                product_name                Nullable(String),
                days                        Nullable(Float64),
                year                        Int64,
                quarter                     Int64,
                month                       Int64,
                product_brand               Nullable(String),
                gtin_group                  Nullable(String)
            ) ENGINE = MergeTree()
            ORDER BY producer_inn -- Важно выбрать подходящий ключ сортировки
        '''
        client.command(create_table_sql)
        print(f"Таблица '{table_name}' проверена/создана в ClickHouse.")

        # Выполните запрос
        client.insert_df(f'{database_name}.{table_name}', df_turnover_month)
        print(f"Данные из DataFrame успешно записаны в таблицу '{table_name}'.")

        # Проверка данных
        result = client.query(f"SELECT * FROM {database_name}.{table_name} LIMIT 5")
        print("\nПроверка записанных данных (первые 5 строк):")
        print(pd.DataFrame(result.result_rows, columns=result.column_names))

        client.close()
        print("Соединение с ClickHouse закрыто.")

    except Exception as e:
        print(f"Ошибка при подключении к ClickHouse: {e}")

print("SSH-туннель закрыт.")

2025-07-30 15:38:21,616| ERROR   | Password is required for key C:\Users\m.rybakov/.ssh\id_ed25519


DataFrame для записи:
tg               string[python]
producer_inn              Int64
producer_name    string[python]
gtin                      Int64
product_name     string[python]
days                    float64
year                      Int64
quarter                   Int64
month                     Int64
product_brand    string[python]
gtin_group       string[python]
dtype: object


SSH-туннель запущен. Локальный порт: 8123
Таблица 'turnover_month' проверена/создана в ClickHouse.
Данные из DataFrame успешно записаны в таблицу 'turnover_month'.

Проверка записанных данных (первые 5 строк):
     tg  producer_inn                          producer_name           gtin  \
0  Вода    3849011431  БАЙКАЛСИ КАМПАНИ  АО ГК "МОРЕ БАЙКАЛ"  4670010850023   
1  Вода    3849011431  БАЙКАЛСИ КАМПАНИ  АО ГК "МОРЕ БАЙКАЛ"  4670010850030   
2  Вода    3849011431  БАЙКАЛСИ КАМПАНИ  АО ГК "МОРЕ БАЙКАЛ"  4670010850450   
3  Вода    3849011431  БАЙКАЛСИ КАМПАНИ  АО ГК "МОРЕ БАЙКАЛ"  4670010850498   
4  Во

In [None]:
table_name = 'turnover_period'

df_turnover_period['tg'] = df_turnover_period['tg'].astype('string')
df_turnover_period['producer_inn'] = df_turnover_period['producer_inn'].astype('Int64')
df_turnover_period['producer_name'] = df_turnover_period['producer_name'].astype('string')
df_turnover_period['gtin'] = df_turnover_period['gtin'].astype('Int64')
df_turnover_period['product_name'] = df_turnover_period['product_name'].astype('string')
df_turnover_period['year'] = df_turnover_period['year'].astype('Int64')
df_turnover_period['quarter'] = df_turnover_period['quarter'].astype('Int64')
df_turnover_period['month'] = df_turnover_period['month'].astype('Int64')
df_turnover_period['week'] = df_turnover_period['week'].astype('Int64')
df_turnover_period['week_year'] = df_turnover_period['week_year'].astype('Int64')
df_turnover_period['product_brand'] = df_turnover_period['product_brand'].astype('string')
df_turnover_period['gtin_group'] = df_turnover_period['gtin_group'].astype('string')


database_name = 'data_rida'

print("DataFrame для записи:")
print(df_turnover_period.dtypes)
print("\n" + "="*50 + "\n")

with sshtunnel.SSHTunnelForwarder(
    (SSH_HOST, SSH_PORT),
    ssh_username=SSH_USER,
    ssh_pkey=SSH_PRIVATE_KEY_PATH,
    ssh_private_key_password=SSH_PRIVATE_KEY_PASSWORD,
    remote_bind_address=(CLICKHOUSE_HOST, CLICKHOUSE_PORT),
    local_bind_address=('127.0.0.1', LOCAL_BIND_PORT)
) as tunnel:
    print(f"SSH-туннель запущен. Локальный порт: {LOCAL_BIND_PORT}")

    try:
        client = get_client(
            host='127.0.0.1',
            port=LOCAL_BIND_PORT,
            user=CLICKHOUSE_USER,
            password=CLICKHOUSE_PASSWORD,
            database=database_name
        )

        create_table_sql = f'''
            CREATE TABLE IF NOT EXISTS {table_name} (
                tg                          Nullable(String),
                producer_inn                Int64,
                producer_name               Nullable(String),
                gtin                        Int64,
                product_name                Nullable(String),
                days                        Nullable(Float64),
                year                        Int64,
                quarter                     Int64,
                month                       Int64,
                week                        Int64,
                week_year                   Int64,
                product_brand               Nullable(String),
                gtin_group                  Nullable(String)
            ) ENGINE = MergeTree()
            ORDER BY producer_inn -- Важно выбрать подходящий ключ сортировки
        '''
        client.command(create_table_sql)
        print(f"Таблица '{table_name}' проверена/создана в ClickHouse.")

        # Выполните запрос
        client.insert_df(f'{database_name}.{table_name}', df_turnover_period)
        print(f"Данные из DataFrame успешно записаны в таблицу '{table_name}'.")

        # Проверка данных
        result = client.query(f"SELECT * FROM {database_name}.{table_name} LIMIT 5")
        print("\nПроверка записанных данных (первые 5 строк):")
        print(pd.DataFrame(result.result_rows, columns=result.column_names))

        client.close()
        print("Соединение с ClickHouse закрыто.")

    except Exception as e:
        print(f"Ошибка при подключении к ClickHouse: {e}")

print("SSH-туннель закрыт.")

2025-07-30 15:38:24,723| ERROR   | Password is required for key C:\Users\m.rybakov/.ssh\id_ed25519


DataFrame для записи:
tg               string[python]
producer_inn              Int64
producer_name    string[python]
gtin                      Int64
product_name     string[python]
days                    float64
year                      Int64
quarter                   Int64
month                     Int64
week                      Int64
week_year                 Int64
product_brand    string[python]
gtin_group       string[python]
dtype: object


SSH-туннель запущен. Локальный порт: 8123
Таблица 'turnover_period' проверена/создана в ClickHouse.
Данные из DataFrame успешно записаны в таблицу 'turnover_period'.

Проверка записанных данных (первые 5 строк):
     tg  producer_inn                          producer_name           gtin  \
0  Вода    3849011431  БАЙКАЛСИ КАМПАНИ  АО ГК "МОРЕ БАЙКАЛ"  4670010850023   
1  Вода    3849011431  БАЙКАЛСИ КАМПАНИ  АО ГК "МОРЕ БАЙКАЛ"  4670010850030   
2  Вода    3849011431  БАЙКАЛСИ КАМПАНИ  АО ГК "МОРЕ БАЙКАЛ"  4670010850047   
3  Вода    3849011

In [None]:
table_name = 'gtin_monitoring'

df_gtin_monitoring['enabled'] = df_gtin_monitoring['enabled'].astype('Int64')
df_gtin_monitoring['gtin'] = df_gtin_monitoring['gtin'].astype('Int64')
df_gtin_monitoring['brand'] = df_gtin_monitoring['brand'].astype('string')
df_gtin_monitoring['name'] = df_gtin_monitoring['name'].astype('string')
df_gtin_monitoring['category'] = df_gtin_monitoring['category'].astype('string')
df_gtin_monitoring['organization_name'] = df_gtin_monitoring['organization_name'].astype('string')
df_gtin_monitoring['date_added'] = pd.to_datetime(df_gtin_monitoring['date_added'])
df_gtin_monitoring['date_disabled'] = pd.to_datetime(df_gtin_monitoring['date_disabled'])

database_name = 'data_rida'

print("DataFrame для записи:")
print(df_gtin_monitoring.dtypes)
print("\n" + "="*50 + "\n")

with sshtunnel.SSHTunnelForwarder(
    (SSH_HOST, SSH_PORT),
    ssh_username=SSH_USER,
    ssh_pkey=SSH_PRIVATE_KEY_PATH,
    ssh_private_key_password=SSH_PRIVATE_KEY_PASSWORD,
    remote_bind_address=(CLICKHOUSE_HOST, CLICKHOUSE_PORT),
    local_bind_address=('127.0.0.1', LOCAL_BIND_PORT)
) as tunnel:
    print(f"SSH-туннель запущен. Локальный порт: {LOCAL_BIND_PORT}")

    try:
        client = get_client(
            host='127.0.0.1',
            port=LOCAL_BIND_PORT,
            user=CLICKHOUSE_USER,
            password=CLICKHOUSE_PASSWORD,
            database=database_name
        )

        create_table_sql = f'''
            CREATE TABLE IF NOT EXISTS {table_name} (
                enabled             Int64,
                gtin                Int64,
                brand               Nullable(String),
                name                Nullable(String),
                category            Nullable(String),
                custom_name         Nullable(Float64),
                custom_id           Nullable(Float64),
                organization_name   Nullable(String),
                date_added          Nullable(DateTime),
                date_disabled       Nullable(DateTime)
            ) ENGINE = MergeTree()
            ORDER BY gtin -- Важно выбрать подходящий ключ сортировки
        '''
        client.command(create_table_sql)
        print(f"Таблица '{table_name}' проверена/создана в ClickHouse.")

        # Выполните запрос
        client.insert_df(f'{database_name}.{table_name}', df_gtin_monitoring)
        print(f"Данные из DataFrame успешно записаны в таблицу '{table_name}'.")

        # Проверка данных
        result = client.query(f"SELECT * FROM {database_name}.{table_name} LIMIT 5")
        print("\nПроверка записанных данных (первые 5 строк):")
        print(pd.DataFrame(result.result_rows, columns=result.column_names))

        client.close()
        print("Соединение с ClickHouse закрыто.")

    except Exception as e:
        print(f"Ошибка при подключении к ClickHouse: {e}")

print("SSH-туннель закрыт.")

2025-07-30 15:38:27,903| ERROR   | Password is required for key C:\Users\m.rybakov/.ssh\id_ed25519


DataFrame для записи:
enabled                            Int64
gtin                               Int64
brand                     string[python]
name                      string[python]
category                  string[python]
custom_name                      float64
custom_id                        float64
organization_name         string[python]
date_added           datetime64[ns, UTC]
date_disabled             datetime64[ns]
dtype: object


SSH-туннель запущен. Локальный порт: 8123
Таблица 'gtin_monitoring' проверена/создана в ClickHouse.
Данные из DataFrame успешно записаны в таблицу 'gtin_monitoring'.

Проверка записанных данных (первые 5 строк):
   enabled           gtin           brand  \
0        1  4670010850023  BAIKAL RESERVE   
1        1  4670010850023  BAIKAL RESERVE   
2        1  4670010850023  BAIKAL RESERVE   
3        1  4670010850023  BAIKAL RESERVE   
4        1  4670010850023  BAIKAL RESERVE   

                                                name  \
0  Вода минер

In [None]:
table_name = 'sales'

df_sales['tg'] = df_sales['tg'].astype('string')
df_sales['producer_inn'] = df_sales['producer_inn'].astype('Int64')
df_sales['producer_name'] = df_sales['producer_name'].astype('string')
df_sales['inn'] = df_sales['inn'].astype('string')
df_sales['name'] = df_sales['name'].astype('string')
df_sales['federal_district'] = df_sales['federal_district'].astype('string')
df_sales['federal_subject_code'] = df_sales['federal_subject_code'].astype('string')
df_sales['federal_subject'] = df_sales['federal_subject'].astype('string')
df_sales['address'] = df_sales['address'].astype('string')
df_sales['gtin'] = df_sales['gtin'].astype('Int64')
df_sales['product_name'] = df_sales['product_name'].astype('string')
df_sales['cnt'] = df_sales['cnt'].astype('Int64')
df_sales['year'] = df_sales['year'].astype('Int64')
df_sales['quarter'] = df_sales['quarter'].astype('Int64')
df_sales['month'] = df_sales['month'].astype('Int64')
df_sales['week'] = df_sales['week'].astype('Int64')
df_sales['week_year'] = df_sales['week_year'].astype('string')
df_sales['product_brand'] = df_sales['product_brand'].astype('string')
df_sales['gtin_group'] = df_sales['gtin_group'].astype('string')
df_sales['branch_id'] = df_sales['branch_id'].astype('string')
df_sales['branch_federal_district'] = df_sales['branch_federal_district'].astype('string')
df_sales['branch_region'] = df_sales['branch_region'].astype('string')
df_sales['branch_area'] = df_sales['branch_area'].astype('string')
df_sales['branch_city'] = df_sales['branch_city'].astype('string')
df_sales['branch_city_area'] = df_sales['branch_city_area'].astype('string')
df_sales['branch_city_district'] = df_sales['branch_city_district'].astype('string')
df_sales['branch_settlement'] = df_sales['branch_settlement'].astype('string')
df_sales['branch_street'] = df_sales['branch_street'].astype('string')
df_sales['branch_house'] = df_sales['branch_house'].astype('string')
df_sales['branch_block'] = df_sales['branch_block'].astype('string')
df_sales['branch_flat'] = df_sales['branch_flat'].astype('string')
df_sales['branch_fias_id'] = df_sales['branch_fias_id'].astype('string')
df_sales['retail_network'] = df_sales['retail_network'].astype('string')
df_sales['managment_company'] = df_sales['managment_company'].astype('string')

database_name = 'data_rida'

print("DataFrame для записи:")
print(df_sales.dtypes)
print("\n" + "="*50 + "\n")

with sshtunnel.SSHTunnelForwarder(
    (SSH_HOST, SSH_PORT),
    ssh_username=SSH_USER,
    ssh_pkey=SSH_PRIVATE_KEY_PATH,
    ssh_private_key_password=SSH_PRIVATE_KEY_PASSWORD,
    remote_bind_address=(CLICKHOUSE_HOST, CLICKHOUSE_PORT),
    local_bind_address=('127.0.0.1', LOCAL_BIND_PORT)
) as tunnel:
    print(f"SSH-туннель запущен. Локальный порт: {LOCAL_BIND_PORT}")

    try:
        client = get_client(
            host='127.0.0.1',
            port=LOCAL_BIND_PORT,
            user=CLICKHOUSE_USER,
            password=CLICKHOUSE_PASSWORD,
            database=database_name
        )

        create_table_sql = f'''
            CREATE TABLE IF NOT EXISTS {table_name} (
                tg                              Nullable(String),
                producer_inn                    Int64,
                producer_name                   Nullable(String),
                inn                             Nullable(String),
                name                            Nullable(String),
                federal_district                Nullable(String),
                federal_subject_code            Nullable(String),
                federal_subject                 Nullable(String),
                address                         Nullable(String),
                gtin                            Int64,
                product_name                    Nullable(String),
                cnt                             Int64,
                year                            Int64,
                quarter                         Int64,
                month                           Int64,
                week                            Int64,
                week_year                       Int64,
                product_brand                   Nullable(String),
                gtin_group                      Nullable(String),
                branch_id                       Nullable(String),
                branch_postal_code              Nullable(Float64),
                branch_federal_district         Nullable(String),
                branch_region                   Nullable(String),
                branch_area                     Nullable(String),
                branch_city                     Nullable(String),
                branch_city_area                Nullable(String),
                branch_city_district            Nullable(String),
                branch_settlement               Nullable(String),
                branch_street                   Nullable(String),
                branch_house                    Nullable(String),
                branch_block                    Nullable(String),
                branch_flat                     Nullable(String),
                branch_room                     Nullable(Float64),
                branch_fias_id                  Nullable(String),
                retail_network                  Nullable(String),
                managment_company               Nullable(String)
            ) ENGINE = MergeTree()
            ORDER BY gtin -- Важно выбрать подходящий ключ сортировки
        '''
        client.command(create_table_sql)
        print(f"Таблица '{table_name}' проверена/создана в ClickHouse.")

        # Выполните запрос
        client.insert_df(f'{database_name}.{table_name}', df_sales)
        print(f"Данные из DataFrame успешно записаны в таблицу '{table_name}'.")

        # Проверка данных
        result = client.query(f"SELECT * FROM {database_name}.{table_name} LIMIT 5")
        print("\nПроверка записанных данных (первые 5 строк):")
        print(pd.DataFrame(result.result_rows, columns=result.column_names))

        client.close()
        print("Соединение с ClickHouse закрыто.")

    except Exception as e:
        print(f"Ошибка при подключении к ClickHouse: {e}")

print("SSH-туннель закрыт.")

2025-07-30 15:39:12,672| ERROR   | Password is required for key C:\Users\m.rybakov/.ssh\id_ed25519


DataFrame для записи:
tg                         string[python]
producer_inn                        Int64
producer_name              string[python]
inn                        string[python]
name                       string[python]
federal_district           string[python]
federal_subject_code       string[python]
federal_subject            string[python]
address                    string[python]
gtin                                Int64
product_name               string[python]
cnt                                 Int64
year                                Int64
quarter                             Int64
month                               Int64
week                                Int64
week_year                  string[python]
product_brand              string[python]
gtin_group                 string[python]
branch_id                  string[python]
branch_postal_code                float64
branch_federal_district    string[python]
branch_region              string[python]
branch_area 

2025-07-30 15:48:46,941| ERROR   | Socket exception: Удаленный хост принудительно разорвал существующее подключение (10054)
2025-07-30 15:48:47,003| ERROR   | Could not establish connection from local ('127.0.0.1', 8123) to remote ('127.0.0.1', 8123) side of the tunnel: 
Unexpected Http Driver Exception


Ошибка при подключении к ClickHouse: Error ('Connection aborted.', ConnectionAbortedError(10053, 'Программа на вашем хост-компьютере разорвала установленное подключение', None, 10053, None)) executing HTTP request attempt 1 (http://127.0.0.1:8123)
SSH-туннель закрыт.


In [None]:
table_name = 'averange_prices'

df_averange_prices['tg'] = df_averange_prices['tg'].astype('string')
df_averange_prices['producer_inn'] = df_averange_prices['producer_inn'].astype('Int64')
df_averange_prices['producer_name'] = df_averange_prices['producer_name'].astype('string')
df_averange_prices['federal_district'] = df_averange_prices['federal_district'].astype('string')
df_averange_prices['federal_subject_code'] = df_averange_prices['federal_subject_code'].astype('string')
df_averange_prices['federal_subject'] = df_averange_prices['federal_subject'].astype('string')
df_averange_prices['inn'] = df_averange_prices['inn'].astype('string')
df_averange_prices['name'] = df_averange_prices['name'].astype('string')
df_averange_prices['gtin'] = df_averange_prices['gtin'].astype('Int64')
df_averange_prices['product_name'] = df_averange_prices['product_name'].astype('string')
df_averange_prices['branch_cnt'] = df_averange_prices['branch_cnt'].astype('Int64')
df_averange_prices['volume_sales'] = df_averange_prices['volume_sales'].astype('Int64')
df_averange_prices['average_price'] = df_averange_prices['average_price'].astype('string')
df_averange_prices['average_price_unit'] = df_averange_prices['average_price_unit'].astype('string')
df_averange_prices['year'] = df_averange_prices['year'].astype('Int64')
df_averange_prices['quarter'] = df_averange_prices['quarter'].astype('Int64')
df_averange_prices['month'] = df_averange_prices['month'].astype('Int64')
df_averange_prices['week'] = df_averange_prices['week'].astype('Int64')
df_averange_prices['week_year'] = df_averange_prices['week_year'].astype('string')
df_averange_prices['gtin_group'] = df_averange_prices['gtin_group'].astype('string')
df_averange_prices['product_brand'] = df_averange_prices['product_brand'].astype('string')
df_averange_prices['retail_network'] = df_averange_prices['retail_network'].astype('string')
df_averange_prices['managment_company'] = df_averange_prices['managment_company'].astype('string')

database_name = 'data_rida'

print("DataFrame для записи:")
print(df_sales.dtypes)
print("\n" + "="*50 + "\n")

with sshtunnel.SSHTunnelForwarder(
    (SSH_HOST, SSH_PORT),
    ssh_username=SSH_USER,
    ssh_pkey=SSH_PRIVATE_KEY_PATH,
    ssh_private_key_password=SSH_PRIVATE_KEY_PASSWORD,
    remote_bind_address=(CLICKHOUSE_HOST, CLICKHOUSE_PORT),
    local_bind_address=('127.0.0.1', LOCAL_BIND_PORT)
) as tunnel:
    print(f"SSH-туннель запущен. Локальный порт: {LOCAL_BIND_PORT}")

    try:
        client = get_client(
            host='127.0.0.1',
            port=LOCAL_BIND_PORT,
            user=CLICKHOUSE_USER,
            password=CLICKHOUSE_PASSWORD,
            database=database_name
        )

        create_table_sql = f'''
            CREATE TABLE IF NOT EXISTS {table_name} (
                tg                              Nullable(String),
                producer_inn                    Int64,
                producer_name                   Nullable(String),
                federal_district                Nullable(String),
                federal_subject_code            Nullable(String),
                federal_subject                 Nullable(String),
                inn                             Int64,
                name                            Nullable(String),
                gtin                            Int64,
                product_name                    Nullable(String),
                branch_cnt                      Int64,
                volume_sales                    Int64,
                average_price                   Nullable(String),
                average_price_unit              Nullable(String),
                year                            Int64,
                quarter                         Int64,
                month                           Int64,
                week                            Int64,
                week_year                       Int64,
                gtin_group                      Nullable(String),
                product_brand                   Nullable(String),
                retail_network                  Nullable(String),
                managment_company               Nullable(String)
            ) ENGINE = MergeTree()
            ORDER BY gtin -- Важно выбрать подходящий ключ сортировки
        '''
        client.command(create_table_sql)
        print(f"Таблица '{table_name}' проверена/создана в ClickHouse.")

        # Выполните запрос
        client.insert_df(f'{database_name}.{table_name}', df_averange_prices)
        print(f"Данные из DataFrame успешно записаны в таблицу '{table_name}'.")

        # Проверка данных
        result = client.query(f"SELECT * FROM {database_name}.{table_name} LIMIT 5")
        print("\nПроверка записанных данных (первые 5 строк):")
        print(pd.DataFrame(result.result_rows, columns=result.column_names))

        client.close()
        print("Соединение с ClickHouse закрыто.")

    except Exception as e:
        print(f"Ошибка при подключении к ClickHouse: {e}")

print("SSH-туннель закрыт.")

2025-07-30 15:48:48,009| ERROR   | Password is required for key C:\Users\m.rybakov/.ssh\id_ed25519


DataFrame для записи:
tg                         string[python]
producer_inn                        Int64
producer_name              string[python]
inn                        string[python]
name                       string[python]
federal_district           string[python]
federal_subject_code       string[python]
federal_subject            string[python]
address                    string[python]
gtin                                Int64
product_name               string[python]
cnt                                 Int64
year                                Int64
quarter                             Int64
month                               Int64
week                                Int64
week_year                  string[python]
product_brand              string[python]
gtin_group                 string[python]
branch_id                  string[python]
branch_postal_code                float64
branch_federal_district    string[python]
branch_region              string[python]
branch_area 

2025-07-30 15:49:09,799| ERROR   | Could not connect to gateway 172.17.5.30:22 : Unable to connect to 172.17.5.30: [WinError 10060] Попытка установить соединение была безуспешной, т.к. от другого компьютера за требуемое время не получен нужный отклик, или было разорвано уже установленное соединение из-за неверного отклика уже подключенного компьютера


BaseSSHTunnelForwarderError: Could not establish session to SSH gateway