In [28]:
import json
import os
import time
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine

In [29]:
# Location of the JSON configuration file; a leading "~" is expanded to the
# current user's home directory.
_CONFIG_LOCATION = '~/pet-projects/jupyter-notebooks/config.json'
CONFIG_PATH = os.path.expanduser(_CONFIG_LOCATION)

In [11]:

def load_config(path=None):
    """Load the notebook configuration from a JSON file.

    Args:
        path: Optional path to the JSON file. When omitted (or None), the
            module-level CONFIG_PATH is used.

    Returns:
        The parsed JSON content, exactly as produced by json.load.

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the file exists but is not valid JSON.

    Both exceptions are subclasses of Exception, so callers that catch
    Exception keep working; the original error is attached as __cause__.
    """
    config_path = CONFIG_PATH if path is None else path
    try:
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(config_path, 'r', encoding='utf-8') as config_file:
            return json.load(config_file)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"config.json file not found: {config_path}") from err
    except json.JSONDecodeError as err:
        raise ValueError(f"Error parsing config.json ({config_path}): {err}") from err

In [26]:
config = load_config()
# Connect over the secure native protocol on port 9440.
# The password is percent-encoded so that characters such as "@", ":" or "/"
# are not mistaken for URL delimiters. config.json must therefore hold the raw
# password, not an already URL-encoded one.
clickhouse_password = quote(str(config['clickhouse_password']), safe='')
clickhouse_url = (
    f"clickhouse+native://default:{clickhouse_password}"
    f"@{config['clickhouse_host']}:9440/default?secure=True"
)
# connect_args is forwarded to the driver; the timeout values are presumably
# in seconds -- TODO confirm against the ClickHouse driver documentation.
engine = create_engine(clickhouse_url, connect_args={'connect_timeout': 10, 'send_receive_timeout': 10})

In [None]:
# Cache settings
cache_file = "vacancies_cache.csv"
cache_duration = 86400  # 24 hours, in seconds

# The cache counts as fresh when the file exists and was last modified less
# than cache_duration seconds ago.
cache_is_fresh = (
    os.path.exists(cache_file)
    and (time.time() - os.path.getmtime(cache_file)) < cache_duration
)

if cache_is_fresh:
    # Load from the cache.
    # NOTE(review): CSV does not store dtypes, so column types here may differ
    # from a frame fetched directly from ClickHouse -- verify if it matters.
    df = pd.read_csv(cache_file)
    print("Данные загружены из кеша")
else:
    # Fetch the data from ClickHouse and store it in the cache.
    query = "SELECT * FROM vacancies_hh_ru"
    df = pd.read_sql(query, engine)
    # Write to a temporary file and rename it into place, so an interrupted
    # write never leaves a truncated file that the next run would treat as a
    # valid, fresh cache.
    tmp_cache_file = cache_file + ".tmp"
    df.to_csv(tmp_cache_file, index=False)
    os.replace(tmp_cache_file, cache_file)
    print("Данные выгружены из ClickHouse и сохранены в кеш")

df.info()

In [None]:
# Print the distinct values found in the professional_role_names column.
unique_role_names = df['professional_role_names'].unique()
print(unique_role_names)