In [1]:
# Импортируем необходимые модули и библиотеки
import os
from catboost import CatBoostClassifier, Pool, CatBoost
import pandas as pd
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.orm import sessionmaker, Session
from sqlalchemy.ext.declarative import declarative_base
from typing import List
from fastapi import FastAPI, Depends 
from datetime import datetime
from pydantic import BaseModel

In [2]:
'''
ФУНКЦИИ ПО ЗАГРУЗКЕ МОДЕЛЕЙ
'''
# Определяем, выполняется ли код в LMS или локально
def get_model_path(path: str) -> str:
    """Resolve the model file location for the current environment.

    The original author asks that this logic not be altered.

    Args:
        path: Model path to use when the code is not running in the LMS.

    Returns:
        ``'/workdir/user_input/model'`` when the ``IS_LMS`` environment
        variable equals ``"1"``; otherwise ``path`` unchanged.
    """
    # The environment flag alone decides between the LMS location and the caller's path.
    running_in_lms = os.environ.get("IS_LMS") == "1"
    return '/workdir/user_input/model' if running_in_lms else path

class CatBoostWrapper(CatBoost):
    """``CatBoost`` subclass that adds a ``predict_proba`` method."""

    def predict_proba(self, X):
        """Return ``self.predict`` evaluated with ``prediction_type='Probability'``.

        Args:
            X: Input data, forwarded unchanged to ``self.predict``.

        Returns:
            Whatever ``self.predict`` returns for the 'Probability' prediction type.
        """
        probabilities = self.predict(X, prediction_type='Probability')
        return probabilities

# Загрузка модели
def load_models():
    """Create a ``CatBoostWrapper`` and load its state from the model file.

    The file location is ``get_model_path("catboost_model.cbm")``.

    Returns:
        The ``CatBoostWrapper`` instance after ``load_model`` has been called on it.
    """
    weights_file = get_model_path("catboost_model.cbm")
    wrapper = CatBoostWrapper()
    wrapper.load_model(weights_file)
    return wrapper

In [3]:
'''
Получение данных из базы данных
'''

# Определяем функцию для получения данных из базы данных PostgreSQL
def batch_load_sql(query: str, chunksize: int = 200000) -> pd.DataFrame:
    """Run ``query`` against the PostgreSQL database and return the result as one DataFrame.

    The result is streamed in chunks (``stream_results=True`` plus pandas'
    ``chunksize``) and the chunks are concatenated once at the end.

    Args:
        query: Passed unchanged as the first argument of ``pandas.read_sql``.
        chunksize: Number of rows fetched per chunk. Defaults to 200000,
            the value that was previously hard-coded.

    Returns:
        A DataFrame with all rows and a fresh 0..n-1 index; an empty
        DataFrame if the query yields no chunks at all.
    """
    # NOTE(review): the connection URL, including the password, is hard-coded.
    # Prefer reading it from an environment variable or a secrets store.
    engine = create_engine(
        "postgresql://robot-startml-ro:pheiph0hahj1Vaif@"
        "postgres.lab.karpov.courses:6432/startml"
    )
    try:
        conn = engine.connect().execution_options(stream_results=True)
        try:
            chunks = list(pd.read_sql(query, conn, chunksize=chunksize))
        finally:
            # Close the connection even if reading fails part-way through.
            conn.close()
    finally:
        # The engine is created per call, so release its connection pool too.
        engine.dispose()

    # pd.concat raises ValueError on an empty list; return an empty frame instead.
    if not chunks:
        return pd.DataFrame()
    return pd.concat(chunks, ignore_index=True)

def load_features() -> pd.DataFrame:
    """Fetch the feature data through ``batch_load_sql``.

    Returns:
        The DataFrame produced by ``batch_load_sql`` for the identifier below.
    """
    # NOTE(review): presumably a table name rather than a SQL statement
    # (pandas.read_sql accepts either with a SQLAlchemy connectable) — confirm.
    features_source = "a-efimik_features_lesson_22_500MB"
    features_frame = batch_load_sql(features_source)
    return features_frame

In [4]:
# Load the CatBoost model from its file (see load_models).
model = load_models()
# Pull the feature data from the database into a DataFrame (see load_features).
features = load_features()

In [5]:
# Display the loaded feature DataFrame as this cell's output.
features

Unnamed: 0,user_id,post_id_x,gender,age,country,city,exp_group,os,source,topic,...,component_2,component_3,component_4,component_5,component_6,component_7,component_8,component_9,component_10,post_id_y
0,19910,2293,0,49,7,1953,0,1,0,6,...,-0.068447,0.095292,-0.013524,-0.107898,0.054433,0.057245,-0.010784,-0.040313,-0.034863,2293
1,19910,5858,0,49,7,1953,0,1,0,3,...,0.069727,-0.040276,-0.003474,-0.060572,0.040163,0.147658,0.003620,-0.003383,0.038553,5858
2,19910,4330,0,49,7,1953,0,1,0,3,...,0.088469,-0.003660,0.045802,0.016443,-0.042564,-0.013959,-0.035642,-0.007178,0.044798,4330
3,19910,1846,0,49,7,1953,0,1,0,5,...,-0.011699,0.055881,0.028060,-0.059715,-0.064100,-0.080073,-0.110536,0.054250,0.026702,1846
4,19910,5716,0,49,7,1953,0,1,0,3,...,0.167099,-0.070968,0.049694,0.013623,-0.021593,-0.058573,0.030697,0.036786,-0.027123,5716
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1537873,19909,4777,1,25,7,1953,0,0,0,3,...,0.018281,0.024119,0.008874,-0.064000,-0.047666,0.066901,-0.054509,0.010801,0.044446,4777
1537874,19909,2688,1,25,7,1953,0,0,0,1,...,-0.086500,-0.099185,0.228000,-0.055280,0.080617,0.028880,-0.070164,-0.013592,0.018305,2688
1537875,19909,1079,1,25,7,1953,0,0,0,4,...,-0.255849,-0.092953,-0.008727,0.107325,-0.103880,0.092647,0.082291,0.048761,-0.049324,1079
1537876,19909,3638,1,25,7,1953,0,0,0,1,...,-0.034514,-0.076291,0.197621,-0.070486,-0.011965,0.013616,-0.040987,-0.037456,-0.008935,3638
