In [1]:
!pip install -q streamlit catboost

In [2]:
%%writefile app.py
import os
import re
import unicodedata
from datetime import datetime

import numpy as np
import pandas as pd
import streamlit as st
from catboost import CatBoostRegressor

# Code carried over from the original script
# Exclusive upper bound of the per-model shifts built by MetaModel (models 1..horizon-1);
# also the number of leading rows make_data_blocks discards from its output.
horizon = 10
# prepare_data builds EMA columns for windows 3 .. 2*month_length - 1.
month_length = 30
# Large sentinel value; not referenced anywhere else in the visible code.
inf = 1000000000
# Number of trailing rows train_test_split reserves as the test part.
test_size = 28

def load_merge_data(dir):
    """Read every input table from ``dir`` and outer-merge them on ``timestamp``.

    ``train.xlsx`` and ``test.xlsx`` are stacked (in that order) into the base
    frame, with the price column renamed to ``target``. Each remaining source
    is then outer-merged onto it in a fixed order, so any ``_x``/``_y``
    suffixes pandas adds to clashing column names stay stable between runs.

    Every source's date column is renamed to ``timestamp`` and passed through
    ``pd.to_datetime`` before merging. Previously only the CSV frames were
    converted, so an Excel sheet whose dates arrived as text made the merge
    fail on mismatched key dtypes.

    Args:
        dir: Directory containing the eleven input files. (The name shadows
            the built-in but is kept so keyword callers keep working.)

    Returns:
        pandas.DataFrame: the merged table sorted by ``timestamp``.

    Raises:
        Whatever ``pd.read_excel`` / ``pd.read_csv`` raise for a missing or
        unreadable file, and ``KeyError`` if a source lacks its date column.
    """
    def _read(reader, filename, renames):
        # Load one source, unify its date column name and dtype.
        frame = reader(os.path.join(dir, filename)).rename(columns=renames)
        frame["timestamp"] = pd.to_datetime(frame["timestamp"])
        return frame

    price_renames = {"dt": "timestamp", "Цена на арматуру": "target"}
    merged_df = pd.concat(
        [
            _read(pd.read_excel, "train.xlsx", price_renames),
            _read(pd.read_excel, "test.xlsx", price_renames),
        ],
        ignore_index=True,
    )

    # (reader, file name, column renames) -- the order defines the merge order.
    feature_sources = (
        (pd.read_csv, "CHMF Акции.csv", {"Date": "timestamp"}),
        (pd.read_csv, "MAGN Акции.csv", {"Дата": "timestamp"}),
        (pd.read_csv, "NLMK Акции.csv", {"Date": "timestamp"}),
        (pd.read_excel, "Грузоперевозки.xlsx",
         {"dt": "timestamp", "Индекс стоимости грузоперевозок": "transfer_cost"}),
        (pd.read_excel, "Данные рынка стройматериалов.xlsx", {"dt": "timestamp"}),
        (pd.read_excel, "Индекс LME.xlsx", {"дата": "timestamp"}),
        (pd.read_excel, "Макропоказатели.xlsx", {"dt": "timestamp"}),
        (pd.read_excel, "Топливо.xlsx", {"dt": "timestamp"}),
        (pd.read_excel, "Цены на сырье.xlsx", {"dt": "timestamp"}),
    )

    for reader, filename, renames in feature_sources:
        merged_df = pd.merge(
            merged_df, _read(reader, filename, renames), on='timestamp', how='outer')

    return merged_df.sort_values("timestamp")

def prepare_data(merged_df):
    """Derive EMA features, coerce columns to float and drop sparse columns.

    Steps, in order:
      1. Add ``EMA<w>`` columns (exponential moving average of ``target`` with
         ``alpha = 2 / (w + 1)``) for every ``w`` in ``3 .. 2*month_length - 1``.
         They are computed before rows are filtered, so rows without a target
         still occupy a position in the series.
      2. Drop rows whose ``target`` is missing.
      3. For every non-float column except ``timestamp``, keep the first
         numeric token found in each value's string form and cast to float;
         values without a numeric token become NaN.
      4. Parse ``timestamp`` to datetime.
      5. Drop the columns listed, one per line, in
         ``models/columns_with_high_nan.txt`` (resolved against the current
         working directory).

    The input frame is not modified: work happens on a copy, so the caller's
    frame no longer gains the EMA columns as a side effect, and column
    assignment no longer targets a slice of another frame.

    Args:
        merged_df: Frame with at least ``timestamp`` and ``target`` columns.

    Returns:
        pandas.DataFrame: the prepared frame.

    Raises:
        FileNotFoundError: if the column list file is absent.
        KeyError: if a listed column does not exist in the frame.
    """
    frame = merged_df.copy()

    for window in range(3, 2*month_length):
        frame['EMA'+str(window)] = frame['target'].ewm(alpha=2 / (window + 1), adjust=False).mean()

    # Explicit copy: the columns below are reassigned on this filtered frame.
    frame = frame.dropna(subset=["target"]).copy()

    # NOTE(review): the sign is only captured for decimals, so "-5" yields "5".
    # Left unchanged on purpose -- presumably the saved models were trained on
    # features produced by this exact pattern; confirm before fixing.
    number_pattern = re.compile(r"[-+]?\d*\.\d+|\d+")

    def _first_number(value):
        # First numeric token of the value's text, or None when there is none.
        match = number_pattern.search(str(value))
        return match.group(0) if match else None

    for column in frame.columns:
        if column != "timestamp" and frame[column].dtype != float:
            frame[column] = frame[column].apply(_first_number).astype(float)

    frame["timestamp"] = pd.to_datetime(frame["timestamp"])

    with open("models/columns_with_high_nan.txt", "r", encoding='utf8') as file:
        # Skip blank lines (e.g. a trailing newline) so they are not treated
        # as column names.
        columns_to_drop = [line for line in file.read().splitlines() if line.strip()]

    return frame.drop(columns=columns_to_drop)

def make_data_blocks(df, shift, window):
    """Build the lagged feature table used by one forecasting model.

    The output keeps ``timestamp`` and replaces ``target`` with the change of
    the original target over ``shift`` rows, divided by 100 and squashed with
    ``x / (|x| + 1)`` into the open interval (-1, 1).

    For every other input column ``c`` (the original ``target`` included) and
    every lag ``i`` in ``shift .. shift + window - 1`` a column named
    ``c + str(i)`` is added: the value lagged by ``shift`` rows when
    ``i == shift``, otherwise the value lagged by ``i`` rows minus the value
    lagged by ``shift`` rows.

    The first ``horizon`` rows (module constant, independent of ``shift``) are
    removed from the result.

    NOTE(review): names are built by plain concatenation, so a column ending
    in a digit could collide with another column's lagged name; verify for
    the actual column set.

    Args:
        df: Prepared frame containing ``timestamp`` and ``target``.
        shift: Smallest lag, in rows.
        window: Number of consecutive lags to generate.

    Returns:
        pandas.DataFrame: the feature table without its first ``horizon`` rows.
    """
    first_lag = shift
    stop_lag = shift + window

    blocks = df[["timestamp"]].copy()

    delta = np.array(df["target"] - df["target"].shift(first_lag)) / 100
    blocks["target"] = delta / (np.abs(delta) + 1)

    for name in (c for c in df.columns if c != "timestamp"):
        # Value at the smallest lag: stored as-is and used as the baseline
        # that every deeper lag is measured against.
        anchor = df[name].shift(first_lag)
        for lag in range(first_lag, stop_lag):
            if lag == first_lag:
                blocks[name + str(lag)] = anchor
            else:
                blocks[name + str(lag)] = df[name].shift(lag) - anchor

    return blocks[horizon:]

def train_test_split(df):
    """Split ``df`` into (everything but the last ``test_size`` rows, last ``test_size`` rows).

    Both parts are independent copies with a fresh 0-based index.
    """
    boundary = -test_size
    train_part = df[:boundary].copy().reset_index(drop=True)
    test_part = df[boundary:].copy().reset_index(drop=True)
    return (train_part, test_part)

class model:
    """One CatBoost regressor bundled with the feature matrices it scores.

    Class carried over from the original script.
    """

    def __init__(self, horizon, window, name, feat_dir, is_full_data_train=False,
                 prepared_df=None):
        """Build the train/test matrices for one shift and create the regressor.

        Args:
            horizon: Shift (in rows) passed to ``make_data_blocks``. Note that
                this parameter shadows the module-level ``horizon`` constant
                inside this method.
            window: Number of lags passed to ``make_data_blocks``.
            name: Path of the saved CatBoost model read by ``load``.
            feat_dir: Directory with the input files; only read when
                ``prepared_df`` is not supplied.
            is_full_data_train: When true, the whole feature table is used as
                both train and test part instead of being split.
            prepared_df: Optional result of ``prepare_data(load_merge_data(...))``.
                Lets several instances share one prepared frame instead of
                re-reading every input file. It is only read, never modified.
        """
        if prepared_df is None:
            prepared_df = prepare_data(load_merge_data(feat_dir))
        upgrade_df = make_data_blocks(prepared_df, horizon, window)
        if is_full_data_train:
            upgrade_train = upgrade_df
            upgrade_test = upgrade_df
        else:
            upgrade_train, upgrade_test = train_test_split(upgrade_df)
        self.test_y = upgrade_test['target']
        self.test_x = upgrade_test.drop(columns=['target', "timestamp"])
        self.train_y = upgrade_train['target']
        self.train_x = upgrade_train.drop(columns=['target', "timestamp"])
        self.name = name
        self.model = CatBoostRegressor(verbose=0)

    def load(self):
        """Load the regressor's weights from the file at ``self.name``."""
        self.model.load_model(self.name)

    def predict(self):
        """Return the regressor's predictions for the test feature matrix."""
        return self.model.predict(self.test_x)

class MetaModel:
    """Ensemble of per-shift models turned into a purchase schedule.

    Class carried over from the original script.
    """

    def __init__(self, window, model_path, feat_dir, name):
        """Load one saved model for every shift ``1 .. horizon - 1``.

        The input files are read and prepared once and the resulting frame is
        shared by all sub-models; previously each sub-model repeated the full
        load, multiplying the file parsing cost by the number of models.

        Args:
            window: Number of lags each sub-model uses.
            model_path: Base directory; models are expected under
                ``<model_path>/models/<name>/cb_model_<i>.cbm``.
            feat_dir: Directory with the input data files.
            name: Sub-directory name of this ensemble's saved models.
        """
        self.cb_models = []
        # Weight of the first model's prediction when blending it into the
        # predictions of the other models; 0 leaves them unchanged.
        self.risk_value = 0
        self.name = name
        self.cnt_models = horizon-1
        shared_df = prepare_data(load_merge_data(feat_dir)) if horizon > 1 else None
        for i in range(1, horizon):
            m = model(i, window, model_path+"/models/"+ name + "/cb_model_" + str(i)+".cbm",
                      feat_dir, prepared_df=shared_df)
            m.load()
            self.cb_models.append(m)

    def __find_suitable_index(self, lst):
        """Return the position of the first negative blended value, else ``len(lst)``.

        Element 0 is used as is; every later element is blended as
        ``lst[0] * risk_value + lst[i] * (1 - risk_value)``. The input list is
        not modified.
        """
        if not lst:
            return 0
        first = lst[0]
        for i, num in enumerate(lst):
            blended = num if i == 0 else (first * self.risk_value + num * (1 - self.risk_value))
            if blended < 0:
                return i
        return len(lst)

    def predict(self):
        """Return one integer per test row describing the purchase schedule.

        Rows are walked in order. On a decision row the value is
        ``first_negative_index + 1`` over that row's per-model predictions,
        capped by the number of rows remaining; the following ``value - 1``
        rows get 0 and the next decision happens after them. The UI presents
        the value as a number of weeks.

        Returns:
            list[int]: one entry per test row; empty when no models are loaded.
        """
        models_predictions = [_model.predict() for _model in self.cb_models]
        if not models_predictions:
            return []
        n_days = len(models_predictions[0])
        result = []
        step = 0
        for day_ind in range(n_days):
            if step == 0:
                segment = [models_predictions[i][day_ind] for i in range(self.cnt_models)]
                step = self.__find_suitable_index(segment) + 1
                result.append(min(step, n_days - day_ind))
            else:
                result.append(0)
            step -= 1
        return result

def make_prediction(model_path, features_dir, result_file, purchase_date):
    """Return the ensemble's schedule value for one purchase date.

    Loads the ``"metamodel"`` ensemble with a lag window of 10, reads
    ``result_file``, stores the ensemble's predictions in its ``Объем`` column
    and returns the value on the row whose ``dt`` falls on ``purchase_date``.
    Dates are compared by calendar day, so a time-of-day on either side does
    not prevent a match.

    Args:
        model_path: Base directory that contains ``models/metamodel``.
        features_dir: Directory with the input data files.
        result_file: Excel file with a ``dt`` column; it must have exactly as
            many rows as the ensemble produces predictions.
        purchase_date: A single date (string or datetime-like).

    Returns:
        The value predicted for the first row matching the date.

    Raises:
        IndexError: if the date is missing or not present in ``result_file``.
            The exception type is unchanged from the previous behaviour; only
            the message is now descriptive.
        ValueError: raised by pandas when the number of predictions differs
            from the number of rows in ``result_file``.
    """
    window = 10
    final_model = MetaModel(window, model_path, features_dir, "metamodel")
    purchase_day = pd.to_datetime(purchase_date)
    res = pd.read_excel(result_file)
    res['Объем'] = final_model.predict()

    if purchase_day is None or pd.isna(purchase_day):
        raise IndexError("Дата закупки не задана")

    same_day = pd.to_datetime(res['dt']).dt.normalize() == purchase_day.normalize()
    volumes = res.loc[same_day, 'Объем']
    if volumes.empty:
        raise IndexError(
            f"Дата {purchase_day.date()} отсутствует в файле {result_file}")
    return volumes.values[0]

# ---------------------------------------------------------------------------
# Streamlit page: upload the input files, pick a purchase date, show the result.
# ---------------------------------------------------------------------------
st.title('Прогнозирование сроков закупки арматуры')
st.write("Загрузите необходимые файлы данных:")

os.makedirs("data", exist_ok=True)
os.makedirs("models", exist_ok=True)

# File names the pipeline reads from the "data" directory (value = extension).
required_files = {
    'train.xlsx': 'xlsx',
    'test.xlsx': 'xlsx',
    'CHMF Акции.csv': 'csv',
    'MAGN Акции.csv': 'csv',
    'NLMK Акции.csv': 'csv',
    'Грузоперевозки.xlsx': 'xlsx',
    'Данные рынка стройматериалов.xlsx': 'xlsx',
    'Индекс LME.xlsx': 'xlsx',
    'Макропоказатели.xlsx': 'xlsx',
    'Топливо.xlsx': 'xlsx',
    'Цены на сырье.xlsx': 'xlsx'
}

uploaded_files = st.file_uploader("Выберите файлы", type=["xlsx", "csv"], accept_multiple_files=True)

if uploaded_files:
    saved_any = False
    for file in uploaded_files:
        # The name is supplied by the client: strip any directory part so it
        # cannot escape "data", and normalise to NFC because some systems send
        # decomposed Cyrillic (e.g. "й" as two code points), which would never
        # match the names in required_files.
        safe_name = unicodedata.normalize(
            "NFC", os.path.basename(file.name.replace("\\", "/")))
        if safe_name not in required_files:
            st.warning(f"Файл «{safe_name}» не входит в список необходимых и был пропущен.")
            continue
        with open(os.path.join("data", safe_name), "wb") as f:
            f.write(file.getbuffer())
        saved_any = True
    if saved_any:
        st.success("Файлы успешно загружены!")

all_files_uploaded = all(os.path.exists(os.path.join("data", f)) for f in required_files.keys())

if all_files_uploaded:
    try:
        test_df = pd.read_excel("data/test.xlsx")
        dates = pd.to_datetime(test_df['dt']).dt.date
        available_dates = set(dates)
        min_date = min(dates)
        max_date = max(dates)

        selected_date = st.date_input(
            "Выберите дату закупки",
            min_value=min_date,
            max_value=max_date,
            value=min_date
        )

        if st.button("Рассчитать срок закупки"):
            if selected_date not in available_dates:
                # The picker allows any day in the range, but a prediction
                # exists only for days that are rows of test.xlsx.
                nearest = min(available_dates, key=lambda d: abs(d - selected_date))
                st.error(
                    f"В test.xlsx нет записи на {selected_date}. "
                    f"Ближайшая доступная дата: {nearest}.")
            else:
                prediction = make_prediction(
                    model_path=".",
                    features_dir="data",
                    result_file="data/test.xlsx",
                    purchase_date=selected_date.strftime("%Y-%m-%d")
                )
                st.success(f"Рекомендуемый срок закупки: {prediction} недель")
    except Exception as e:
        # Deliberately broad: any pipeline failure is shown in the page
        # instead of crashing the app.
        st.error(f"Ошибка: {str(e)}")
else:
    st.warning("Пожалуйста, загрузите все необходимые файлы:")
    for fname in required_files:
        st.write(f"- {fname}")

Overwriting app.py


In [3]:
!npm install localtunnel

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K
up to date, audited 23 packages in 1s
[1G[0K⠧[1G[0K
[1G[0K⠧[1G[0K3 packages are looking for funding
[1G[0K⠧[1G[0K  run `npm fund` for details
[1G[0K⠧[1G[0K
2 [31m[1mhigh[22m[39m severity vulnerabilities

To address all issues (including breaking changes), run:
  npm audit fix --force

Run `npm audit` for details.
[1G[0K⠧[1G[0K

In [4]:
!streamlit run /content/app.py &>/content/logs.txt &

In [None]:
!npx localtunnel --port 8501 & curl ipv4.icanhazip.com

34.121.116.255
[1G[0K⠙[1G[0Kyour url is: https://olive-mirrors-tickle.loca.lt
