# SBER ARIMA Backtest and Forecast (T-Invest)

## 1. Imports and constants

In [None]:
import warnings
import math
from datetime import timedelta
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.arima.model import ARIMA

from tinkoff.invest import Client, CandleInterval
from tinkoff.invest.constants import INVEST_GRPC_API, INVEST_GRPC_API_SANDBOX
from tinkoff.invest.utils import now

from app.config import settings
from app.utils.quotation import quotation_to_float

# Blanket filter: every warning category is silenced for the rest of the run.
warnings.filterwarnings('ignore')

# --- Data source -----------------------------------------------------------
FIGI_SBER = 'BBG004730N88'  # instrument id passed as `figi` to the candle request
DAYS_BACK = 3 * 365  # calendar days of history to request (1095)

# --- Model -----------------------------------------------------------------
ARIMA_ORDER = (1, 1, 2)  # (p, d, q)
FORECAST_HORIZON_DAYS = 5  # steps ahead used for the weekly trading signal
MIN_HISTORY_DAYS = 252  # daily rows that must precede the first rebalance date

# --- Strategy --------------------------------------------------------------
COMMISSION_RATE = 0.001  # charged per unit of position turnover
INITIAL_CAPITAL = 100_000

# --- Output ----------------------------------------------------------------
OUTPUT_DIR = Path('reports/arima_sber')
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Choose the gRPC endpoint according to the configured sandbox flag.
if settings.sandbox:
    target = INVEST_GRPC_API_SANDBOX
else:
    target = INVEST_GRPC_API
print('Sandbox mode:', settings.sandbox)
print('Target:', target)

## 2. Connect to T-Invest and load SBER daily prices

In [None]:
# Download daily candles for FIGI_SBER covering the last DAYS_BACK calendar
# days and keep only the close price, indexed by candle timestamp (UTC).

# Take the clock reading once so both window bounds refer to the same instant.
request_end = now()
request_start = request_end - timedelta(days=DAYS_BACK)

with Client(settings.token, target=target, app_name=settings.app_name) as api:
    rows = [
        {
            'Date': candle.time,
            'Close': quotation_to_float(candle.close),
        }
        for candle in api.get_all_candles(
            figi=FIGI_SBER,
            from_=request_start,
            to=request_end,
            interval=CandleInterval.CANDLE_INTERVAL_DAY,
        )
    ]

# An empty response would otherwise surface as an opaque KeyError on 'Date'.
if not rows:
    raise RuntimeError(
        f'No daily candles returned for FIGI {FIGI_SBER} '
        f'between {request_start} and {request_end}'
    )

stock_data = pd.DataFrame(rows)
stock_data['Date'] = pd.to_datetime(stock_data['Date'], utc=True)
# Drop repeated timestamps, order chronologically, and index by date.
stock_data = stock_data.drop_duplicates(subset=['Date']).sort_values('Date').set_index('Date')
stock_data = stock_data[['Close']]

df_close = stock_data['Close']

print('Loaded rows:', len(stock_data))
stock_data.tail()

## 3. Plot close price

In [None]:
# Line chart of the daily close series.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(df_close.index, df_close.values, color='blue')
ax.set_title('SBER Daily Close Price')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.grid(True, alpha=0.3)
plt.show()

## 4. Fit one ARIMA model on full 3-year history

In [None]:
# Estimate a single ARIMA model on the entire close-price series.
model = ARIMA(endog=df_close, order=ARIMA_ORDER)
fitted = model.fit()

for label, value in (
    ('ARIMA order:', ARIMA_ORDER),
    ('Rows in model:', len(df_close)),
):
    print(label, value)

## 5. Backtest prediction on full 3-year history (in-sample, one-step-ahead: fit + predict)

In [None]:
# Predictions for every observation except the first, taken from `fitted`.
# NOTE: `fitted` was estimated on the same series, so these are in-sample
# predictions rather than an out-of-sample test.
backtest_index = df_close.index[1:]

backtest_result = fitted.get_prediction(start=1, end=len(df_close) - 1)
conf_int = backtest_result.conf_int(alpha=0.05)  # 95% interval: column 0 lower, column 1 upper

predicted_price = pd.Series(backtest_result.predicted_mean.to_numpy(), index=backtest_index)
lower_price, upper_price = (
    pd.Series(conf_int.iloc[:, bound].to_numpy(), index=backtest_index)
    for bound in (0, 1)
)

# Actual vs. predicted close with the interval bounds, aligned on date.
forecast_df = pd.DataFrame(
    {
        'actual_price': df_close.iloc[1:].to_numpy(),
        'predicted_price': predicted_price.to_numpy(),
        'lower_price': lower_price.to_numpy(),
        'upper_price': upper_price.to_numpy(),
    },
    index=backtest_index,
)

forecast_df.head()

## 6. Evaluate backtest quality

In [None]:
# Error metrics of the predicted close against the actual close.
actual = forecast_df['actual_price']
predicted = forecast_df['predicted_price']

mse = mean_squared_error(actual, predicted)
mae = mean_absolute_error(actual, predicted)
rmse = math.sqrt(mse)
# Mean absolute percentage error as a fraction (0.01 means 1%); not scaled by 100.
mape = ((actual - predicted).abs() / actual).mean()

for metric_label, metric_value in (
    ('MSE:', mse),
    ('MAE:', mae),
    ('RMSE:', rmse),
    ('MAPE:', mape),
):
    print(metric_label, metric_value)

## 7. Plot real price and backtest prediction for last 3 years

In [None]:
# Overlay the real close, the model prediction and its 95% interval, then
# save the chart to OUTPUT_DIR and display it.
fig, ax = plt.subplots(figsize=(16, 6))

ax.plot(df_close.index, df_close.values, color='blue', label='Real Price (3Y)')
ax.plot(
    forecast_df.index,
    forecast_df['predicted_price'],
    color='orange',
    label='Predicted Price (backtest)',
)
ax.fill_between(
    forecast_df.index,
    forecast_df['lower_price'],
    forecast_df['upper_price'],
    color='orange',
    alpha=0.12,
    label='95% CI',
)

ax.set_title('SBER Real vs ARIMA Backtest Prediction (3 Years)')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.grid(True, alpha=0.3)
ax.legend(loc='upper left')

fig.tight_layout()
fig.savefig(OUTPUT_DIR / 'arima_3y_backtest_overlay.png', dpi=170)
plt.show()

## 8. Weekly long/short strategy from 5-day ARIMA forecast

In [None]:
# Weekly long/short simulation.
#
# At each weekly rebalance date the ARIMA model is refitted on all closes up
# to that date and forecast FORECAST_HORIZON_DAYS steps ahead. The strategy
# goes long (+1) when the forecast is above the current weekly close and
# short (-1) otherwise, holding until the next rebalance date. Commission is
# charged in proportion to the change in position.

# The first rebalance date is looked up by position, so fail with a clear
# message instead of a bare IndexError when the history is too short.
if len(df_close) <= MIN_HISTORY_DAYS:
    raise ValueError(
        f'Need more than {MIN_HISTORY_DAYS} daily closes to start the strategy, '
        f'got {len(df_close)}'
    )

# Last available close of each week; weeks are anchored to end on Friday.
weekly_close = df_close.resample('W-FRI').last().dropna()
first_rebalance = df_close.index[MIN_HISTORY_DAYS]
rebalance_dates = weekly_close.index[weekly_close.index >= first_rebalance]

records = []
prev_position = 0  # flat before the first trade

# Walk consecutive (entry date, exit date) pairs of rebalance dates.
for t, t_next in zip(rebalance_dates[:-1], rebalance_dates[1:]):
    # Only data up to and including t is used for fitting (no look-ahead).
    history = df_close.loc[:t]
    fit = ARIMA(history, order=ARIMA_ORDER).fit()
    forecast_5d = float(fit.forecast(steps=FORECAST_HORIZON_DAYS).iloc[-1])

    close_t = float(weekly_close.loc[t])
    close_next = float(weekly_close.loc[t_next])

    position = 1 if forecast_5d > close_t else -1

    asset_return = close_next / close_t - 1.0
    gross_return = position * asset_return

    # Turnover is 1 on the initial entry and 2 on a long<->short flip.
    turnover = abs(position - prev_position)
    fee = COMMISSION_RATE * turnover
    net_return = gross_return - fee

    records.append(
        {
            'date': t,
            'next_date': t_next,
            'close_t': close_t,
            'forecast_5d': forecast_5d,
            'position': position,
            'asset_return': asset_return,
            'gross_return': gross_return,
            'fee': fee,
            'net_return': net_return,
        }
    )

    prev_position = position

# With fewer than two rebalance dates there is nothing to simulate; without
# this guard set_index('date') would fail with a KeyError on an empty frame.
if not records:
    raise ValueError(
        'Not enough weekly rebalance dates after the warm-up period to simulate a trade'
    )

trades_df = pd.DataFrame(records).set_index('date')
# Compound the weekly returns into equity curves, before and after commission.
equity_no_fee = INITIAL_CAPITAL * (1.0 + trades_df['gross_return']).cumprod()
equity_with_fee = INITIAL_CAPITAL * (1.0 + trades_df['net_return']).cumprod()

trades_df.head()

## 9. Two equity charts: without commission and with commission

In [None]:
# Draw, save and show one chart per equity curve (without / with commission).
equity_charts = (
    (equity_no_fee, 'blue', 'Strategy Equity Without Commission', 'equity_without_commission.png'),
    (equity_with_fee, 'orange', 'Strategy Equity With Commission', 'equity_with_commission.png'),
)

for curve, line_color, chart_title, file_name in equity_charts:
    fig, ax = plt.subplots(figsize=(16, 5))
    ax.plot(curve.index, curve.values, color=line_color)
    ax.set_title(chart_title)
    ax.set_xlabel('Date')
    ax.set_ylabel('Equity')
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    fig.savefig(OUTPUT_DIR / file_name, dpi=170)
    plt.show()

## 10. Summary and save output files

In [None]:
# Summary statistics of the weekly strategy, then export of all result tables.

# Count changes of position between consecutive weeks. The first row has no
# predecessor, so it is excluded: comparing it against the NaN produced by
# shift(1) would always register a spurious "switch".
positions = trades_df['position']
switches = int(positions.diff().iloc[1:].ne(0).sum())

# Length of the simulated period, from the first entry to the last exit.
years = (trades_df['next_date'].iloc[-1] - trades_df.index[0]).days / 365.25

cagr_no_fee = (equity_no_fee.iloc[-1] / INITIAL_CAPITAL) ** (1 / years) - 1
cagr_with_fee = (equity_with_fee.iloc[-1] / INITIAL_CAPITAL) ** (1 / years) - 1

print('Position switches:', switches)
print(f'CAGR without commission: {cagr_no_fee * 100:.2f}%')
print(f'CAGR with commission: {cagr_with_fee * 100:.2f}%')

# Tables written by this cell, keyed by file name inside OUTPUT_DIR.
csv_outputs = {
    'forecast_backtest_3y.csv': forecast_df,
    'weekly_trades_5d_forecast.csv': trades_df,
    'equity_without_commission.csv': equity_no_fee,
    'equity_with_commission.csv': equity_with_fee,
}
for csv_name, table in csv_outputs.items():
    table.to_csv(OUTPUT_DIR / csv_name)

# List every artefact of the run; the PNG names are the ones used by the
# plotting cells' savefig calls.
print('Saved files:')
for artefact_name in (
    'forecast_backtest_3y.csv',
    'arima_3y_backtest_overlay.png',
    'weekly_trades_5d_forecast.csv',
    'equity_without_commission.csv',
    'equity_with_commission.csv',
    'equity_without_commission.png',
    'equity_with_commission.png',
):
    print(OUTPUT_DIR / artefact_name)