In [1]:
import requests
import time 

import pandas as pd
import numpy as np
from datetime import datetime

import os
import pathlib
import sys
import logging
import io

from portfolio_analytics import ForwardFillImputer, BackwardFillImputer

# Route INFO-and-above records to logs.log; each record carries a timestamp and its level.
logging.basicConfig(
    filename='logs.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
# Logger named after the current module.
logger = logging.getLogger(__name__)


import boto3

import  matplotlib.pyplot as plt

# Cap DataFrame previews at 50 rows.
pd.set_option('display.max_rows', 50)

# Initialize the connection to object storage (S3 client).
s3 = boto3.client('s3')

In [2]:
# Fetch the prepared price table from the bucket and parse it entirely in memory.
response = s3.get_object(
    Bucket='portfoliodata',
    Key='prepared_data/full_data.csv',
)
body = response['Body'].read()

# NOTE(review): the preview shows an 'Unnamed: 0' column, so the CSV was
# presumably written with its index; confirm before relying on the column count.
stocks_df = pd.read_csv(io.BytesIO(body))
stocks_df.head()

Unnamed: 0,begin,SNGS,PIKK,VRSBP,APTK,TGKA,MGTSP,GMKN,RTSBP,TGKBP,...,LSNGP,KMAZ,AFKS,VGSBP,VTBR,MSNG,KROTP,LSNG,KLSB,YRSB
0,2008-01-09,30.501,740.0,4.5,1779.0,0.0319,793.1,64.2001,0.2,0.0157,...,39.0,131.5,41.55,1.14,608.5,5.905,,,,
1,2008-01-10,30.43,744.5,4.4,1795.0,0.0319,793.0,65.14,0.2,0.0154,...,38.5,142.99,42.34,1.14,609.0,5.95,451.01,45.25,,19.2
2,2008-01-11,30.4,743.0,4.45,1770.0,0.0317,770.0,64.602,0.204,0.0159,...,39.0,144.19,46.2,1.12,609.0,5.95,440.2,,13.9,19.2
3,2008-01-14,30.692,775.0,4.1,1755.0,0.0315,773.0,65.85,0.205,0.0155,...,,152.0,44.75,1.19,605.0,5.955,479.99,,,19.05
4,2008-01-15,30.4,780.0,4.2,1755.0,0.0314,780.0,64.66,0.239,0.017,...,39.89,161.99,44.6,1.11,595.5,6.0,440.7,45.36,,20.882


In [3]:
# Dimensions of the loaded frame as (rows, columns).
stocks_df.shape

(4258, 81)

# Заполнение пропусков

In [5]:
# Narrow the table to the tickers under study; 'begin' is kept as the last column.
stocks_df = stocks_df.loc[:, [
    'SBER', 'LKOH', 'GAZP', 'AFLT', 'ROSN', 'NVTK',
    'MGNT', 'PLZL', 'SNGS', 'CHMF', 'MAGN', 'NLMK',
    'SIBN', 'TATN', 'KMAZ', 'VSMO', 'AKRN', 'MSNG',
    'RASP', 'OGKB', 'APTK', 'IRKT', 'VTBR', 'GMKN',
    'TGKB', 'TGKA', 'SVAV', 'AFKS', 'NMTP', 'MSRS',
    'begin',
]]

In [6]:
# Apply the imputers in sequence: forward fill first, then backward fill on
# whatever is still missing (presumably the leading gaps; confirm in portfolio_analytics).
for imputer_cls in (ForwardFillImputer, BackwardFillImputer):
    stocks_df = imputer_cls().fit_transform(stocks_df)

# Total count of missing cells left in the frame.
stocks_df.isna().sum().sum()

np.int64(0)

# Доходности

In [7]:
# Move the 'begin' column into the row index so that only ticker columns remain.
# The guard keeps this cell re-runnable: once 'begin' has become the index there
# is no such column any more, and an unguarded second run would raise.
if 'begin' in stocks_df.columns:
    stocks_df = stocks_df.set_index('begin')
stocks_df.head()

Unnamed: 0_level_0,SBER,LKOH,GAZP,AFLT,ROSN,NVTK,MGNT,PLZL,SNGS,CHMF,...,APTK,IRKT,VTBR,GMKN,TGKB,TGKA,SVAV,AFKS,NMTP,MSRS
begin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-01-09,101.85,2079.0,352.69,94.12,227.74,196.0,1310.0,1153.0,30.501,565.75,...,1779.0,22.7,608.5,64.2001,0.0239,0.0319,1368.95,41.55,6.45,2.51
2008-01-10,103.53,2062.0,352.4,95.39,230.6,197.0,1308.92,1207.0,30.43,570.2,...,1795.0,23.1,609.0,65.14,0.0241,0.0319,1400.0,42.34,6.397,2.52
2008-01-11,104.7,2036.0,355.91,95.76,223.38,206.0,1368.83,1208.0,30.4,580.0,...,1770.0,23.01,609.0,64.602,0.0238,0.0317,1434.0,46.2,6.348,2.694
2008-01-14,105.0,2004.0,363.7,96.99,224.0,211.98,1379.0,1254.0,30.692,593.5,...,1755.0,22.997,605.0,65.85,0.0242,0.0315,1400.01,44.75,6.25,2.73
2008-01-15,105.71,1990.97,360.24,98.3,221.98,209.99,1360.0,1277.78,30.4,605.0,...,1755.0,22.92,595.5,64.66,0.024,0.0314,1409.0,44.6,6.15,2.788


In [9]:
# Period-over-period percentage change of every column.
df_returns = (
    stocks_df
    .pct_change(fill_method=None)  # no filling of missing values before differencing
    .dropna(how="all")             # discard rows where every column is NaN (e.g. the first row)
)

In [10]:
from portfolio_analytics.covariances import rolling_covariance_with_step, expanding_covariance_with_step

In [12]:
# Rolling covariance of returns using the 'sample_cov' estimator.
# window_size equals step, so consecutive windows presumably do not overlap
# (confirm against portfolio_analytics.covariances).
rolling_covariance_with_step(df_returns, window_size=252, step=252, cov_method='sample_cov')

{'2009-01-15':           SBER      LKOH      GAZP      AFLT      ROSN      NVTK      MGNT  \
 SBER  0.801922  0.576398  0.617992  0.160964  0.687799  0.527116  0.176734   
 LKOH  0.576398  0.666136  0.589521  0.151990  0.652081  0.487425  0.165954   
 GAZP  0.617992  0.589521  0.635571  0.149367  0.666895  0.494747  0.175142   
 AFLT  0.160964  0.151990  0.149367  0.217363  0.183249  0.155527  0.050918   
 ROSN  0.687799  0.652081  0.666895  0.183249  0.872460  0.587096  0.174875   
 NVTK  0.527116  0.487425  0.494747  0.155527  0.587096  0.642855  0.168765   
 MGNT  0.176734  0.165954  0.175142  0.050918  0.174875  0.168765  0.324858   
 PLZL  0.499729  0.417071  0.438926  0.121632  0.565351  0.388570  0.164022   
 SNGS  0.594668  0.576317  0.581168  0.193709  0.708518  0.498762  0.119864   
 CHMF  0.530845  0.554974  0.554170  0.124235  0.597533  0.457793  0.238990   
 MAGN  0.404668  0.442275  0.441694  0.081252  0.433913  0.353217  0.187863   
 NLMK  0.613267  0.581232  0.600073  0

In [None]:
# Same rolling setup (window_size == step == 252), but with the 'exp_cov' estimator.
# NOTE(review): presumably an exponentially weighted covariance; confirm in portfolio_analytics.
rolling_covariance_with_step(df_returns, window_size=252, step=252, cov_method='exp_cov')

{'2009-01-15':           SBER      LKOH      GAZP      AFLT      ROSN      NVTK      MGNT  \
 SBER  1.136767  0.832563  0.905461  0.169167  0.991635  0.736152  0.259461   
 LKOH  0.832563  0.970576  0.868377  0.166084  0.943377  0.699065  0.256731   
 GAZP  0.905461  0.868377  0.946032  0.169971  0.975922  0.704527  0.264649   
 AFLT  0.169167  0.166084  0.169971  0.277084  0.211811  0.178210  0.050102   
 ROSN  0.991635  0.943377  0.975922  0.211811  1.253957  0.820993  0.261546   
 NVTK  0.736152  0.699065  0.704527  0.178210  0.820993  0.956457  0.276229   
 MGNT  0.259461  0.256731  0.264649  0.050102  0.261546  0.276229  0.531823   
 PLZL  0.713507  0.611124  0.648335  0.121239  0.842459  0.553099  0.271159   
 SNGS  0.845341  0.800788  0.842630  0.212900  0.999998  0.676817  0.137670   
 CHMF  0.768210  0.820707  0.829777  0.133505  0.911802  0.655317  0.384918   
 MAGN  0.605781  0.675352  0.686845  0.086146  0.672328  0.531053  0.303110   
 NLMK  0.889314  0.857403  0.891194  0

In [13]:
# Expanding-window covariance with the 'exp_cov' estimator, evaluated with step=252.
# NOTE(review): presumably the window grows from the start of the sample; confirm in portfolio_analytics.
expanding_covariance_with_step(df_returns, cov_method='exp_cov', step=252)

{'2009-01-14':           SBER      LKOH      GAZP      AFLT      ROSN      NVTK      MGNT  \
 SBER  1.148979  0.842481  0.915306  0.169977  1.003423  0.742638  0.261132   
 LKOH  0.842481  0.980738  0.878385  0.169223  0.953218  0.708788  0.260878   
 GAZP  0.915306  0.878385  0.956465  0.171296  0.987145  0.711539  0.267031   
 AFLT  0.169977  0.169223  0.171296  0.277816  0.215592  0.176567  0.048192   
 ROSN  1.003423  0.953218  0.987145  0.215592  1.267340  0.832115  0.265686   
 NVTK  0.742638  0.708788  0.711539  0.176567  0.832115  0.961773  0.275759   
 MGNT  0.261132  0.260878  0.267031  0.048192  0.265686  0.275759  0.535443   
 PLZL  0.719805  0.619864  0.654732  0.119108  0.853879  0.553936  0.270521   
 SNGS  0.855181  0.809300  0.852241  0.216110  1.010725  0.685606  0.140018   
 CHMF  0.775782  0.831035  0.838530  0.132840  0.923219  0.659376  0.387020   
 MAGN  0.611278  0.684331  0.693877  0.084423  0.681335  0.532958  0.303796   
 NLMK  0.899989  0.866194  0.901509  0

In [14]:
# Serialize the returns table to CSV text (row index included), encode it as
# UTF-8 bytes and upload it to the bucket. The response is left as the cell
# output so the HTTP status can be checked.
csv_data = df_returns.to_csv().encode('utf-8')
s3.put_object(
    Bucket='portfoliodata',
    Key='prepared_data/returns_final.csv',
    Body=csv_data,
)

{'ResponseMetadata': {'RequestId': '73f0989de021fe31',
  'HostId': '',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'server': 'nginx',
   'date': 'Sat, 22 Mar 2025 15:37:30 GMT',
   'content-type': 'application/octet-stream',
   'transfer-encoding': 'chunked',
   'connection': 'keep-alive',
   'keep-alive': 'timeout=60',
   'etag': '"b8710ac54905fd65945f2bd3917480bf"',
   'x-amz-request-id': '73f0989de021fe31'},
  'RetryAttempts': 0},
 'ETag': '"b8710ac54905fd65945f2bd3917480bf"'}