# Прогнозирование отношения доллара к евро

## Подключение библиотек

In [1]:
import os

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(
    rc = {
        'figure.figsize': (16, 9),
        'figure.dpi': 80,
        'axes.grid': True,
        'axes.grid.axis': 'x',
        'axes.grid.which': 'both',
        'grid.alpha': .4,
        'xtick.minor.visible': True,
        },
    palette = 'colorblind',
    style = 'ticks'
)
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

## 1 Датасет

### 1.1 Анализ и преобразование данных

In [2]:
os.listdir('data')

['ECB_FX_USD-quote.csv', 'euro-dollar-exchange-rate-historical-chart.csv']

Датасет взят отсюда: https://data.humdata.org/dataset/ecb-fx-rates?force_layout=desktop

In [3]:
df = pd.read_csv('data/ECB_FX_USD-quote.csv')
df.head()

Unnamed: 0,Date,EUR,JPY,BGN,CZK,DKK,GBP,HUF,PLN,RON,...,ILS,INR,KRW,MXN,MYR,NZD,PHP,SGD,THB,ZAR
0,#date,#value+eur,#value+jpy,#value+bgn,#value+czk,#value+dkk,#value+gbp,#value+huf,#value+pln,#value+ron,...,#value+ils,#value+inr,#value+krw,#value+mxn,#value+myr,#value+nzd,#value+php,#value+sgd,#value+thb,#value+zar
1,2021-10-11,1.1574,0.00885539403213466,0.5917783004397178,0.04556872317807788,0.15553942912433477,1.3636042319564552,0.0032087607429997227,0.25208546599002457,0.23385125169215848,...,0.3098131591626961,0.013252570591064189,0.0008366162364557657,0.04821475436469751,0.23980606663351567,0.693884892086331,0.019642251035231824,0.7388445579316949,0.029529276693455796,0.06685999491646832
2,2021-10-08,1.1569,0.00894602536343953,0.5915226505777688,0.045486356845167886,0.15547014634539663,1.3628224761456003,0.0032119159332574475,0.25050885626434544,0.23379746579633412,...,0.30954674372558466,0.013336099919884265,0.0008372412794905197,0.04854906733250803,0.23935036722871628,0.693335730552559,0.019799760397056308,0.7377718257764173,0.02954214652332678,0.06716049669393181
3,2021-10-07,1.1562,0.00897810218978102,0.59116474077104,0.045492819201259097,0.15540531458756165,1.3598673300165836,0.003228166182711637,0.2543446698050948,0.23369378473976754,...,0.3095333708135892,0.013380705490232384,0.0008403410205906081,0.04877822403726079,0.23903245813520776,0.6933317342288318,0.019829182959456678,0.736573867618016,0.02960743642928478,0.06719202203703058
4,2021-10-06,1.1542,0.008980703392468098,0.5901421413232436,0.0454141255164273,0.15513857899405897,1.358361774744027,0.003211195503992432,0.2513228089275993,0.2333697278498928,...,0.3081811385239773,0.013352923482727503,0.0008364313614656027,0.04816371155186299,0.23897964676895045,0.6899808703969392,0.01962791646826746,0.7345977596741344,0.02950710706616218,0.06612015284055431


Избавимся от первой строки с технической информацией, а также и от лишних столбцов.

In [4]:
df = df.drop(labels=0)
df.reset_index(inplace=True)
df = df.loc[:,["Date","EUR"]]
df.head()

Unnamed: 0,Date,EUR
0,2021-10-11,1.1574
1,2021-10-08,1.1569
2,2021-10-07,1.1562
3,2021-10-06,1.1542
4,2021-10-05,1.1602


Выведем информацию о данных.

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5831 entries, 0 to 5830
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    5831 non-null   object
 1   EUR     5831 non-null   object
dtypes: object(2)
memory usage: 91.2+ KB


Преобразуем значения столбца Date в тип datetime, а столбец EUR во float.

In [None]:
df['Date'] = pd.to_datetime(df['Date'])
df['EUR'] = df['EUR'].astype(float)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5831 entries, 0 to 5830
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    5831 non-null   datetime64[ns]
 1   EUR     5831 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 91.2 KB


In [None]:
df.head()

In [None]:
sns.lineplot(x=df['Date'],y=df['EUR'])

### 1.2 Разбивка датасета на обучающую и тренировочную выборки

Перед разбивкой датасета на основании столбца получим более удобную величину чем даты - столбец с номером даты. Так как датасет ежедневные наблюдения, то проблем при обучении возникнуть не должно.

In [None]:
df['time'] = np.arange(len(df['Date']))
df.head()

In [None]:
# df['lag_1'] = df['EUR'].shift(1)
# df.head()

Разбивка датасета на тренировочную и тестовую выборки

In [None]:
border_line = int(len(df)*0.8)

data_train,data_test = df.iloc[:border_line], df.iloc[border_line:]

Произведём стандартизацию данных для обучения

In [None]:
data_train.loc[:,'EUR'] = (data_train['EUR'] - data_train['EUR'].mean()) / data_train['EUR'].std()

## 2 Выбор значения глубины прогноза

In [None]:
tau = [1,7,14,30,60,365]

## 3 Выбор структуры ИНС

In [None]:
class NN:
    def __init__(self,nn_shape):
        self.layers = []
        self.weights = []
        self.bias = []
        self.dw = []
        self.db = []
        
        for layer_n in range(len(nn_shape)):
            self.layers.append(np.zeros(nn_shape[layer_n],dtype=np.float64))
            if layer_n!=0:
                self.weights.append(np.random.normal(size=(nn_shape[layer_n-1],nn_shape[layer_n])))
                self.bias.append(np.random.normal(size=nn_shape[layer_n]))
                
                # массивы для значений градиентов
                self.dw.append(np.zeros((nn_shape[layer_n-1],nn_shape[layer_n]),dtype = np.float64)) 
                self.db.append(np.zeros(nn_shape[layer_n],dtype = np.float64)) 
                    
    def sigmoid(self,x):
        return 1/(1+np.exp(-x))
    
    def forward(self,x):
        for i in range(len(self.layers)):
            if i== 0:
                self.layers[i]=x
            else:
                self.layers[i]=self.sigmoid(x+self.bias[i-1])
            if i!= len(self.layers)-1:
                x = np.dot(self.weights[i].T,self.layers[i])
#                 x = self.weights[i].T@self.layers[i]
                
        return self.sigmoid(x+self.bias[-1])
    
    def back_propagation(self,y,lr=0.001):
        # вычисляем градиенты для weights
        for L in range(1,len(self.layers)):
            for i in range(len(self.layers[-L])):
                for j in range(len(self.layers[-L-1])):
                    if L == 1:
                        # dw
#                         dE_w = self.layers[-L][i]*(self.layers[-L][i]-y[i])
                        dE_w = (self.layers[-L][i]-y[i])
                        da_w = self.layers[-L][i]*(1-self.layers[-L][i])                                              
                        dz_w = self.layers[-L-1][j]
                        

                    else:
                        # dw
                        dE_w = np.dot(self.dw[-L+1][i,:],self.weights[-L+1][i,:])
                        da_w = self.layers[-L][i]*(1-self.layers[-L][i])
                        dz_w = self.layers[-L-1][j]
                        
                    
                    self.dw[-L][j,i] = dE_w*da_w*dz_w        
                    self.weights[-L][j,i]-=lr*self.dw[-L][j,i]
        
         # вычисляем градиенты для bias  
        for L in range(1,len(self.db)+1):
            for i in range(len(self.db[-L])):
                if L!=1:
                    dE_b = np.dot(self.weights[-L+1][i],self.db[-L+1])
                    da_b = self.layers[-L][i]*(1-self.layers[-L][i])
                    dz_b = 1
                        
                else:
#                     dE_b = self.layers[-L][i]*(self.layers[-L+1][i]-y[i])
                    dE_b = (self.layers[-L+1][i]-y[i])
                    da_b = self.layers[-L+1][i]*(1-self.layers[-L+1][i])
                    dz_b = 1
                    
                self.db[-L][i] = dE_b*da_b*dz_b
                
        # применяем градиенты к weights        
        for L in range(len(self.dw)):
            self.weights[L]-=lr*self.dw[L]
        
        # применяем градиенты к bias
        for L in range(len(self.db)):
            self.bias[L]-=lr*self.db[L]


In [None]:
# class NN:
#     def __init__(self,nn_shape):
#         self.layers = []
#         self.weights = []
#         self.bias = []
#         for layer_n in range(len(nn_shape)):
#             self.layers.append(np.zeros(nn_shape[layer_n],dtype=np.float64))
#             if layer_n!=0:
#                 self.weights.append(np.random.normal(size=(nn_shape[layer_n-1],nn_shape[layer_n])))
#                 self.bias.append(np.random.normal(size=nn_shape[layer_n]))
                    
# #     def sigmoid(self,x):
# #         return 1/(1+np.exp(-x))
    
#     def relu(self,x):
#         return x if (x>0) else 0
    
#     def forward(self,x,act_func=):
#         for i in range(len(self.layers)):
#             if i== 0:
#                 self.layers[i]=x
#             else:
#                 self.layers[i]=self.sigmoid(x+self.bias[i-1])
#             if i!= len(self.layers)-1:
#                 x = np.dot(self.weights[i],self.layers[i])
                
#         return x
    
#     # relu
#     def back_popagation(self,y,lr=0.001):
#         for output in y:
#             for i in range(len(self.weights)):
#                 self.weights[i] = 
#         pass
            

In [None]:
net = NN((2,2))
net.weights

## Обучение

In [None]:
def train(model,X_train,epochs=1,tau = 2,X_val=None,y_val=None):
#     train_loss_history = []
#     validation_accuracy_history = []
    for i in tqdm(range(epochs)):
        running_loss = 0
        for i in range(len(X_train)//tau - 1):
            X_pred = net.forward(X_train[i*tau:i*tau+tau])
            running_loss += np.sum(np.square(X_pred-X_train[i*tau+tau]))/2
            net.back_propagation(X_train[i*tau+tau])
        epoch_loss = running_loss/len(X_train)
        train_loss_history.append(epoch_loss)
#         print("train loss: {train_loss:.4f}".format(train_loss = epoch_loss))
        if X_val is not None and y_val is not None:
            y_preds = get_preds(model,X_val)
            current_accuracy = accuracy_score(np.argmax(y_preds,axis=1), np.argmax(y_test,axis=1))
            validation_accuracy_history.append(current_accuracy)
#             print("accuracy: {accuracy:.4f}".format(accuracy = current_accuracy))

#     return train_loss_history, validation_accuracy_history

In [75]:
tau_current_experiment = 2
net = NN((tau_current_experiment,10,10,1))

train(net,data_train['EUR'],epochs=1,X_val=None,y_val=None)

  0%|                                                                                            | 0/1 [00:00<?, ?it/s]


IndexError: invalid index to scalar variable.