<a href="https://colab.research.google.com/github/TStartio/RD_test/blob/main/%D0%97%D0%B0%D0%B4%D0%B0%D0%BD%D0%B8%D0%B5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## ADAM vs SGD

In [None]:
import linearmodels
# Imports
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from scipy.io import arff
from sklearn.preprocessing import StandardScaler

## Data Preparation

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#Загрузка данных для моделирования температуры!
#x от -3 до 3
#y от 0 до 1
from datetime import datetime as dt
from sklearn.model_selection import train_test_split
# Загрузка данных из .dat файла

from scipy.io import arff
import pandas as pd

# Location of the raw KEEL/ARFF file on the mounted Google Drive, and of the
# sanitized copy that scipy's ARFF reader can parse.
file_path = '/content/drive/My Drive/Machine Learning Datasets/mv.dat'
cleaned_file_path = '/content/drive/My Drive/Machine Learning Datasets/mv_cleaned.dat'

# Read the raw file line by line.
with open(file_path, 'r') as file:
    lines = file.readlines()

# Keep data lines plus the header directives that are let through; every other
# '@...' line (e.g. the non-standard @inputs / @outputs) is dropped.
# A single filter is enough: the former separate @inputs/@outputs filter was
# fully covered by this rule.
ARFF_DIRECTIVES = ('@relation', '@attribute', '@data')
lines = [line for line in lines
         if not line.startswith('@') or line.startswith(ARFF_DIRECTIVES)]

# Write the sanitized copy next to the original.
with open(cleaned_file_path, 'w') as cleaned_file:
    cleaned_file.writelines(lines)

# Load the sanitized file and turn the record array into a DataFrame.
data, meta = arff.loadarff(cleaned_file_path)
df = pd.DataFrame(data)


def _decode_bytes(value):
    """Return ``value`` decoded to ``str`` if it is ``bytes``, else unchanged."""
    return value.decode() if isinstance(value, bytes) else value


# DataFrame.applymap is deprecated in favour of DataFrame.map (pandas >= 2.1);
# fall back to applymap on older pandas so the cell runs on both.
if hasattr(pd.DataFrame, 'map'):
    df = df.map(_decode_bytes)
else:
    df = df.applymap(_decode_bytes)

# Quick look at the first rows.
print(df.head())




# Show which columns were loaded.
print(df.columns)

# Column names left over from a previous dataset; any that are absent are
# ignored (errors='ignore'), so this is a no-op for frames without them.
fields_to_drop = ['ind', 'ind.1', 'ind.2', 'ind.3', 'ind.4', 'ind.5', 'rain', 'ddhm', 'mintp', 'igmin', 'gmin', 'soil', 'pe', 'evap', 'smd_wd', 'smd_md', 'smd_pd', 'glorad', 'smooth_X1']

# Build the cleaned frame as one chain so every step works on a fresh object
# (no assignment into a possibly-shared dropna() result):
#   1. drop rows with NaN,
#   2. coerce X1 / X2 to numeric (unparseable values become NaN),
#   3. drop the leftover columns,
#   4. drop rows that became NaN through the coercion,
#   5. drop the rows labelled 301 and 305.
# NOTE(review): the reason for removing rows 301 and 305 is not visible in this
# notebook; errors='ignore' keeps the cell from raising KeyError when a frame
# does not contain those labels.
dfn = (
    df.dropna()
      .assign(
          X1=lambda frame: pd.to_numeric(frame['X1'], errors='coerce'),
          X2=lambda frame: pd.to_numeric(frame['X2'], errors='coerce'),
      )
      .drop(columns=fields_to_drop, errors='ignore')
      .dropna()
      .drop(index=[301, 305], errors='ignore')
)

# New feature: X1 shifted down by one row (value of the previous row).
dfn['X1_s'] = dfn['X1'].shift(1)

# The first row has no predecessor (NaN after the shift): drop it and
# renumber the index from 0.
dfn = dfn.iloc[1:].reset_index(drop=True)

# Quick look at the cleaned data.
print(dfn.head())



# Feature / target selection.
# The target must be a single column: with several columns left in y, the
# later reshape(-1, 1) flattens them into one long vector whose length no
# longer matches the number of feature rows (the broadcasting ValueError seen
# during training).
# NOTE(review): 'Y' is assumed to be the regression target because it is the
# only non-X column in the loaded frame; change TARGET_COLUMN if another
# column was intended.
TARGET_COLUMN = 'Y'
# Columns that were already excluded from the features before this fix.
EXCLUDED_FEATURES = ['X1', 'X2', 'X3']

# The target itself is removed from the features to avoid target leakage.
X_branch1 = dfn.drop(columns=EXCLUDED_FEATURES + [TARGET_COLUMN])
y_branch1 = dfn[[TARGET_COLUMN]]

# Chronological 70 / 30 split: first 70% of the rows for training, the
# remaining rows for validation (no row is left unused by rounding).
total_samples = len(X_branch1)
split_index = int(total_samples * 0.7)
split_index2 = total_samples - split_index

# Standardize the features. The scaler is fitted on the training rows only so
# that validation statistics do not leak into training.
feature_scaler = StandardScaler().fit(X_branch1.iloc[:split_index])
X_branch1_n = pd.DataFrame(
    data=feature_scaler.transform(X_branch1),
    columns=X_branch1.columns,
    index=X_branch1.index,
)

# Standardize the target the same way; `scaler` is left bound to the target
# scaler, as before, and the result is an (n_samples, 1) array.
scaler = StandardScaler().fit(y_branch1.iloc[:split_index])
y_branch1_n = scaler.transform(y_branch1).reshape(-1, 1)

# Feature matrix with a leading column of ones (bias term).
ones_column = np.ones((X_branch1_n.shape[0], 1))
X_branch1_n_with_ones = np.hstack((ones_column, X_branch1_n))

# First 70% of the rows: training set.
train_X, train_y = X_branch1_n.iloc[:split_index], y_branch1_n[:split_index]

# Remaining 30% of the rows: validation set.
val_X, val_y = X_branch1_n.iloc[split_index:], y_branch1_n[split_index:]

# Features and targets must stay row-aligned.
assert len(train_X) == len(train_y) and len(val_X) == len(val_y)


#train_X, val_X, train_y, val_y = train_test_split(
#    X_branch1_n_with_ones,
#    y_branch1_n,
#    test_size=0.3,  # Указываем 30% на тестовую выборку
#    random_state=42  # Чтобы результаты были воспроизводимыми, указываем seed
#)
#test_X, test_y = X_branch1_n[-1500:], y_branch1_n[-1500:]

         X1       X2   X3       X4        X5        X6   X7   X8       X9  \
0 -4.809150 -10.5062  0.0 -5.25312  0.665394 -23.62270  0.0  1.0  445.012   
1 -0.928705 -10.8176  1.0 -5.40878  0.430141 -25.32900  0.0  0.0  250.038   
2  1.304560 -11.9609  0.0 -5.98045  0.815231  -7.46146  0.0  1.0  261.694   
3 -4.068760 -11.7433  0.0 -5.87165  0.285156  -2.89871  1.0  0.0  175.301   
4  4.750250 -12.9444  2.0 -6.47220  0.701619 -12.84180  1.0  1.0  341.723   

      X10         Y  
0  1082.0  -2.40458  
1  1115.0 -26.25770  
2  1163.0   0.65228  
3  1191.0  -6.96747  
4  1010.0   2.37512  
Index(['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'Y'], dtype='object')
         X1       X2   X3       X4        X5        X6   X7   X8       X9  \
0 -0.928705 -10.8176  1.0 -5.40878  0.430141 -25.32900  0.0  0.0  250.038   
1  1.304560 -11.9609  0.0 -5.98045  0.815231  -7.46146  0.0  1.0  261.694   
2 -4.068760 -11.7433  0.0 -5.87165  0.285156  -2.89871  1.0  0.0  175.301   
3  4.75

  df = df.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)


In [None]:
import sympy

In [None]:
X_branch1_n

Unnamed: 0,X4,X5,X6,X7,X8,X9,X10,Y,X1_s
0,-0.335011,0.748555,-1.322337,-0.650049,-0.573262,-0.426723,0.253688,-1.670017,-1.661577
1,-0.524088,1.415980,0.331044,-0.650049,1.744403,-0.325605,1.080634,0.912481,-0.319968
2,-0.488103,0.497273,0.753261,1.538345,-0.573262,-1.075080,1.563019,0.181229,0.452152
3,-0.686732,1.219072,-0.166828,1.538345,1.744403,0.368661,-1.555255,1.077819,-1.405598
4,-0.940483,-0.199309,-0.309666,1.538345,-0.573262,-0.090915,-0.642169,-0.523458,1.643452
...,...,...,...,...,...,...,...,...,...
40760,-0.625759,0.424748,-0.097251,1.538345,-0.573262,-0.614922,0.167548,-0.594762,-1.271632
40761,-0.761874,0.977952,-1.106387,1.538345,1.744403,0.236469,0.891125,1.000265,-1.023463
40762,-0.805281,-0.990330,-0.744055,1.538345,-0.573262,0.671434,1.631931,-1.085341,1.084659
40763,-0.348006,-0.926701,-0.695428,-0.650049,-0.573262,-1.713851,-1.210694,-1.058403,-0.374222


In [None]:
train_X

Unnamed: 0,X4,X5,X6,X7,X8,X9,X10,Y,X1_s
0,-0.335011,0.748555,-1.322337,-0.650049,-0.573262,-0.426723,0.253688,-1.670017,-1.661577
1,-0.524088,1.415980,0.331044,-0.650049,1.744403,-0.325605,1.080634,0.912481,-0.319968
2,-0.488103,0.497273,0.753261,1.538345,-0.573262,-1.075080,1.563019,0.181229,0.452152
3,-0.686732,1.219072,-0.166828,1.538345,1.744403,0.368661,-1.555255,1.077819,-1.405598
4,-0.940483,-0.199309,-0.309666,1.538345,-0.573262,-0.090915,-0.642169,-0.523458,1.643452
...,...,...,...,...,...,...,...,...,...
12225,-0.541939,1.032169,-0.121763,-0.650049,1.744403,1.532194,1.373510,0.904701,0.658830
12226,1.104829,-0.586023,0.804292,-0.650049,-0.573262,-1.060011,0.064180,0.422043,0.396095
12227,-0.205147,1.439566,-1.061498,-0.650049,1.744403,0.354000,-0.418205,0.750184,-0.728702
12228,-0.567194,0.083601,-1.582676,-0.650049,-0.573262,0.417589,0.115864,-1.625977,-0.717237


## Neural Network

In [None]:
lossesrc = {'mse':[]} # For Plotting of boxplot
class NeuralNetwork(object):
    """Single-layer model ``activation(X @ w0)`` with four weight-update rules.

    The prediction is ``activation_function1(np.dot(features, w0))`` where
    ``w0`` has shape ``(input_nodes, hidden_nodes)``; there is no second layer,
    so ``output_nodes`` is stored but not used. The coordinate-wise optimizers
    ('RC', 'DC') index ``w0`` row by row and compare per-row values as scalars,
    which only works with ``hidden_nodes == 1``.

    Supported ``optimizer`` values for :meth:`train`:

    * ``'sgd'``  - one full-batch gradient step.
    * ``'adam'`` - one Adam step with bias-corrected moments.
    * ``'RC'``   - coordinate search, coordinate picked uniformly at random
      among those not yet marked as failed.
    * ``'DC'``   - coordinate search, coordinate picked with probability
      proportional to the absolute gradient.

    Features may be a pandas DataFrame or any array-like; they are converted
    to a float ndarray inside :meth:`train`.
    """

    def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate, seed_n,
                 activation='sin'):
        """Create the network and draw the initial weights.

        Parameters
        ----------
        input_nodes : int
            Number of input features (rows of ``w0``).
        hidden_nodes : int
            Number of columns of ``w0``.
        output_nodes : int
            Stored only; not used by any computation in this class.
        learning_rate : float
            Step size for 'sgd' and 'adam'.
        seed_n : int
            Accepted for interface compatibility; NOT used. The weights are
            always drawn after ``np.random.seed(42)`` so that every network in
            a comparison starts from the same point.
        activation : str or callable, default 'sin'
            Name of a built-in activation (see ``_build_activations``) or a
            callable ``f(x, deriv=False)``. The default reproduces the
            previously hard-coded choice.

        Raises
        ------
        ValueError
            If ``activation`` is a string that names no built-in activation.
        """
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes
        self.lr = learning_rate
        # Adam first/second moment accumulators (l2_* belong to a removed
        # second layer and are kept only as attributes).
        self.l2_m = 0
        self.l1_m = 0
        self.l2_v = 0
        self.l1_v = 0
        self.t = 0  # Adam time step
        self.shag = 200000000  # loss-decrease step for RC/DC; reset in train()
        self.alfa = 1
        # Fixed seed: seeds NumPy's global generator (module-level side effect).
        np.random.seed(42)
        # Weights initialization
        self.w0 = np.random.normal(0.0, 1, (self.input_nodes, self.hidden_nodes))
        # RC/DC bookkeeping: we[i] == 1 while coordinate i may still be tried,
        # wep is `we` normalized to probabilities.
        self.we = np.ones(input_nodes)
        self.wep = np.ones(input_nodes)
        self.w0new = self.w0.copy()

        if callable(activation):
            self.activation_function1 = activation
        else:
            registry = self._build_activations()
            if activation not in registry:
                raise ValueError(
                    'Unknown activation {!r}; expected one of {}'.format(
                        activation, sorted(registry)))
            self.activation_function1 = registry[activation]

    @staticmethod
    def _build_activations():
        """Return the ``name -> f(x, deriv=False)`` table of activations.

        With ``deriv=True`` each function returns its derivative expression
        evaluated at the array it is given. ``train`` passes the layer
        OUTPUT to that call, which matches the usual shortcut only for
        'sigmoid' and 'tang' (their derivative is written in terms of the
        output).
        """

        def relu(x, deriv=False):
            if deriv:
                return np.where(x > 0, 1, 0)
            return np.maximum(0, x)

        def linear(x, deriv=False):
            if deriv:
                return 1
            return x

        def snake(x, deriv=False):
            if deriv:
                return 2*np.cos(0.5*x)*np.sin(0.5*x)+1
            return 1/0.5*np.sin(0.5*x)**2+x

        def reluSnake(x, deriv=False):
            if deriv:
                return np.where(x > 0, 2*np.cos(x)*np.sin(x)+1, 0)
            return np.maximum(0, np.sin(x)**2+x)

        def sin_plus_cos(x, deriv=False):
            # NOTE(review): the derivative of sin(x)+cos(x) is cos(x)-sin(x);
            # the expression below (cos+sin) is kept unchanged because it is
            # the default activation and changing it would alter every result
            # of the notebook. Confirm whether this is intended.
            if deriv:
                return (np.cos(x)+np.sin(x))
            return (np.sin(x)+np.cos(x))

        # Inverse square root unit
        def invers(x, deriv=False):
            if deriv:
                return 1/(np.sqrt(1+0.5*x**2))**3
            return x/(np.sqrt(1+0.5*x**2))

        def arctg(x, deriv=False):
            if deriv:
                return 1/(x**2+1)
            return np.arctan(x)

        def ln(x, deriv=False):
            if deriv:
                return 1/x
            return np.log(x)

        def softs(x, deriv=False):
            if deriv:
                return 1/(1+np.abs(x))**2
            return x/(1+np.abs(x))

        # Rastrigin-shaped; tune the divisor 5000 for the best result.
        def rasstr(x, deriv=False):
            if deriv:
                # d/dx [x**2 - 10*cos(2*pi*x)] = 2*x + 20*pi*sin(2*pi*x).
                # Uses np.sin explicitly: the bare name `sin` used to resolve
                # to the sin+cos activation, and the sign was inverted.
                return (2*x+20*np.pi*np.sin(2*np.pi*x))/5000
            return (10+x**2-10*np.cos(2*np.pi*x))/5000

        # Styblinski-Tang-shaped; tune the divisor 10000 for the best result.
        def Stib(x, deriv=False):
            if deriv:
                return (4*x**3-32*x+5)/10000
            return (x**4-16*x**2+5*x)/10000

        # Himmelblau-shaped; tune the divisor 500 for the best result.
        def him(x, deriv=False):
            if deriv:
                return ((x**2+x-11)*2*(2*x+1))/500+((x+x**2-7)*2*(2*x+1))/500
            return ((x**2+x-11)**2+(x+x**2-7)**2)/500

        def tang(x, deriv=False):
            if deriv:
                return 1-x**2
            # np.tanh avoids the inf/inf = nan of the explicit exp formula
            # for large |x|.
            return np.tanh(x)

        def sigmoid(x, deriv=False):
            if deriv:
                return x*(1-x)
            return 1/(1+np.exp(-x))

        # Gaussian
        def wsigm(x, deriv=False):
            if deriv:
                return -2*x*np.exp(-(x**2))
            return np.exp(-(x**2))

        def softplus(x, deriv=False):
            if deriv:
                return 1/(1+np.exp(-x))
            # log(1 + exp(x)) without overflow for large x.
            return np.logaddexp(0, x)

        return {
            'relu': relu, 'linear': linear, 'snake': snake,
            'reluSnake': reluSnake, 'sin': sin_plus_cos, 'invers': invers,
            'arctg': arctg, 'ln': ln, 'softs': softs, 'rasstr': rasstr,
            'Stib': Stib, 'him': him, 'tang': tang, 'sigmoid': sigmoid,
            'wsigm': wsigm, 'softplus': softplus,
        }

    def _loss_and_gradient(self, l0, targets, weights):
        """Return ``(sum of squared errors, dLoss-like gradient)`` for ``weights``.

        The gradient is ``l0.T @ (error * activation(output, deriv=True))``
        with the derivative evaluated at the layer output, exactly as every
        optimizer in this class has always computed it.
        """
        output = self.activation_function1(np.dot(l0, weights))
        error = output - targets
        delta = error * self.activation_function1(output, deriv=True)
        return np.sum(error ** 2), l0.T.dot(delta)

    @staticmethod
    def _roulette_pick(weights, count):
        """Draw one index in ``range(count)`` by roulette-wheel selection.

        One ``random.random()`` number is drawn and compared against the
        running sum of ``weights``. Returns -1 when the running sum never
        exceeds the drawn number (all-zero / NaN weights or rounding).
        """
        import random
        threshold = random.random()
        running = 0
        for i in range(count):
            running = running + weights[i]
            if threshold < running:
                return i
        return -1

    def _coordinate_search(self, l0, targets, r_limit, inner_steps,
                           pick_by_gradient, history):
        """Shared loop of the 'RC' and 'DC' optimizers.

        Starting from the current loss ``L`` the loop repeatedly asks one
        weight row ``z`` to reach the lower loss ``dfn = dfng - shag / r``
        (``shag = L / 10``) using at most ``inner_steps`` secant-like updates
        ``w <- w - (loss - dfn) / (2 * grad[z])``. A move is accepted only if
        the resulting loss is below the loss before the move. After a failure
        the drawn coordinate is masked in ``self.we``; once nothing can be
        drawn any more the step is halved (``r`` doubles) and the mask is
        reset. The loop ends when ``r`` exceeds ``r_limit``.

        Parameters
        ----------
        l0, targets : ndarray
            Features ``(n, inputs)`` and targets ``(n, outputs)``.
        r_limit : int
            Largest allowed step divisor.
        inner_steps : int
            Maximum number of secant-like updates per coordinate.
        pick_by_gradient : bool
            False: update the uniformly drawn coordinate (RC).
            True: draw the coordinate proportionally to ``|grad| * we`` (DC).
        history : list or None
            If given, ``loss / len(targets)`` is appended after every attempt.
        """
        num = l0.shape[1]
        act = self.activation_function1

        loss, _ = self._loss_and_gradient(l0, targets, self.w0)
        r = 1
        self.shag = loss / 10
        dfn = loss
        while True:
            dfng = dfn                    # loss before this attempt
            dfn = dfn - self.shag / r     # loss this attempt should reach

            # Draw a coordinate that has not failed yet.
            p = -1
            available = np.sum(self.we)
            if available > 0:
                self.wep = self.we / available
                p = self._roulette_pick(self.wep, num)
            if p == -1:
                # Every coordinate failed at this step size: halve the step,
                # reset the mask and draw again.
                r = r * 2
                dfn = dfn + self.shag / (r / 2) - self.shag / r
                self.we = np.ones(num)
                if r > r_limit:
                    break
                self.wep = self.we / np.sum(self.we)
                p = self._roulette_pick(self.wep, num)

            _, w0pr = self._loss_and_gradient(l0, targets, self.w0)
            self.w0new = self.w0.copy()

            if pick_by_gradient:
                mask = np.asarray(self.we[:]).reshape(-1, 1, order='F')
                cw = np.multiply(np.abs(w0pr), mask)
                cw = cw / np.sum(cw)
                z = self._roulette_pick(cw, num)
                if z == -1:
                    # Degenerate weights (e.g. all-zero gradient): previously
                    # `z` stayed unbound or stale here; fall back to `p`.
                    z = p
            else:
                z = p

            # Secant-like iterations on row z towards the requested loss.
            wnew = self.w0[z].copy()
            Ls1 = dfng
            Lnew = dfn
            for _ in range(inner_steps):
                wnew1 = wnew - (Ls1 - Lnew) / (2 * w0pr[z])
                if np.abs(Ls1 - Lnew) < 0.001:
                    break
                wnew = wnew1.copy()
                self.w0new[z] = wnew.copy()
                Ls1, w0pr = self._loss_and_gradient(l0, targets, self.w0new)

            # Evaluate the candidate and accept it only if the loss dropped.
            self.w0new[z] = wnew1.copy()
            dfn1 = np.sum((act(np.dot(l0, self.w0new)) - targets) ** 2)
            if dfn1 < dfng:
                self.w0[z] = self.w0new[z].copy()
                self.we = np.ones(num)
                dfn = dfn1
            else:
                # NOTE(review): the masked coordinate is `p` (the uniform
                # draw), also in DC mode where the updated coordinate is `z`.
                # Kept as is; confirm whether `z` was meant.
                self.we[p] = 0
                dfn = dfng

            if history is not None:
                history.append(dfn / len(targets))

    def train(self, features, targets, optimizer, seed_n, decay_rate_1 = None,
              decay_rate_2 = None, epsilon = None):
        """Update ``w0`` in place using one call of the chosen optimizer.

        Parameters
        ----------
        features : DataFrame or array-like, shape (n, input_nodes)
        targets : array-like, shape (n, hidden_nodes) or (n,)
            A 1-D target is treated as a single column.
        optimizer : {'sgd', 'adam', 'RC', 'DC'}
            'sgd' / 'adam' perform a single step; 'RC' / 'DC' run their whole
            search loop. 'RC' also appends the per-attempt mean loss to the
            module-level ``lossesrc['mse']``.
        seed_n : int
            Accepted for interface compatibility; not used (Python's
            ``random`` is re-seeded with 42 on every call).
        decay_rate_1, decay_rate_2, epsilon : float, optional
            Adam hyper-parameters; required when ``optimizer == 'adam'``.

        Raises
        ------
        ValueError
            If features and targets have different numbers of rows, or the
            optimizer name is unknown.
        """
        import random
        random.seed(42)

        # Work on plain float arrays so DataFrame and ndarray inputs behave
        # the same (RC/DC previously required a DataFrame).
        l0 = np.asarray(features, dtype=float)
        targets = np.asarray(targets, dtype=float)
        if targets.ndim == 1:
            # (n,) against an (n, 1) output would broadcast to (n, n).
            targets = targets.reshape(-1, 1)
        if targets.shape[0] != l0.shape[0]:
            raise ValueError(
                'features have {} rows but targets have {}'.format(
                    l0.shape[0], targets.shape[0]))

        if optimizer == 'sgd':
            _, gradient = self._loss_and_gradient(l0, targets, self.w0)
            self.w0 -= self.lr * gradient

        # Coordinate chosen according to the derivative
        elif optimizer == 'DC':
            self._coordinate_search(l0, targets, r_limit=100000, inner_steps=40,
                                    pick_by_gradient=True, history=None)

        # Coordinate chosen at random
        elif optimizer == 'RC':
            self._coordinate_search(l0, targets, r_limit=10000, inner_steps=50,
                                    pick_by_gradient=False,
                                    history=lossesrc['mse'])

        elif optimizer == 'adam':
            _, g0 = self._loss_and_gradient(l0, targets, self.w0)

            self.t += 1  # Increment time step

            # Exponential moving averages of the gradient and its square.
            self.l1_m = self.l1_m * decay_rate_1 + (1 - decay_rate_1) * g0
            self.l1_v = self.l1_v * decay_rate_2 + (1 - decay_rate_2) * (g0 ** 2)

            # Bias-corrected moments.
            l1_m_corrected = self.l1_m / (1 - (decay_rate_1 ** self.t))
            l1_v_corrected = self.l1_v / (1 - (decay_rate_2 ** self.t))

            # Update weights
            w0_update = l1_m_corrected / (np.sqrt(l1_v_corrected) + epsilon)
            self.w0 -= (self.lr * w0_update)

        else:
            raise ValueError('Unknown optimizer {!r}'.format(optimizer))

    def run(self, features):
        """Return the network output ``activation(features @ w0)``."""
        l0 = features
        return self.activation_function1(np.dot(l0, self.w0))


In [None]:
def MSE(y, Y):
    """Return the mean of the element-wise squared differences ``(y - Y)**2``.

    ``y`` and ``Y`` are combined with ordinary NumPy broadcasting; pass arrays
    of identical shape to get the usual mean squared error.
    """
    residual = y - Y
    squared = residual ** 2
    return np.mean(squared)

## Training

In [None]:
import time
lossessgd = {'train':[], 'validation':[], 'prediction_train': []} # For Plotting of boxplot
def build_network(network, epochs, optimizer, seed_n, batch_size = None):
    """Train ``network`` on the global train split and report its losses.

    Reads the module-level ``train_X``, ``train_y``, ``val_X``, ``val_y`` and
    appends the method label and final validation MSE to the module-level
    ``lossessgd`` (used for the box plot).

    Parameters
    ----------
    network : NeuralNetwork
        Model to train in place.
    epochs : int
        Number of 'sgd' / 'adam' steps. 'RC' and 'DC' run their own loop in a
        single ``train`` call and ignore the count (see the note below).
    optimizer : {'sgd', 'adam', 'RC', 'DC'}
    seed_n : int
        Forwarded to ``network.train``.
    batch_size : int, optional
        Mini-batch size for 'sgd'. ``None`` (default) or any value >= the
        number of training rows means full-batch training, which is what this
        function always did before the argument was honoured.

    Returns
    -------
    dict
        ``'train'`` / ``'validation'``: MSE history, ``'prediction_train'``:
        list with one prediction row per training sample.

    Raises
    ------
    ValueError
        For an unknown optimizer or a non-positive ``batch_size``.
    """
    method_labels = {'sgd': 'SGD', 'adam': 'Adam', 'RC': 'RC', 'DC': 'DC'}
    if optimizer not in method_labels:
        # Previously an unknown name silently produced all-zero losses.
        raise ValueError('Unknown optimizer {!r}'.format(optimizer))

    losses = {'train':[], 'validation':[], 'prediction_train': []} # For Plotting of MSE

    start = time.time()
    train_loss=0
    val_loss=0

    n_train = train_X.shape[0]
    if batch_size is None or batch_size > n_train:
        batch_size = n_train
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')

    # Iterating over epochs
    np.random.seed(42)
    for i in range(epochs):

        if optimizer == 'sgd':
            if batch_size == n_train:
                # Full batch: one step on the whole training set.
                network.train(train_X, train_y, optimizer, seed_n)
            else:
                # Mini-batches drawn with replacement by row position; the
                # drawn rows are now actually the ones trained on.
                for k in range(n_train // batch_size):
                    rows = np.random.choice(n_train, size=batch_size)
                    network.train(train_X.iloc[rows], train_y[rows],
                                  optimizer, seed_n)

            train_loss = MSE(network.run(train_X), train_y)
            val_loss = MSE(network.run(val_X), val_y)

        elif optimizer == 'adam':
            network.train(train_X,
                          train_y,
                          optimizer, seed_n,
                          decay_rate_1 = 0.9,
                          decay_rate_2 = 0.99,
                          epsilon = 10e-8)

            train_loss = MSE(network.run(train_X), train_y)
            val_loss = MSE(network.run(val_X), val_y)

        # NOTE(review): for 'RC' / 'DC' nothing is trained inside this loop,
        # so `epochs` zeros are recorded before the single real value below.
        # Kept because consumers of the returned lists are not visible here.
        losses['train'].append(train_loss)
        losses['validation'].append(val_loss)

    if optimizer in ('RC', 'DC'):
        # One call runs the optimizer's complete search loop. The Adam
        # keyword arguments are passed as before; train() ignores them here.
        network.train(train_X,
                      train_y,
                      optimizer, seed_n,
                      decay_rate_1 = 0.9,
                      decay_rate_2 = 0.99,
                      epsilon = 10e-8)
        train_loss = MSE(network.run(train_X), train_y)
        val_loss = MSE(network.run(val_X), val_y)
        losses['train'].append(train_loss)
        losses['validation'].append(val_loss)

    # One (method, final validation MSE) record per run for the box plot.
    lossessgd['train'].append(method_labels[optimizer])
    lossessgd['validation'].append(val_loss)

    # One entry per training row.
    predictions_train=network.run(train_X)
    losses['prediction_train'].extend(predictions_train)

    print('Train Loss: {}, Val Loss: {}'.format(train_loss, val_loss))
    print('Time Taken:{0:.4f}s'.format(time.time()-start))
    return losses

In [None]:
import warnings

# Start from empty accumulators so re-running this cell does not mix runs.
lossesrc = {'mse': []}  # per-attempt loss trace written by the 'RC' optimizer
lossessgd = {'train': [], 'validation': [], 'prediction_train': []}  # box-plot records
epochs = 1000

# RuntimeWarnings (overflow, division by zero, ...) are silenced for the run.
warnings.filterwarnings("ignore", category=RuntimeWarning)

for jj in range(1):
    # Hyper-parameters shared by all four models.
    j = 1
    learning_rate = 0.01
    hidden_nodes = 1
    output_nodes = 1
    #batch_size = 53
    batch_size = train_X.shape[0]
    print(batch_size)

    # Four identically initialised networks, one per optimizer
    # (created in the order adam, rc, dc, sgd).
    n_inputs = train_X.shape[1]
    network_adam, network_rc, network_dc, network_sgd = (
        NeuralNetwork(n_inputs, hidden_nodes, output_nodes, learning_rate, j)
        for _ in range(4)
    )

    print('Training Model with Adam')
    losses_adam = build_network(network_adam, epochs, 'adam', j)

    print('Training Model with RC')
    losses_rc = build_network(network_rc, epochs, 'RC', j)

    print('Training Model with DC')
    losses_dc = build_network(network_dc, epochs, 'DC', j)

    print('\nTraining Model with SGD')
    losses_sgd = build_network(network_sgd, epochs, 'sgd', j, batch_size)

12230
Training Model with Adam


ValueError: operands could not be broadcast together with shapes (12230,1) (297585,1) 

In [None]:
predict_y=network_adam.run(val_X)

In [None]:
predict_y=network_rc.run(val_X)

In [None]:
predict_y=network_dc.run(val_X)

In [None]:
predict_y=network_sgd.run(val_X)

In [None]:
predict_y=network_adam.run(train_X)

In [None]:
predict_y=network_rc.run(train_X)

In [None]:
predict_y=network_dc.run(train_X)

In [None]:
predict_y=network_sgd.run(train_X)

In [None]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error of every trained network on both splits.
# The SGD predictions used to be commented out, so the 'mae sgd' lines printed
# the DC network's error a second time; each label now reports its own model.
trained_networks = [
    ('adam', network_adam),
    ('rc', network_rc),
    ('dc', network_dc),
    ('sgd', network_sgd),
]
for split_name, split_X, split_y in (('TRAIN', train_X, train_y),
                                     ('TEST', val_X, val_y)):
    print(split_name)
    for label, trained_network in trained_networks:
        predict_y = trained_network.run(split_X)
        print('mae ' + label, mean_absolute_error(split_y, predict_y))

In [None]:
dr = pd.DataFrame({'MSE':lossessgd['validation']})
dr['Method'] = lossessgd['train']
print (dr)

In [None]:
dr2 = pd.DataFrame({'MSE':lossessgd['validation']})
dr2['Method'] = lossessgd['train']
print (dr2)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
palette = sns.color_palette()
ax=sns.boxplot(x="Method", y="MSE", data=dr2, orient="v", showmeans=True, showfliers=True, palette=palette, meanprops={"marker": "+", "markeredgecolor": "black", "markersize": "10"})
ax.set_xlabel("Method",fontsize=12)
ax.set_ylabel("MSE",fontsize=12)
ax.tick_params(labelsize=12)
figure = ax.get_figure()
figure.savefig('test.png', dpi=500)

In [None]:
mean_mse = dr2.groupby('Method')['MSE'].mean().reset_index()

# Печать результата
print(mean_mse)

In [None]:
mean_mse = dr2.groupby('Method')['MSE'].min().reset_index()
print(mean_mse)

In [None]:
range_mse = dr2.groupby('Method')['MSE'].agg(lambda x: x.max() - x.min()).reset_index()

# Печать результата
range_mse.columns = ['Method', 'Range']
print(range_mse)

In [None]:
# One row per run, one column per method.
# Pivoting on the original row index puts every MSE on its own row (all other
# cells NaN), so no two methods ever share a row and every correlation is NaN.
# Numbering the runs within each method lines the k-th run of each method up
# on the same row.
pivot_df = (
    dr2.assign(run=dr2.groupby('Method').cumcount())
       .pivot(index='run', columns='Method', values='MSE')
)

# Correlation matrix between methods (needs at least two runs per method).
correlation_matrix = pivot_df.corr()
print(correlation_matrix)

In [None]:
pivot_df

In [None]:
# Correlation between the Adam and SGD validation MSEs across runs.
# Series.corr aligns its operands on the index; the two slices of dr2 keep
# disjoint row labels, so without resetting the index nothing overlaps and the
# result is always NaN.
adam_mse = dr2.loc[dr2['Method'] == 'Adam', 'MSE'].reset_index(drop=True)
sgd_mse = dr2.loc[dr2['Method'] == 'SGD', 'MSE'].reset_index(drop=True)
rc_mse = sgd_mse  # old (misleading) name kept for any later cell that uses it
correlation = adam_mse.corr(sgd_mse)
print(correlation)

In [None]:
pip install linearmodels

In [None]:
# Random-effects panel regression.
# The import statement was incomplete (SyntaxError) and `mod` was a plain
# tuple without a .fit() method.
# NOTE(review): RandomEffects is inferred from the result name `re_res` and
# from the `.predictions` column read in later cells - confirm the estimator.
# NOTE(review): this cell expects `df` to contain the columns id, Year1,
# Net_Ass5, FA_Ass7, KL8, ZK_Ass2 and obor_A; the frame loaded earlier in this
# notebook shows only X1..X10 and Y.
from linearmodels.panel import RandomEffects
from linearmodels.datasets import wage_panel
import statsmodels.api as sm
#df.query('Year1!=2020')[['FA_Ass7','KL8','ZK_Ass2','obor_A']]
#y_branch1_2020=df.query('Year1==2020')['Net_Ass5']
# MODEL 1
#data = X_branch1_for_regression.set_index(['id','Year1'])
#data = X_branch1_for_regression.set_index(['Year1','id'])
#exog = sm.add_constant(data[['age','Growth', 'ZK_Ass2', 'Revlg6', 'FA_Ass7', 'rev', 'KL8','trorg1']])
X_branch1_for_regression=df.query('Year1 != 2020')[['id','Year1','Net_Ass5','FA_Ass7','KL8','ZK_Ass2','obor_A']]
# Panel index: entity (id) first, then time (Year1).
data = X_branch1_for_regression.set_index(['id','Year1'])
exog = sm.add_constant(data[['FA_Ass7','KL8','ZK_Ass2','obor_A']])
mod = RandomEffects(data.Net_Ass5, exog)
re_res = mod.fit()
print(re_res)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

# Ordinary least squares without an intercept as a reference model.
reg = LinearRegression(fit_intercept=False)
reg.fit(train_X, train_y)

# Print MAE then MSE, first for the training split, then for validation.
# `result_reg_train` ends up holding the validation predictions.
for split_features, split_targets in ((train_X, train_y), (val_X, val_y)):
    result_reg_train = reg.predict(split_features)
    print(mean_absolute_error(split_targets, result_reg_train))
    print(mean_squared_error(split_targets, result_reg_train))

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Errors of the fitted panel model on the rows with Year1 != 2020.
# `exog` is whatever regressor matrix the preceding cells left in scope.
result_reg_train = re_res.predict(exog)
data_y = df.query('Year1 != 2020')['Net_Ass5']

# predict() output exposes the fitted values under `.predictions`
fitted_values = result_reg_train.predictions
print(mean_absolute_error(data_y, fitted_values))
print(mean_squared_error(data_y, fitted_values))


In [None]:
# Errors of the already-fitted panel model on the rows with Year1 == 2020.
regression_columns = ['id','Year1','Net_Ass5','FA_Ass7','KL8','ZK_Ass2','obor_A']
X_branch1_for_regression = df.query('Year1 == 2020')[regression_columns]

# Same (id, Year1) index and constant-augmented regressors as used for fitting
data = X_branch1_for_regression.set_index(['id','Year1'])
exog = sm.add_constant(data[['FA_Ass7','KL8','ZK_Ass2','obor_A']])
data_y = df.query('Year1 == 2020')['Net_Ass5']

result_reg_train = re_res.predict(exog)
predicted_values = result_reg_train.predictions
print(mean_absolute_error(data_y, predicted_values))
print(mean_squared_error(data_y, predicted_values))

In [None]:
# Take the values stored under 'prediction_train' in the Adam results dict;
# the cells below copy `predict_y` into plain Python lists.
# NOTE(review): this is the only assignment of `predict_y` visible in this part of
# the notebook, yet the following cells label its contents as Adam, SGD and DC in
# turn — presumably this line was edited and re-run per method; confirm.
predict_y=losses_adam['prediction_train']

In [None]:
# Test-set variant: first-column entries of predict_y and of val_y as plain lists
b = [predict_y[row, 0] for row in range(predict_y.shape[0])]
c = [val_y[row, 0] for row in range(val_y.shape[0])]

In [None]:
# Test-set predictions labelled Adam: first-column entries of predict_y as a plain list.
# The actual-values list `c` is not rebuilt here (that code was commented out).
d = [predict_y[row, 0] for row in range(predict_y.shape[0])]

In [None]:
# Test-set predictions labelled SGD: first-column entries of predict_y as a plain list.
# The actual-values list `c` is not rebuilt here (that code was commented out).
g = [predict_y[row, 0] for row in range(predict_y.shape[0])]

In [None]:
# Test-set predictions labelled DC: first-column entries of predict_y as a plain list.
# The actual-values list `c` is not rebuilt here (that code was commented out).
h = [predict_y[row, 0] for row in range(predict_y.shape[0])]

In [None]:
# Size of train_y along its first axis
train_y.shape[0]

In [None]:
# Training-set variant for predictions indexed with a single subscript:
# copy predict_y and the first column of train_y into plain lists.
b = [predict_y[i] for i in range(len(predict_y))]
c = [train_y[i, 0] for i in range(train_y.shape[0])]

# The former `h=0` / `k=0` resets were removed: neither value was read in this
# cell, and rebinding `h` to 0 discarded the DC prediction list built earlier,
# which later cells use as a column and index element-wise (h[i]).

In [None]:
# THIS is the variant to run after predict:
# first-column entries of predict_y and of train_y as plain lists.
b = [predict_y[i, 0] for i in range(predict_y.shape[0])]
c = [train_y[i, 0] for i in range(train_y.shape[0])]

# The former `h=0` / `k=0` resets were removed: neither value was read in this
# cell, and rebinding `h` to 0 discarded the DC prediction list built earlier,
# which later cells use as a column and index element-wise (h[i]).

In [None]:
# Peek at the entry in the first row, first column of val_y
val_y[0,0]

In [None]:
# Two-column table pairing the prediction list `b` with the actual-values list `c`
pa = pd.DataFrame({
    'Prediction': b,
    'Actual Values': c,
})
pa

In [None]:
# Predictions of the four methods next to the actual values, one column each
pa2 = pd.DataFrame({
    'Prediction Adam': d,
    'Prediction SGD': g,
    'Prediction RC': b,
    'Prediction DC': h,
    'Actual Values': c,
})
pa2

In [None]:
import matplotlib.pyplot as plt

# One line per method plus the actual series, all drawn on the current axes.
legend_by_column = {
    'Prediction Adam': 'Adam',
    'Prediction SGD': 'SGD',
    'Prediction RC': 'RC',
    'Prediction DC': 'DC',
    'Actual Values': 'actual values',
}
for column_name, legend_label in legend_by_column.items():
    pa2[column_name].plot(kind='line', label=legend_label, alpha=1)

plt.xticks(rotation=90, size=13)
plt.yticks(size=13)

# NOTE(review): these axis labels name a stock index and dates, while a later
# cell plotting the same four models uses 'maxtp' / 'Observations' — confirm
# which labels apply to this data.
plt.ylabel('Wilshire 2500 index', fontsize=13)
plt.xlabel('Date', fontsize=13)
plt.legend(fontsize=11)
plt.savefig('test.png', dpi=400, bbox_inches="tight")

In [None]:
# Element-wise absolute error of each method's predictions against `c`,
# over the first predict_y.shape[0] entries.
n_rows = predict_y.shape[0]
h2 = [abs(h[row] - c[row]) for row in range(n_rows)]
d2 = [abs(d[row] - c[row]) for row in range(n_rows)]
g2 = [abs(g[row] - c[row]) for row in range(n_rows)]
b2 = [abs(b[row] - c[row]) for row in range(n_rows)]

In [None]:
# Absolute errors of the four methods, with the actual values alongside
pa3 = pd.DataFrame({
    'Adam': d2,
    'SGD': g2,
    'RC': b2,
    'DC': h2,
    'Actual Values': c,
})
pa3

In [None]:
# Box plot of the MSE values in dr2, one box per method, with the mean marked by '+'.
# NOTE(review): relies on `sns` having been imported by an earlier cell; the only
# seaborn import visible in this part of the notebook comes further down — confirm.
mean_marker = {"marker": "+", "markeredgecolor": "black", "markersize": "10"}
palette = sns.color_palette()
ax = sns.boxplot(
    data=dr2,
    x="Method",
    y="MSE",
    orient="v",
    showmeans=True,
    showfliers=True,
    palette=palette,
    meanprops=mean_marker,
)
ax.set_xlabel("Method", fontsize=12)
ax.set_ylabel("MSE", fontsize=12)
ax.tick_params(labelsize=12)

# Save through the figure that owns the axes
figure = ax.get_figure()
figure.savefig('test.png', dpi=500)

In [None]:
# Long-form copy of the four error columns of pa3: one row per (method, value) pair.
# NOTE(review): relies on `sns` having been imported by an earlier cell; the only
# seaborn import visible in this part of the notebook comes further down — confirm.
method_columns = ['Adam', 'SGD', 'RC', 'DC']
df_melted = pa3.melt(value_vars=method_columns, var_name='Method', value_name='Prediction')
palette = sns.color_palette()

# Box plot per method, with the mean marked by '+'
mean_marker = {"marker": "+", "markeredgecolor": "black", "markersize": "10"}
sns.boxplot(
    data=df_melted,
    x='Method',
    y='Prediction',
    showmeans=True,
    palette=palette,
    meanprops=mean_marker,
)
plt.xlabel('Method', fontsize=12)
plt.ylabel('Absolute errors in predicted index', fontsize=12)
plt.xticks(rotation=45)
plt.tick_params(labelsize=12)

# Fit the layout, write the image, then show the plot
plt.tight_layout()
plt.savefig('test.png', dpi=500)
plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Box plot of MSE per method, with the mean marked by '+'.
# The original passed data=pa2, but pa2 only has the 'Prediction ...' and
# 'Actual Values' columns, so x="Method" / y="MSE" could not be resolved.
# dr2 is the frame in this notebook that carries 'Method' and 'MSE'.
# NOTE(review): confirm dr2 is the intended source; if the goal was a box plot of
# the pa2 predictions, pa2 has to be melted to long form first.
palette = sns.color_palette()
ax = sns.boxplot(
    x="Method",
    y="MSE",
    data=dr2,
    orient="v",
    showmeans=True,
    showfliers=True,
    palette=palette,
    meanprops={"marker": "+", "markeredgecolor": "black", "markersize": "10"},
)
ax.set_xlabel("Method", fontsize=12)
ax.set_ylabel("MSE", fontsize=12)
ax.tick_params(labelsize=12)
figure = ax.get_figure()
figure.savefig('test.png', dpi=500)

In [None]:
import matplotlib.pyplot as plt

# Predicted series in solid red over the actual series in translucent blue
line_styles = (
    ('Prediction', {'label': 'prediction', 'color': 'red', 'alpha': 1}),
    ('Actual Values', {'label': 'actual values', 'color': 'blue', 'alpha': 0.4}),
)
for column_name, style in line_styles:
    pa[column_name].plot(kind='line', **style)

plt.xticks(rotation=90, size=13)
plt.yticks(size=13)
plt.ylabel('Wilshire 2500 index', fontsize=13)
plt.xlabel('Date', fontsize=13)
plt.legend(fontsize=11)
plt.savefig('test.png', dpi=400, bbox_inches="tight")

In [None]:
import matplotlib.pyplot as plt

# Validation-set predictions of the four trained networks, one column each.
pa = pd.DataFrame()
pa['Prediction Adam'] = network_adam.run(val_X)
pa['Prediction SGD'] = network_sgd.run(val_X)
pa['Prediction RC'] = network_rc.run(val_X)
pa['Prediction DC'] = network_dc.run(val_X)

# The original cell plotted pa['Actual Values'] although `pa` had just been
# rebuilt without that column (KeyError). The actual values are taken from the
# first column of val_y, the same way the list-building cells earlier read it.
pa['Actual Values'] = [val_y[i, 0] for i in range(val_y.shape[0])]

# Each model gets its own legend label and colour; the original drew all four
# as red lines labelled 'prediction', making them indistinguishable.
prediction_styles = {
    'Prediction Adam': ('Adam', 'red'),
    'Prediction SGD': ('SGD', 'green'),
    'Prediction RC': ('RC', 'orange'),
    'Prediction DC': ('DC', 'purple'),
}
for column_name, (legend_label, line_color) in prediction_styles.items():
    pa[column_name].plot(kind='line', label=legend_label, color=line_color, alpha=1)
pa['Actual Values'].plot(kind='line', label='actual values', color='blue', alpha=0.4)

plt.xticks(rotation=90, size=13)
plt.yticks(size=13)

plt.ylabel('maxtp', fontsize=13)
plt.xlabel('Observations', fontsize=13)
plt.legend(fontsize=11)
plt.savefig('test.png', dpi=400, bbox_inches="tight")

In [None]:
# Training-loss curves for Adam and SGD on shared axes.
for history, legend_label in ((losses_adam, 'Adam'), (losses_sgd, 'SGD')):
    plt.plot(history['train'], label=legend_label)
# RC / DC curves are disabled in this figure:
#plt.plot(losses_rc['train'], label='RC Training Loss')
#plt.plot(losses_dc['train'], label='DC Training Loss')

plt.xticks(size=13)
plt.yticks(size=13)
plt.xlabel('Iterations', fontsize=13)
plt.ylabel('MSE', fontsize=13)
plt.legend()
plt.savefig('test2.png', dpi=400, bbox_inches="tight")

In [None]:
# Show the values stored under the 'mse' key of lossesrc
lossesrc['mse']

In [None]:
# MSE curve stored under lossesrc['mse'], labelled RC
rc_curve = lossesrc['mse']
plt.plot(rc_curve, label="RC")

plt.xticks(size=13)
plt.yticks(size=13)
plt.xlabel('Iterations', fontsize=13)
plt.ylabel('MSE', fontsize=13)
plt.legend()
plt.savefig('test2.png', dpi=400, bbox_inches="tight")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Hard-coded MSE values for RC and DC at six r_max settings.
# 'rmax' is stored as strings, so the x axis is categorical (evenly spaced ticks).
# NOTE(review): the second letter of the 'RС' / 'DС' keys appears to be a Cyrillic
# 'С' rather than a Latin 'C'; kept exactly as typed — confirm before referencing
# these columns elsewhere.
data = {
    'rmax': ['10000', '20000', '40000', '80000', '100000', '150000'],
    'RС': [180.292338, 179.508615, 179.219025, 179.189593, 179.164364, 179.224456],
    'DС': [180.416213, 179.992796, 180.297284, 179.832214, 180.395676, 179.828004],
}
dk = pd.DataFrame(data)

# Draw both series on one figure
plt.figure(figsize=(6, 4))
for column_name, line_color, legend_label in (('RС', 'b', 'RC'), ('DС', 'r', 'DC')):
    plt.plot(
        dk['rmax'],
        dk[column_name],
        marker='o',
        color=line_color,
        linestyle='-',
        linewidth=2,
        label=legend_label,
    )

# Axis labels and legend; the figure is written to disk rather than shown
plt.ylabel('MSE')
plt.xlabel(r'$r_{max}$')
plt.legend()
plt.savefig('test.png', dpi=500)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Hard-coded MSE values for RC and DC at six Δy* settings.
# 'dy' holds LaTeX tick labels as strings, so the x axis is categorical.
# NOTE(review): the second letter of the 'RС' / 'DС' keys appears to be a Cyrillic
# 'С' rather than a Latin 'C'; kept exactly as typed — confirm before referencing
# these columns elsewhere.
data = {
    'dy': [r'$10^7$', r'$2 \cdot 10^7$', r'$4 \cdot 10^7$', r'$8 \cdot 10^7$', r'$10^8$', r'$1.5 \cdot 10^8$'],
    'RС': [180.06711, 179.465222, 180.292338, 179.372332, 179.204481, 179.473249],
    'DС': [180.510129, 180.207314, 180.395676, 179.890397, 180.229389, 180.093852],
}
dk = pd.DataFrame(data)

# Draw both series on one figure
plt.figure(figsize=(7, 4))
for column_name, line_color, legend_label in (('RС', 'b', 'RC'), ('DС', 'r', 'DC')):
    plt.plot(
        dk['dy'],
        dk[column_name],
        marker='o',
        color=line_color,
        linestyle='-',
        linewidth=2,
        label=legend_label,
    )

# Axis labels and legend; the figure is written to disk rather than shown
plt.xlabel(r'$\Delta y*$')
plt.ylabel('MSE')
plt.legend()
plt.savefig('test.png', dpi=500)

In [None]:
# Training and validation loss curves for Adam, SGD and RC on shared axes.
plt.plot(losses_adam['train'], label='Adam Training Loss')
plt.plot(losses_adam['validation'], label='Adam Validation Loss')
plt.plot(losses_sgd['train'], label='SGD Training Loss')
plt.plot(losses_sgd['validation'], label='SGD Validation Loss')
plt.plot(losses_rc['train'], label='RC Training Loss')
# This curve is read from losses_rc, so it is labelled RC (the original legend
# entry said 'DC Validation Loss').
plt.plot(losses_rc['validation'], label='RC Validation Loss')
plt.legend()
#_ = plt.ylim()

From the plots, we can observe that with Adam the weights of the neural network are adjusted more smoothly to reduce the training loss. Try increasing the learning rate, and you will see that Adam, which uses an adaptive learning rate, converges much faster than SGD.

The benefits of using Adam are not so obvious here because the dataset is very small: increasing the number of training epochs tends to lead to overfitting, so early stopping is required. It is recommended to set the epochs for Adam to around 200 for the above hyperparameter configuration, as that is where the training and validation losses start diverging. However, we kept the epochs for both networks the same for plotting.

Lastly, in this implementation, Adam is much faster to compute than SGD because it is processed as one entire training batch.

## Test Model

Here, we will compare the models trained with Adam and SGD on the test set.

In [None]:
def test_model(network):
    test_predictions = network.run(test_X)
    correct = 0
    total = 0
    for i in range(len(test_predictions)):
        total += 1
        if test_predictions[i] < 0.5 and test_y[i] == 0:
            correct += 1
        elif test_predictions[i] >= 0.5 and test_y[i] == 1:
            correct += 1
    return correct/total


In [None]:
# Report the accuracy returned by test_model for each trained network
adam_accuracy = test_model(network_adam)
print(f'Adam Test Accuracy: {adam_accuracy}')
sgd_accuracy = test_model(network_sgd)
print(f'SGD Test Accuracy: {sgd_accuracy}')

With an accuracy of 85%, we are placed within the top 100 of the Titanic Machine Learning challenge! To improve the accuracy, try adding more layers or dropout to the neural network, swapping out sigmoid for ReLU, or optimizing the hyperparameters.