# **Data cleaning and normalization** (GS, IFFT, TI, BRANCHES, INDEXES)

**RELEASE**

In [1]:
# Release suffix inserted into data file names built by this notebook.
REL = '_R004'

## **Importing libraries, mounting the drive, and global settings**

Mounting Google Drive

In [2]:
from google.colab import drive
# Mount Google Drive under /gdrive; every path used below is rooted there.
# force_remount=True requests a fresh mount even if the drive is already mounted.
drive.mount('/gdrive', force_remount=True)

Mounted at /gdrive


Importing our own modules

In [3]:
import sys
# Add the Release folder on the mounted drive to the import path so that the
# project-local `Modules` package can be imported.
sys.path.append('/gdrive/My Drive/Release')
# NOTE(review): `mgf` is not referenced in the visible part of this notebook - confirm it is still needed.
import Modules.global_f as mgf

Importing libraries

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import os
import datetime
from google.colab import files

# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option("display.max_columns", None)
# Render floats with two decimal places in displayed output.
pd.set_option('display.float_format', lambda x: '%.2f' % x)

import warnings
# Silence all warnings for the whole session.
# NOTE(review): this also hides pandas warnings (e.g. SettingWithCopyWarning,
# deprecation notices) that may point at real problems - consider narrowing the filter.
warnings.filterwarnings("ignore")

Data directory paths

In [5]:
# Root data directory on the mounted Google Drive (trailing slash included so
# file names can be appended directly).
DATA_PATH = '/gdrive/My Drive/Data/'
# Raw-data sub-directories, derived from DATA_PATH so the root is defined once.
INDEXES_PATH = DATA_PATH + 'Raw_data/Indexes/'
# The original literal contained a stray double slash ('Data//Raw_data').
BRANCHES_PATH = DATA_PATH + 'Raw_data/Branches/'

## **Task implementation**

Loading data

In [6]:
# Load the collected feature set for the current release. The release suffix comes
# from REL (instead of a hard-coded '_R004') so the input and the files written
# later always refer to the same release. parse_dates=[0] parses the first column as dates.
df1 = pd.read_csv(
    DATA_PATH + 'Collector_Features_(DT_GS_TI_FFTs_INXs_BRHs)' + REL + '.csv',
    parse_dates=[0],
)

Filling NaNs with the next valid value (backward fill)

In [7]:
# Backward fill: every NaN takes the next valid value found further down the same column.
# DataFrame.bfill() returns a new frame (df1 is left untouched), so no explicit copy is
# needed, and it replaces the deprecated fillna(method='bfill') spelling.
# NOTE(review): if rows are in chronological order, this copies later observations
# into earlier rows - confirm that is acceptable for downstream use.
df2 = df1.bfill()
df2.info(max_cols=125)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2565 entries, 0 to 2564
Data columns (total 124 columns):
 #   Column                                                         Non-Null Count  Dtype         
---  ------                                                         --------------  -----         
 0   Date                                                           2565 non-null   datetime64[ns]
 1   price                                                          2565 non-null   float64       
 2   Open                                                           2565 non-null   float64       
 3   Max                                                            2565 non-null   float64       
 4   Min                                                            2565 non-null   float64       
 5   Vol mln                                                        2565 non-null   object        
 6   change %                                                       2565 non-null   float64       
 

Saving the backward-filled dataset

In [8]:
# Write the backward-filled frame next to the input data, without the index column.
bfilled_csv_path = DATA_PATH + 'Data_Cleaner_!!bfilled!!' + REL + '.csv'
df2.to_csv(bfilled_csv_path, index=None)

Cleaning data by dropping nulls

In [9]:
# Get the column names
df1.columns.values

array(['Date', 'price', 'Open', 'Max', 'Min', 'Vol mln', 'change %',
       'ma7', 'ma14', 'ma21', '26ema', '12ema', 'MACD', '20sd',
       'upper_band', 'lower_band', 'ema', 'wma', 'KAMA', 'DEMA',
       'momentum', 'log_momentum', 'RSI', 'Stochk', 'Stochd',
       'Stoch RSIk', 'Stoch RSId', 'ADX', 'ADXR', 'DX', 'WillR', 'CCI',
       'ATR', 'NATR', 'ULTOSC', 'ROC', 'BOP', 'Aroondown', 'Aroonup',
       'AroonOSC', 'APO', 'PPO', 'CMO', 'ifft_3', 'ifft_6', 'ifft_9',
       'ifft_100', 'dow-jones', 's-p-500', 'dax', 'nasdaq-composite',
       'ftse-100', 'cac-40', 'nikkei-225', 'sse-composite', 'hang-seng',
       'kospi', 'bse-sensex', 'bovespa', 'merval', 'vix-index',
       'индекс-мосбиржи', 'bitcoin', 'ifx-cbonds', 'ртс', 'нефть-brent',
       'msci-world-index', 's-p_tsx-composite', 'asx-200',
       'kase-(казахстан)', 'ux-(украина)', 'золото-(лондон)',
       'серебро-(лондон)', 'платина-(лондон)', 'палладий-(лондон)',
       'LIBOR USD', 'LIBOR EUR', 'LIBOR GBP', 'LIBOR CHF',

Excluding the column 'Vol mln' and the columns with many missing values: 'alps-alerian-mlp-etf-(usd)', 'comstage-nyse-arca-gold-bugs-ucits-etf-(usd)', 'jp-morgan-chase-commercial-mortgage-securities-corp-pref-nyse', 'Stoch RSIk', 'Aroondown', 'Aroonup'

In [10]:
# Columns kept for cleaning. 'Date' comes first; later cells rely on this and
# iterate over keys[1:] to process only the value columns.
keys= ['Date', 'price', 'Open', 'Max', 'Min', 'change %', 'ma7',
       'ma14', 'ma21', '26ema', '12ema', 'MACD', '20sd', 'upper_band',
       'lower_band', 'ema', 'wma', 'KAMA', 'DEMA', 'momentum', 'log_momentum',
       'RSI', 'Stochk', 'Stochd', 'Stoch RSId', 'ADX', 'ADXR',
       'DX', 'WillR', 'CCI', 'ATR', 'NATR', 'ULTOSC', 'ROC', 'BOP',
       'AroonOSC', 'APO', 'PPO', 'CMO', 'ifft_3',
       'ifft_6', 'ifft_9', 'ifft_100', 'dow-jones', 's-p-500', 'dax',
       'nasdaq-composite', 'ftse-100', 'cac-40', 'nikkei-225', 'sse-composite',
       'hang-seng', 'kospi', 'bse-sensex', 'bovespa', 'merval',
       'vix-index', 'индекс-мосбиржи', 'bitcoin', 'ifx-cbonds', 'ртс',
       'нефть-brent', 'msci-world-index', 's-p_tsx-composite', 'asx-200',
       'kase-(казахстан)', 'ux-(украина)', 'золото-(лондон)',
       'серебро-(лондон)', 'платина-(лондон)', 'палладий-(лондон)',
       'LIBOR USD', 'LIBOR EUR', 'LIBOR GBP', 'LIBOR CHF', 'LIBOR JPY',
       'morgan-stanley-nyse', 'GBP_USD', 'USD_JPY',
       'Dow Jones Basic Materials', 'Dow Jones Consumer Goods',
       'Dow Jones Consumer Services', 'Dow Jones Financials',
       'Dow Jones Health Care', 'Dow Jones Industrials', 'Dow Jones Oil & Gas',
       'Dow Jones Technology', 'Dow Jones Telecommunications',
       'Dow Jones Utilities', 'NYSE Energy', 'NYSE Financials',
       'NYSE Healthcare', 'NYSE TMT', 'Philadelphia Semiconductor Index',
       'NASDAQ Bank', 'NASDAQ Biotechnology', 'NASDAQ Computer',
       'NASDAQ Financial 100', 'NASDAQ Health Care', 'NASDAQ Industrial',
       'NASDAQ Insurance', 'NASDAQ Internet', 'NASDAQ Other Finance',
       'NASDAQ Telecommunications', 'NASDAQ Transportation',
       'S&P 500 Utilities', 'S&P 500 Consumer Discretionary',
       'S&P 500 Consumer Staples', 'S&P 500 Energy', 'S&P 500 Financials',
       'S&P 500 Health Care', 'S&P 500 Industrials',
       'S&P 500 Information Technology', 'S&P 500 Materials',
       'S&P 500 Real Estate', 'S&P 500 Telecom Services', 'NYSE  Composite']

In [11]:
# Keep only the selected columns. An explicit copy is taken because a later cell
# adds 'not_use_*' columns to `df`; assigning into a plain selection of `df1` is
# the pattern pandas reports with SettingWithCopyWarning.
df = df1[keys].copy()
df.info(max_cols=125)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2565 entries, 0 to 2564
Data columns (total 117 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   Date                              2565 non-null   datetime64[ns]
 1   price                             2565 non-null   float64       
 2   Open                              2565 non-null   float64       
 3   Max                               2565 non-null   float64       
 4   Min                               2565 non-null   float64       
 5   change %                          2565 non-null   float64       
 6   ma7                               2559 non-null   float64       
 7   ma14                              2552 non-null   float64       
 8   ma21                              2545 non-null   float64       
 9   26ema                             2565 non-null   float64       
 10  12ema                             2565 non-null

Keep only the rows dated after 2011-04-07 (the cutoff date itself is excluded)

In [12]:
# Rows strictly later than the cutoff date; the index is renumbered from 0.
is_after_cutoff = df['Date'] > '2011-04-07'
df_clear = df.loc[is_after_cutoff].reset_index(drop=True)
df_clear.head()

Unnamed: 0,Date,price,Open,Max,Min,change %,ma7,ma14,ma21,26ema,12ema,MACD,20sd,upper_band,lower_band,ema,wma,KAMA,DEMA,momentum,log_momentum,RSI,Stochk,Stochd,Stoch RSId,ADX,ADXR,DX,WillR,CCI,ATR,NATR,ULTOSC,ROC,BOP,AroonOSC,APO,PPO,CMO,ifft_3,ifft_6,ifft_9,ifft_100,dow-jones,s-p-500,dax,nasdaq-composite,ftse-100,cac-40,nikkei-225,sse-composite,hang-seng,kospi,bse-sensex,bovespa,merval,vix-index,индекс-мосбиржи,bitcoin,ifx-cbonds,ртс,нефть-brent,msci-world-index,s-p_tsx-composite,asx-200,kase-(казахстан),ux-(украина),золото-(лондон),серебро-(лондон),платина-(лондон),палладий-(лондон),LIBOR USD,LIBOR EUR,LIBOR GBP,LIBOR CHF,LIBOR JPY,morgan-stanley-nyse,GBP_USD,USD_JPY,Dow Jones Basic Materials,Dow Jones Consumer Goods,Dow Jones Consumer Services,Dow Jones Financials,Dow Jones Health Care,Dow Jones Industrials,Dow Jones Oil & Gas,Dow Jones Technology,Dow Jones Telecommunications,Dow Jones Utilities,NYSE Energy,NYSE Financials,NYSE Healthcare,NYSE TMT,Philadelphia Semiconductor Index,NASDAQ Bank,NASDAQ Biotechnology,NASDAQ Computer,NASDAQ Financial 100,NASDAQ Health Care,NASDAQ Industrial,NASDAQ Insurance,NASDAQ Internet,NASDAQ Other Finance,NASDAQ Telecommunications,NASDAQ Transportation,S&P 500 Utilities,S&P 500 Consumer Discretionary,S&P 500 Consumer Staples,S&P 500 Energy,S&P 500 Financials,S&P 500 Health Care,S&P 500 Industrials,S&P 500 Information Technology,S&P 500 Materials,S&P 500 Real Estate,S&P 500 Telecom Services,NYSE Composite
0,2011-04-08,160.96,163.49,163.62,160.82,-0.89,160.27,159.58,159.08,160.31,160.08,-0.23,1.99,163.06,155.11,161.28,159.54,161.09,158.5,159.96,5.07,51.26,72.47,67.03,84.97,16.08,18.31,8.7,-38.78,112.1,2.79,1.73,45.24,1.89,-0.9,50.0,0.07,0.04,2.53,165.92,178.85,175.12,156.22,12380.05,1328.17,7217.02,2780.42,6055.75,4061.91,9768.08,3030.02,24396.07,2127.97,19451.45,68718.0,3480.39,17.87,1855.97,,300.75,2123.56,126.65,1351.43,14208.43,4940.57,1794.13,2779.61,1469.5,40.22,1803.0,798.0,0.15,0.51,0.57,0.07,0.11,27.25,1.64,84.75,330.65,332.35,357.18,296.8,350.32,341.94,679.09,680.86,137.29,158.15,14459.38,5213.36,6885.69,6134.44,439.58,1833.9,1062.2,1417.53,2263.2,282.7,2377.9,4255.6,230.5,4237.0,222.1,2499.2,162.47,308.81,312.26,591.18,221.7,385.92,324.01,415.54,250.66,128.91,132.2,8483.94
1,2011-04-11,161.47,160.54,162.91,160.54,0.32,160.68,159.63,159.12,160.4,160.29,-0.1,2.06,163.24,155.01,161.41,159.64,161.1,158.74,160.47,5.08,52.58,58.85,66.23,74.82,15.43,17.81,7.08,-33.03,97.54,2.76,1.71,44.84,3.2,0.39,50.0,0.18,0.11,5.17,165.69,178.32,174.13,155.46,12381.11,1324.46,7212.86,2771.51,6053.44,4038.7,9719.7,3022.54,24303.07,2122.39,19262.54,68164.0,3448.12,16.59,1845.78,,301.04,2113.55,123.98,1348.65,13996.86,4971.22,1788.77,2754.99,1468.0,41.37,1802.0,795.0,0.15,0.52,0.57,0.07,0.11,26.91,1.63,84.45,325.05,333.34,357.23,296.71,351.6,341.24,666.53,679.11,137.45,156.09,14221.63,5207.67,6905.78,6130.59,435.88,1829.4,1062.4,1413.19,2260.2,283.1,2364.1,4250.6,229.1,4224.7,221.7,2492.3,160.26,308.58,314.02,580.27,221.58,387.75,323.71,414.88,247.58,128.86,132.17,8445.77
2,2011-04-12,160.42,160.25,161.44,159.45,-0.65,160.71,159.69,159.22,160.4,160.31,-0.08,2.03,163.27,155.17,160.75,159.68,161.09,158.83,159.42,5.07,49.61,44.78,58.7,41.49,14.39,17.06,0.77,-44.87,31.35,2.7,1.69,44.41,1.23,0.09,50.0,0.33,0.21,-0.79,165.46,177.78,173.13,154.85,12263.58,1314.16,7094.31,2744.79,5964.47,3976.6,9555.26,3023.33,23976.37,2089.4,,66896.0,3401.55,17.09,1799.94,,301.07,2062.52,120.92,1333.85,13801.4,4898.69,1766.0,2668.24,1450.5,40.44,1785.0,783.0,0.14,0.82,0.57,0.07,0.11,26.81,1.63,83.76,318.51,333.84,356.77,295.41,351.34,338.3,646.77,672.78,136.58,155.28,13809.97,5174.97,6901.61,6085.41,426.78,1818.6,1058.7,1398.26,2243.7,282.4,2338.7,4201.3,226.5,4185.5,218.5,2490.9,159.89,308.09,314.85,562.93,220.72,387.21,321.21,411.19,244.25,128.43,131.44,8360.46
3,2011-04-13,160.17,162.5,163.98,159.83,-0.16,160.89,159.71,159.36,160.38,160.29,-0.09,1.66,162.68,156.03,160.36,159.71,161.09,158.89,159.17,5.07,48.89,33.21,45.61,23.18,14.33,17.0,13.55,-47.69,65.04,2.81,1.75,40.62,0.69,-0.56,50.0,0.68,0.43,-2.21,165.22,177.25,172.14,154.4,12270.99,1314.41,7186.82,2761.52,6010.44,4006.23,9641.18,3049.93,24135.03,2121.92,19696.86,66486.0,3407.24,16.92,1809.83,,301.08,2062.47,122.88,1338.75,13833.64,4910.96,1779.95,2736.89,1457.5,40.22,1786.0,773.0,0.14,1.09,0.57,0.07,0.11,26.79,1.63,83.74,317.61,334.54,357.28,293.62,351.19,338.3,648.73,677.99,136.35,155.74,13840.64,5166.76,6897.9,6108.41,427.89,1802.8,1069.4,1410.9,2233.6,284.1,2350.0,4200.7,229.7,4185.4,220.2,2484.4,160.44,309.12,315.06,564.47,219.0,386.85,320.44,414.03,242.73,128.34,131.11,8367.31
4,2011-04-14,155.79,157.51,157.6,155.25,-2.73,160.44,159.55,159.42,160.04,159.6,-0.44,1.66,162.74,156.1,157.31,159.46,160.92,158.4,154.79,5.04,38.5,16.34,31.44,0.0,13.96,16.98,9.17,-94.1,-145.12,2.96,1.9,34.66,-1.77,-0.73,-35.71,0.71,0.45,-23.01,164.99,176.71,171.14,154.1,12285.15,1314.52,7153.56,2760.22,5963.8,3970.39,9653.92,3043.06,24014.0,2141.06,,66278.0,3381.57,16.27,1781.51,,301.12,2028.67,122.36,1336.54,13821.8,4884.25,1761.49,2737.03,1465.75,40.67,1772.0,763.0,0.14,1.08,0.57,0.07,0.11,26.79,1.64,83.67,318.26,335.81,356.71,291.87,352.91,338.26,652.48,676.14,136.55,156.49,13888.7,5141.57,6931.75,6105.69,428.41,1807.4,1077.7,1407.35,2235.1,285.3,2350.0,4178.7,230.0,4199.3,219.4,2511.2,161.26,308.35,317.04,567.88,217.13,388.65,320.37,412.73,243.45,130.13,131.42,8374.16


In [13]:
# Per-column non-null counts and dtypes of the date-filtered frame (lists up to 125 columns).
df_clear.info(max_cols=125)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2503 entries, 0 to 2502
Data columns (total 117 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   Date                              2503 non-null   datetime64[ns]
 1   price                             2503 non-null   float64       
 2   Open                              2503 non-null   float64       
 3   Max                               2503 non-null   float64       
 4   Min                               2503 non-null   float64       
 5   change %                          2503 non-null   float64       
 6   ma7                               2503 non-null   float64       
 7   ma14                              2503 non-null   float64       
 8   ma21                              2503 non-null   float64       
 9   26ema                             2503 non-null   float64       
 10  12ema                             2503 non-null

The cleaning functions

In [14]:
""" The function returns 1 if the field contains a value that cannot be used.
 count - the global variable, marked records counter """
count = 0
def clearing_df(x):
  global count
  if x is None or x == 0 or x == "" or x == " " or pd.isna(x):
    ret = 1
    count += 1
  else:
    ret = 0
  return ret

In [15]:
""" The function returns 1 if the field  "not_use_***"  sum > lim """
def set_not_use(x, lim):
  if x >lim :
    ret = 1
  else:
    ret = 0
  return ret

Marking rows with missing values

In [16]:
# For every value column (keys[0] is 'Date' and is skipped) add a 0/1 flag column
# 'not_use_<column>' marking the values that clearing_df rejects, and print how
# many values were flagged in that column.
flag_columns = []
for k in keys[1:]:
  print(k,' marked empty lines = ', end='')
  count = 0  # clearing_df increments this global once per flagged value
  flag_column = 'not_use_' + k
  df[flag_column] = df[k].apply(clearing_df)
  flag_columns.append(flag_column)
  print(count)

# Sum exactly the flag columns created above. The previous positional slice
# df.columns[len(keys)+1:] started one column too late (it skipped 'not_use_price')
# and, when the cell was re-run, also picked up 'not_use_sum' and 'not_use' themselves.
df['not_use_sum'] = df[flag_columns].sum(axis=1)
# 'not_use' is 1 for rows with at least one flagged value, otherwise 0.
df['not_use'] = df['not_use_sum'].apply(lambda x: set_not_use(x, 0))

price  marked empty lines = 0
Open  marked empty lines = 0
Max  marked empty lines = 0
Min  marked empty lines = 0
change %  marked empty lines = 5
ma7  marked empty lines = 6
ma14  marked empty lines = 13
ma21  marked empty lines = 20
26ema  marked empty lines = 0
12ema  marked empty lines = 0
MACD  marked empty lines = 1
20sd  marked empty lines = 19
upper_band  marked empty lines = 20
lower_band  marked empty lines = 20
ema  marked empty lines = 0
wma  marked empty lines = 29
KAMA  marked empty lines = 30
DEMA  marked empty lines = 58
momentum  marked empty lines = 0
log_momentum  marked empty lines = 0
RSI  marked empty lines = 14
Stochk  marked empty lines = 8
Stochd  marked empty lines = 8
Stoch RSId  marked empty lines = 25
ADX  marked empty lines = 27
ADXR  marked empty lines = 40
DX  marked empty lines = 14
WillR  marked empty lines = 19
CCI  marked empty lines = 13
ATR  marked empty lines = 14
NATR  marked empty lines = 14
ULTOSC  marked empty lines = 28
ROC  marked empty lin

Data cleaning: keep only the rows with fewer than two flagged values

In [17]:
# Keep the rows whose flag total is below 2 and renumber the index from 0.
# NOTE(review): this rebinds df_clear from `df`, not from the earlier date-filtered
# frame, so the date cutoff applied before is not carried over - confirm this is intended.
has_few_flags = df['not_use_sum'] < 2
df_clear = df.loc[has_few_flags].reset_index(drop=True)
df_clear.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2023 entries, 0 to 2022
Columns: 235 entries, Date to not_use
dtypes: datetime64[ns](1), float64(116), int64(118)
memory usage: 3.6 MB


In [18]:
# With a negative argument, tail() returns every row except the first 5
# (the displayed output starts at index 5).
# NOTE(review): if only the last five rows were wanted, this should be tail(5) - confirm.
df_clear.tail(-5)

Unnamed: 0,Date,price,Open,Max,Min,change %,ma7,ma14,ma21,26ema,12ema,MACD,20sd,upper_band,lower_band,ema,wma,KAMA,DEMA,momentum,log_momentum,RSI,Stochk,Stochd,Stoch RSId,ADX,ADXR,DX,WillR,CCI,ATR,NATR,ULTOSC,ROC,BOP,AroonOSC,APO,PPO,CMO,ifft_3,ifft_6,ifft_9,ifft_100,dow-jones,s-p-500,dax,nasdaq-composite,ftse-100,cac-40,nikkei-225,sse-composite,hang-seng,kospi,bse-sensex,bovespa,merval,vix-index,индекс-мосбиржи,bitcoin,ifx-cbonds,ртс,нефть-brent,msci-world-index,s-p_tsx-composite,asx-200,kase-(казахстан),ux-(украина),золото-(лондон),серебро-(лондон),платина-(лондон),палладий-(лондон),LIBOR USD,LIBOR EUR,LIBOR GBP,LIBOR CHF,LIBOR JPY,morgan-stanley-nyse,GBP_USD,USD_JPY,Dow Jones Basic Materials,Dow Jones Consumer Goods,Dow Jones Consumer Services,Dow Jones Financials,Dow Jones Health Care,Dow Jones Industrials,Dow Jones Oil & Gas,Dow Jones Technology,Dow Jones Telecommunications,Dow Jones Utilities,NYSE Energy,NYSE Financials,NYSE Healthcare,NYSE TMT,Philadelphia Semiconductor Index,NASDAQ Bank,NASDAQ Biotechnology,NASDAQ Computer,NASDAQ Financial 100,NASDAQ Health Care,NASDAQ Industrial,NASDAQ Insurance,NASDAQ Internet,NASDAQ Other Finance,NASDAQ Telecommunications,NASDAQ Transportation,S&P 500 Utilities,S&P 500 Consumer Discretionary,S&P 500 Consumer Staples,S&P 500 Energy,S&P 500 Financials,S&P 500 Health Care,S&P 500 Industrials,S&P 500 Information Technology,S&P 500 Materials,S&P 500 Real Estate,S&P 500 Telecom Services,NYSE Composite,not_use_price,not_use_Open,not_use_Max,not_use_Min,not_use_change %,not_use_ma7,not_use_ma14,not_use_ma21,not_use_26ema,not_use_12ema,not_use_MACD,not_use_20sd,not_use_upper_band,not_use_lower_band,not_use_ema,not_use_wma,not_use_KAMA,not_use_DEMA,not_use_momentum,not_use_log_momentum,not_use_RSI,not_use_Stochk,not_use_Stochd,not_use_Stoch 
RSId,not_use_ADX,not_use_ADXR,not_use_DX,not_use_WillR,not_use_CCI,not_use_ATR,not_use_NATR,not_use_ULTOSC,not_use_ROC,not_use_BOP,not_use_AroonOSC,not_use_APO,not_use_PPO,not_use_CMO,not_use_ifft_3,not_use_ifft_6,not_use_ifft_9,not_use_ifft_100,not_use_dow-jones,not_use_s-p-500,not_use_dax,not_use_nasdaq-composite,not_use_ftse-100,not_use_cac-40,not_use_nikkei-225,not_use_sse-composite,not_use_hang-seng,not_use_kospi,not_use_bse-sensex,not_use_bovespa,not_use_merval,not_use_vix-index,not_use_индекс-мосбиржи,not_use_bitcoin,not_use_ifx-cbonds,not_use_ртс,not_use_нефть-brent,not_use_msci-world-index,not_use_s-p_tsx-composite,not_use_asx-200,not_use_kase-(казахстан),not_use_ux-(украина),not_use_золото-(лондон),not_use_серебро-(лондон),not_use_платина-(лондон),not_use_палладий-(лондон),not_use_LIBOR USD,not_use_LIBOR EUR,not_use_LIBOR GBP,not_use_LIBOR CHF,not_use_LIBOR JPY,not_use_morgan-stanley-nyse,not_use_GBP_USD,not_use_USD_JPY,not_use_Dow Jones Basic Materials,not_use_Dow Jones Consumer Goods,not_use_Dow Jones Consumer Services,not_use_Dow Jones Financials,not_use_Dow Jones Health Care,not_use_Dow Jones Industrials,not_use_Dow Jones Oil & Gas,not_use_Dow Jones Technology,not_use_Dow Jones Telecommunications,not_use_Dow Jones Utilities,not_use_NYSE Energy,not_use_NYSE Financials,not_use_NYSE Healthcare,not_use_NYSE TMT,not_use_Philadelphia Semiconductor Index,not_use_NASDAQ Bank,not_use_NASDAQ Biotechnology,not_use_NASDAQ Computer,not_use_NASDAQ Financial 100,not_use_NASDAQ Health Care,not_use_NASDAQ Industrial,not_use_NASDAQ Insurance,not_use_NASDAQ Internet,not_use_NASDAQ Other Finance,not_use_NASDAQ Telecommunications,not_use_NASDAQ Transportation,not_use_S&P 500 Utilities,not_use_S&P 500 Consumer Discretionary,not_use_S&P 500 Consumer Staples,not_use_S&P 500 Energy,not_use_S&P 500 Financials,not_use_S&P 500 Health Care,not_use_S&P 500 Industrials,not_use_S&P 500 Information Technology,not_use_S&P 500 Materials,not_use_S&P 500 Real Estate,not_use_S&P 500 
Telecom Services,not_use_NYSE Composite,not_use_sum,not_use
5,2011-04-28,150.60,152.55,152.60,150.20,-1.48,152.43,155.34,156.89,156.37,154.11,-2.26,3.96,164.80,148.98,151.37,156.21,159.19,153.37,149.60,5.01,32.92,39.94,46.76,59.30,20.42,18.25,31.98,-92.72,-88.47,2.86,1.90,38.16,-5.97,-0.81,-57.14,-2.84,-1.81,-34.16,162.90,171.82,162.27,154.52,12763.31,1360.48,7473.91,2872.53,6069.90,4104.90,9849.74,2887.67,23805.63,2208.35,19292.02,65673.00,3375.96,14.62,1749.65,,301.93,2028.94,125.02,1384.94,13894.40,4873.04,1726.11,2745.00,1535.50,48.70,1835.00,777.00,0.13,1.29,0.58,0.07,0.12,25.82,1.66,81.62,333.98,345.10,369.57,297.95,370.26,351.45,675.40,703.98,140.69,162.95,14367.22,5252.63,7271.88,6294.08,447.54,1849.90,1130.00,1463.83,2265.00,296.00,2456.50,4230.50,238.20,4232.90,230.60,2630.00,167.77,320.51,323.68,589.10,220.94,407.78,333.46,429.39,253.91,138.33,135.01,8639.73,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1
6,2011-05-06,150.10,151.47,151.97,149.53,-0.21,150.97,151.93,154.44,154.37,152.09,-2.28,3.74,161.93,146.96,150.33,153.96,157.22,151.04,149.10,5.00,33.60,31.12,41.10,27.52,23.21,18.85,25.26,-91.32,-117.15,2.55,1.70,36.79,-2.22,-0.56,-100.00,-3.68,-2.37,-32.80,161.49,168.51,156.47,149.84,12638.74,1340.20,7492.25,2827.56,5976.77,4058.01,9859.20,2863.89,23159.14,2147.45,18518.81,64417.00,3315.84,18.40,1681.01,,301.98,1936.29,109.13,1359.27,13566.60,4743.04,1644.97,2649.55,1486.50,34.20,1789.00,721.00,0.13,0.61,0.58,0.07,0.12,25.24,1.64,80.63,320.85,343.99,368.18,292.37,371.14,345.84,636.39,696.50,139.84,163.18,13509.40,5119.11,7291.14,6175.25,443.66,1793.50,1107.20,1448.17,2199.60,292.70,2411.90,4133.20,231.60,4112.70,224.90,2580.90,168.58,318.01,323.74,556.46,216.69,410.12,328.88,425.32,244.88,134.36,134.12,8425.90,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1
7,2011-05-11,147.88,150.00,150.00,147.65,-1.68,150.19,151.15,152.66,153.29,150.90,-2.39,2.67,158.00,147.32,148.62,152.72,156.05,149.76,146.88,4.99,31.50,20.35,19.93,34.24,23.64,20.55,25.99,-96.56,-159.65,2.46,1.66,38.18,-3.26,-0.90,-92.86,-3.43,-2.22,-36.99,160.79,166.84,153.62,145.47,12630.03,1342.08,7494.11,2845.06,5976.00,4058.08,9864.26,2883.89,23291.80,2166.63,18584.96,63775.00,3384.57,16.95,1654.78,,302.18,1908.46,112.57,1357.35,13419.74,4780.25,1644.66,2562.85,1508.00,39.18,1793.00,725.00,0.13,1.00,0.57,0.07,0.11,24.61,1.64,81.04,318.50,345.78,372.00,291.28,374.53,347.01,630.95,698.80,140.77,165.07,13409.17,5104.19,7331.29,6192.62,440.89,1797.50,1122.40,1448.96,2207.80,296.70,2437.90,4143.70,231.40,4133.90,226.80,2589.30,170.83,319.84,326.37,551.34,215.44,413.73,329.19,426.37,243.55,135.30,134.78,8428.09,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1
8,2011-05-12,142.75,145.99,146.78,140.66,-3.47,148.88,150.38,151.82,152.50,149.64,-2.86,2.80,157.41,146.23,144.71,151.93,154.67,148.63,141.75,4.95,23.52,23.84,22.71,34.24,25.57,22.11,50.78,-84.27,-301.85,2.80,1.96,34.17,-5.21,-0.53,-100.00,-3.68,-2.40,-52.96,160.56,166.28,152.68,143.92,12695.92,1348.65,7413.18,2863.04,5944.96,4023.29,9716.65,2845.11,23073.76,2122.65,18335.79,64003.00,3389.73,16.03,1630.78,,302.06,1864.95,112.98,1351.13,13389.42,4696.13,1598.90,2506.37,1489.50,32.50,1760.00,715.00,0.13,1.02,0.57,0.07,0.11,24.51,1.63,81.01,318.24,349.50,375.14,291.43,378.00,347.83,630.59,703.12,141.62,166.30,13373.31,5109.49,7398.59,6225.92,447.13,1806.90,1138.40,1458.82,2221.90,300.50,2454.40,4200.30,230.90,4161.80,225.30,2596.20,172.29,322.58,330.71,550.99,215.25,417.52,329.42,429.02,243.63,135.33,135.65,8456.18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1
9,2011-05-13,141.46,142.39,142.52,139.25,-0.90,147.45,149.61,150.93,151.69,148.38,-3.30,3.39,157.70,144.16,142.54,151.10,153.24,147.46,140.46,4.94,22.01,14.35,19.51,0.91,27.61,23.36,54.11,-84.97,-242.00,2.85,2.02,37.78,-6.32,-0.28,-92.86,-3.85,-2.52,-55.98,160.32,165.72,151.75,142.41,12595.75,1337.77,7403.31,2828.47,5925.87,4018.85,9648.77,2871.03,23276.27,2120.08,18531.28,63235.00,3378.18,17.07,1632.23,,302.27,1866.30,113.83,1343.17,13377.16,4711.36,1637.61,2486.02,1505.75,36.20,1774.00,718.00,0.13,1.07,0.57,0.07,0.11,24.13,1.62,80.79,313.18,348.54,372.95,287.39,376.91,343.77,628.26,694.26,140.68,165.48,13262.69,5025.71,7376.64,6164.92,440.95,1778.60,1131.70,1436.11,2189.00,299.30,2426.60,4145.50,227.50,4110.30,223.50,2558.40,171.54,320.39,330.48,548.64,212.08,416.75,325.71,423.79,240.32,133.69,134.91,8371.67,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018,2021-03-15,346.05,349.40,352.42,342.06,-0.79,338.73,333.83,326.96,322.80,335.31,12.51,11.49,349.93,303.99,346.09,327.22,327.47,342.84,345.05,5.84,70.16,87.07,88.17,88.28,35.12,39.46,31.82,-17.71,146.96,9.48,2.74,60.96,4.89,-0.32,100.00,12.79,3.98,40.32,180.81,209.37,232.86,312.01,32953.46,3968.94,14461.42,13459.71,6749.70,6035.97,29766.97,3419.95,28833.76,3045.71,50395.08,114850.74,50458.30,20.03,3584.49,56497.64,720.76,1543.99,68.88,,18954.75,6773.00,2987.73,1776.95,1723.65,26.10,1213.00,2390.00,0.08,-0.58,0.04,-0.80,-0.08,83.86,1.39,109.12,504.68,892.83,1548.13,750.45,1281.23,1024.68,433.88,3600.44,170.69,313.04,8532.39,9420.48,20220.43,11022.59,3032.05,4778.50,4937.40,9113.11,5997.70,1238.70,10745.60,10925.90,1429.80,11795.80,495.40,6433.40,319.17,1345.76,678.64,395.81,573.09,1341.12,822.90,2329.81,493.20,245.04,240.79,15775.50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1
2019,2021-03-16,341.71,345.84,346.61,337.75,-1.25,340.78,334.62,328.65,324.20,336.29,12.09,11.22,351.10,306.20,343.17,328.82,329.35,344.10,340.71,5.83,65.41,72.98,84.88,54.95,34.20,39.65,22.24,-29.78,77.43,9.43,2.76,59.39,3.25,-0.47,42.86,12.79,3.95,30.81,180.60,209.01,232.26,301.13,32825.95,3962.71,14557.58,13471.57,6803.61,6055.43,29921.09,3446.73,29027.69,3067.17,50363.96,114018.78,49646.77,19.79,3589.83,56329.03,719.85,1552.81,68.39,2821.85,18874.01,6827.10,3011.30,1774.31,1735.00,26.08,1209.00,2496.00,0.08,-0.58,0.04,-0.80,-0.09,82.54,1.39,108.98,500.08,883.39,1544.34,744.59,1279.86,1012.84,421.82,3630.20,171.25,313.19,8334.46,9363.83,20226.26,10980.44,3070.32,4715.20,4937.30,9202.48,5941.90,1232.00,10659.40,10800.80,1424.30,11686.30,499.50,6299.70,319.49,1334.07,679.09,384.62,566.63,1340.61,811.09,2348.18,488.89,244.99,243.04,15669.30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2020,2021-03-17,344.95,344.28,346.98,340.51,0.95,342.32,335.85,330.22,325.74,337.63,11.89,11.00,352.23,308.21,344.36,330.47,331.17,345.61,343.95,5.84,67.19,58.64,72.90,29.15,33.39,39.10,22.76,-20.77,84.92,9.22,2.67,60.26,3.15,0.10,42.86,12.32,3.79,34.38,180.39,208.65,231.65,289.28,33015.37,3974.12,14596.61,13525.20,6762.67,6054.82,29914.33,3445.55,29034.12,3047.50,49801.62,116549.44,49916.45,19.23,3507.91,58069.59,719.80,1493.66,68.00,2823.69,18983.10,6795.20,3019.60,1774.31,1729.65,25.85,1194.00,2488.00,0.08,-0.58,0.04,-0.80,-0.07,83.86,1.40,108.83,505.48,892.53,1552.29,747.23,1274.61,1020.42,425.37,3635.14,171.68,308.02,8423.11,9407.38,20121.65,11007.81,3107.92,4732.90,4968.50,9220.93,5945.40,1239.20,10737.70,10840.10,1432.90,11677.20,499.00,6363.60,314.29,1352.82,678.73,388.23,570.31,1335.15,820.20,2345.57,493.31,244.60,243.57,15731.15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2021,2021-03-18,348.00,349.53,356.83,347.02,0.88,344.81,337.88,331.90,327.39,339.22,11.83,10.42,352.75,311.05,346.79,332.20,333.18,347.30,347.00,5.85,68.83,51.19,60.93,21.97,33.51,39.12,35.17,-21.87,128.64,9.41,2.70,58.16,5.68,-0.16,64.29,11.92,3.64,37.65,180.18,208.28,231.02,276.68,32862.30,3915.46,14775.52,13116.17,6779.68,6062.79,30216.75,3463.07,29405.72,3066.01,49216.52,114835.43,48599.26,21.58,3504.85,57780.26,718.92,1488.72,63.28,2826.79,18836.47,6745.90,3052.26,1786.31,1725.90,26.13,1216.00,2678.00,0.08,-0.58,0.04,-0.80,-0.09,83.94,1.39,108.88,502.64,875.95,1521.51,746.00,1268.86,1012.21,405.58,3518.99,171.65,306.72,8048.75,9392.32,20108.72,10849.46,2976.22,4750.00,4809.10,8917.22,5903.60,1200.00,10391.30,10784.30,1387.40,11505.00,487.00,6326.50,313.18,1317.66,675.52,370.06,573.52,1332.61,819.78,2278.82,490.66,242.80,238.80,15589.07,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [19]:
# Fill the remaining NaNs by linear interpolation along each column, working in both
# directions; limit=50 caps how many consecutive NaNs are filled.
df_cl = df_clear.interpolate(method ='linear', limit_direction ='both', limit = 50)

In [20]:
# Preview the first two rows of the interpolated frame.
df_cl.head(2)

Unnamed: 0,Date,price,Open,Max,Min,change %,ma7,ma14,ma21,26ema,12ema,MACD,20sd,upper_band,lower_band,ema,wma,KAMA,DEMA,momentum,log_momentum,RSI,Stochk,Stochd,Stoch RSId,ADX,ADXR,DX,WillR,CCI,ATR,NATR,ULTOSC,ROC,BOP,AroonOSC,APO,PPO,CMO,ifft_3,ifft_6,ifft_9,ifft_100,dow-jones,s-p-500,dax,nasdaq-composite,ftse-100,cac-40,nikkei-225,sse-composite,hang-seng,kospi,bse-sensex,bovespa,merval,vix-index,индекс-мосбиржи,bitcoin,ifx-cbonds,ртс,нефть-brent,msci-world-index,s-p_tsx-composite,asx-200,kase-(казахстан),ux-(украина),золото-(лондон),серебро-(лондон),платина-(лондон),палладий-(лондон),LIBOR USD,LIBOR EUR,LIBOR GBP,LIBOR CHF,LIBOR JPY,morgan-stanley-nyse,GBP_USD,USD_JPY,Dow Jones Basic Materials,Dow Jones Consumer Goods,Dow Jones Consumer Services,Dow Jones Financials,Dow Jones Health Care,Dow Jones Industrials,Dow Jones Oil & Gas,Dow Jones Technology,Dow Jones Telecommunications,Dow Jones Utilities,NYSE Energy,NYSE Financials,NYSE Healthcare,NYSE TMT,Philadelphia Semiconductor Index,NASDAQ Bank,NASDAQ Biotechnology,NASDAQ Computer,NASDAQ Financial 100,NASDAQ Health Care,NASDAQ Industrial,NASDAQ Insurance,NASDAQ Internet,NASDAQ Other Finance,NASDAQ Telecommunications,NASDAQ Transportation,S&P 500 Utilities,S&P 500 Consumer Discretionary,S&P 500 Consumer Staples,S&P 500 Energy,S&P 500 Financials,S&P 500 Health Care,S&P 500 Industrials,S&P 500 Information Technology,S&P 500 Materials,S&P 500 Real Estate,S&P 500 Telecom Services,NYSE Composite,not_use_price,not_use_Open,not_use_Max,not_use_Min,not_use_change %,not_use_ma7,not_use_ma14,not_use_ma21,not_use_26ema,not_use_12ema,not_use_MACD,not_use_20sd,not_use_upper_band,not_use_lower_band,not_use_ema,not_use_wma,not_use_KAMA,not_use_DEMA,not_use_momentum,not_use_log_momentum,not_use_RSI,not_use_Stochk,not_use_Stochd,not_use_Stoch 
RSId,not_use_ADX,not_use_ADXR,not_use_DX,not_use_WillR,not_use_CCI,not_use_ATR,not_use_NATR,not_use_ULTOSC,not_use_ROC,not_use_BOP,not_use_AroonOSC,not_use_APO,not_use_PPO,not_use_CMO,not_use_ifft_3,not_use_ifft_6,not_use_ifft_9,not_use_ifft_100,not_use_dow-jones,not_use_s-p-500,not_use_dax,not_use_nasdaq-composite,not_use_ftse-100,not_use_cac-40,not_use_nikkei-225,not_use_sse-composite,not_use_hang-seng,not_use_kospi,not_use_bse-sensex,not_use_bovespa,not_use_merval,not_use_vix-index,not_use_индекс-мосбиржи,not_use_bitcoin,not_use_ifx-cbonds,not_use_ртс,not_use_нефть-brent,not_use_msci-world-index,not_use_s-p_tsx-composite,not_use_asx-200,not_use_kase-(казахстан),not_use_ux-(украина),not_use_золото-(лондон),not_use_серебро-(лондон),not_use_платина-(лондон),not_use_палладий-(лондон),not_use_LIBOR USD,not_use_LIBOR EUR,not_use_LIBOR GBP,not_use_LIBOR CHF,not_use_LIBOR JPY,not_use_morgan-stanley-nyse,not_use_GBP_USD,not_use_USD_JPY,not_use_Dow Jones Basic Materials,not_use_Dow Jones Consumer Goods,not_use_Dow Jones Consumer Services,not_use_Dow Jones Financials,not_use_Dow Jones Health Care,not_use_Dow Jones Industrials,not_use_Dow Jones Oil & Gas,not_use_Dow Jones Technology,not_use_Dow Jones Telecommunications,not_use_Dow Jones Utilities,not_use_NYSE Energy,not_use_NYSE Financials,not_use_NYSE Healthcare,not_use_NYSE TMT,not_use_Philadelphia Semiconductor Index,not_use_NASDAQ Bank,not_use_NASDAQ Biotechnology,not_use_NASDAQ Computer,not_use_NASDAQ Financial 100,not_use_NASDAQ Health Care,not_use_NASDAQ Industrial,not_use_NASDAQ Insurance,not_use_NASDAQ Internet,not_use_NASDAQ Other Finance,not_use_NASDAQ Telecommunications,not_use_NASDAQ Transportation,not_use_S&P 500 Utilities,not_use_S&P 500 Consumer Discretionary,not_use_S&P 500 Consumer Staples,not_use_S&P 500 Energy,not_use_S&P 500 Financials,not_use_S&P 500 Health Care,not_use_S&P 500 Industrials,not_use_S&P 500 Information Technology,not_use_S&P 500 Materials,not_use_S&P 500 Real Estate,not_use_S&P 500 
Telecom Services,not_use_NYSE Composite,not_use_sum,not_use
0,2011-04-07,162.4,161.99,164.4,161.57,0.32,160.0,159.52,159.05,160.26,159.92,-0.33,1.98,163.0,155.1,161.91,159.48,161.09,158.3,161.4,5.08,55.3,67.38,60.5,75.42,16.64,19.11,12.96,-22.55,180.86,2.79,1.72,49.71,1.56,0.14,100.0,-0.19,-0.12,10.6,166.15,179.38,176.12,157.1,12409.49,1333.51,7195.96,2796.14,6007.37,4028.3,9590.93,3008.07,24281.8,2122.14,19591.18,69176.0,3456.52,17.11,1844.33,,300.63,2089.52,122.67,1345.2,14107.77,4908.13,1786.02,2793.62,1459.5,39.51,1780.0,778.0,0.16,0.52,0.57,0.07,0.11,27.62,1.63,84.89,331.54,334.04,359.21,299.4,350.19,345.02,677.06,684.75,137.12,158.48,14374.53,5233.29,6876.99,6137.98,443.25,1854.6,1061.9,1425.42,2283.5,283.4,2392.8,4275.3,229.7,4265.8,223.2,2536.6,162.86,310.54,313.17,589.13,223.63,385.7,326.75,418.06,252.23,129.99,131.86,8489.33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1
1,2011-04-08,160.96,163.49,163.62,160.82,-0.89,160.27,159.58,159.08,160.31,160.08,-0.23,1.99,163.06,155.11,161.28,159.54,161.09,158.5,159.96,5.07,51.26,72.47,67.03,84.97,16.08,18.31,8.7,-38.78,112.1,2.79,1.73,45.24,1.89,-0.9,50.0,0.07,0.04,2.53,165.92,178.85,175.12,156.22,12380.05,1328.17,7217.02,2780.42,6055.75,4061.91,9768.08,3030.02,24396.07,2127.97,19451.45,68718.0,3480.39,17.87,1855.97,,300.75,2123.56,126.65,1351.43,14208.43,4940.57,1794.13,2779.61,1469.5,40.22,1803.0,798.0,0.15,0.51,0.57,0.07,0.11,27.25,1.64,84.75,330.65,332.35,357.18,296.8,350.32,341.94,679.09,680.86,137.29,158.15,14459.38,5213.36,6885.69,6134.44,439.58,1833.9,1062.2,1417.53,2263.2,282.7,2377.9,4255.6,230.5,4237.0,222.1,2499.2,162.47,308.81,312.26,591.18,221.7,385.92,324.01,415.54,250.66,128.91,132.2,8483.94,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1


In [21]:
# Trim df_cl down to its first len(keys) columns: every column label that
# sits after that position is collected and then dropped by name.
# NOTE(review): presumably these are the trailing `not_use_*` flag columns
# visible in the preview above, and `keys` lists the feature columns to keep
# -- confirm against the cell that defines `keys`.
colsToDrop = df_cl.columns[len(keys):]
df_cl = df_cl.drop(columns=colsToDrop)

In [22]:
# Print the column/dtype/non-null summary of the trimmed frame.
# max_cols=120 is above the frame's column count (117 in the output below),
# so the full per-column listing is shown rather than the truncated summary.
df_cl.info(max_cols=120)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2023 entries, 0 to 2022
Data columns (total 117 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   Date                              2023 non-null   datetime64[ns]
 1   price                             2023 non-null   float64       
 2   Open                              2023 non-null   float64       
 3   Max                               2023 non-null   float64       
 4   Min                               2023 non-null   float64       
 5   change %                          2023 non-null   float64       
 6   ma7                               2023 non-null   float64       
 7   ma14                              2023 non-null   float64       
 8   ma21                              2023 non-null   float64       
 9   26ema                             2023 non-null   float64       
 10  12ema                             2023 non-null

Saving the cleaned dataset

In [23]:
# Write the cleaned frame to DATA_PATH as 'Data_Cleaner_!!clear!!<REL>.csv'.
# The row index is deliberately left out of the file; index=False is the
# documented boolean for that (a falsy None merely happens to act the same).
df_cl.to_csv(DATA_PATH + 'Data_Cleaner_!!clear!!' + REL + '.csv', index=False)

In [24]:
# Show the (rows, columns) shape of the cleaned frame as a quick sanity check.
df_cl.shape

(2023, 117)

Data normalization

In [25]:
# Normalize the cleaned dataset with the project helper mgf.df_normalize and
# preview the first two rows of the result.
# NOTE(review): the meaning of the second argument (1) is not visible here --
# presumably it marks where the numeric columns start (skipping 'Date');
# confirm against Modules/global_f.py.
# The preview below shows 'Date' unchanged and the feature values in a 0-1 range.
df_norm = mgf.df_normalize(df_cl, 1)
df_norm.head(2)

Unnamed: 0,Date,price,Open,Max,Min,change %,ma7,ma14,ma21,26ema,12ema,MACD,20sd,upper_band,lower_band,ema,wma,KAMA,DEMA,momentum,log_momentum,RSI,Stochk,Stochd,Stoch RSId,ADX,ADXR,DX,WillR,CCI,ATR,NATR,ULTOSC,ROC,BOP,AroonOSC,APO,PPO,CMO,ifft_3,ifft_6,ifft_9,ifft_100,dow-jones,s-p-500,dax,nasdaq-composite,ftse-100,cac-40,nikkei-225,sse-composite,hang-seng,kospi,bse-sensex,bovespa,merval,vix-index,индекс-мосбиржи,bitcoin,ifx-cbonds,ртс,нефть-brent,msci-world-index,s-p_tsx-composite,asx-200,kase-(казахстан),ux-(украина),золото-(лондон),серебро-(лондон),платина-(лондон),палладий-(лондон),LIBOR USD,LIBOR EUR,LIBOR GBP,LIBOR CHF,LIBOR JPY,morgan-stanley-nyse,GBP_USD,USD_JPY,Dow Jones Basic Materials,Dow Jones Consumer Goods,Dow Jones Consumer Services,Dow Jones Financials,Dow Jones Health Care,Dow Jones Industrials,Dow Jones Oil & Gas,Dow Jones Technology,Dow Jones Telecommunications,Dow Jones Utilities,NYSE Energy,NYSE Financials,NYSE Healthcare,NYSE TMT,Philadelphia Semiconductor Index,NASDAQ Bank,NASDAQ Biotechnology,NASDAQ Computer,NASDAQ Financial 100,NASDAQ Health Care,NASDAQ Industrial,NASDAQ Insurance,NASDAQ Internet,NASDAQ Other Finance,NASDAQ Telecommunications,NASDAQ Transportation,S&P 500 Utilities,S&P 500 Consumer Discretionary,S&P 500 Consumer Staples,S&P 500 Energy,S&P 500 Financials,S&P 500 Health Care,S&P 500 Industrials,S&P 500 Information Technology,S&P 500 Materials,S&P 500 Real Estate,S&P 500 Telecom Services,NYSE Composite
0,2011-04-07,0.29,0.28,0.28,0.29,0.43,0.27,0.27,0.27,0.28,0.27,0.55,0.04,0.26,0.31,0.28,0.27,0.28,0.26,0.29,0.45,0.53,0.68,0.62,0.75,0.11,0.14,0.15,0.77,0.81,0.07,0.06,0.5,0.62,0.57,1.0,0.57,0.64,0.53,0.45,0.56,0.5,0.25,0.08,0.07,0.22,0.04,0.37,0.37,0.06,0.33,0.48,0.38,0.12,0.36,0.03,0.11,0.26,,0.0,0.98,0.96,0.15,0.38,0.32,0.44,1.0,0.4,0.75,0.93,0.13,0.05,0.47,0.79,1.0,0.96,0.21,0.85,0.18,0.38,0.05,0.04,0.15,0.04,0.11,0.73,0.03,0.22,0.05,0.82,0.27,0.05,0.17,0.04,0.14,0.04,0.02,0.14,0.04,0.04,0.09,0.03,0.12,0.16,0.13,0.04,0.04,0.05,0.73,0.16,0.04,0.13,0.03,0.22,0.12,0.12,0.2
1,2011-04-08,0.28,0.29,0.28,0.29,0.39,0.28,0.27,0.27,0.28,0.27,0.55,0.04,0.26,0.31,0.28,0.27,0.28,0.26,0.28,0.44,0.48,0.73,0.69,0.85,0.1,0.13,0.1,0.61,0.71,0.07,0.06,0.44,0.63,0.05,0.75,0.58,0.65,0.48,0.45,0.55,0.49,0.25,0.07,0.07,0.22,0.04,0.39,0.38,0.07,0.34,0.48,0.38,0.12,0.36,0.03,0.12,0.26,,0.0,1.0,1.0,0.16,0.39,0.33,0.44,0.99,0.41,0.77,0.95,0.14,0.04,0.47,0.79,1.0,0.96,0.2,0.86,0.18,0.37,0.04,0.04,0.15,0.04,0.1,0.73,0.03,0.22,0.05,0.83,0.27,0.05,0.17,0.04,0.14,0.04,0.02,0.13,0.04,0.04,0.08,0.03,0.12,0.16,0.13,0.04,0.04,0.05,0.74,0.16,0.04,0.13,0.03,0.21,0.12,0.12,0.19


Saving the normalized dataset

In [26]:
# Write the normalized frame to DATA_PATH as 'Data_Cleaner_!!norm_cl!!<REL>.csv'.
# The row index is deliberately left out of the file; index=False is the
# documented boolean for that (a falsy None merely happens to act the same).
df_norm.to_csv(DATA_PATH + 'Data_Cleaner_!!norm_cl!!' + REL + '.csv', index=False)