In [2]:
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook
import warnings
warnings.filterwarnings("ignore")
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import r2_score,matthews_corrcoef,accuracy_score, confusion_matrix, recall_score, roc_auc_score, precision_score, f1_score
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.base import clone
import random
import math
from datetime import timedelta
import pickle

In [5]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [9]:
df.shape

(4632, 14106)

In [11]:
df = pd.read_parquet('rus_volume_201120.parquet', engine='pyarrow')
display(df.tail())

Unnamed: 0,date,aa,date.1,ino,date.2,axp,date.3,vz,date.4,spwh,date.5,ba,date.6,sabr,date.7,pep,date.8,cat,date.9,jpm,date.10,cvx,date.11,mbcn,date.12,apyx,date.13,ko,date.14,cfrx,date.15,dis,date.16,afmd,date.17,xom,date.18,ge,date.19,hpq,date.20,fc,date.21,hd,date.22,ibm,date.23,go,date.24,jnj,date.25,psnl,date.26,mcd,date.27,rpd,date.28,prvl,date.29,mrk,date.30,bcel,date.31,mmm,date.32,bac,date.33,akro,date.34,pfe,date.35,pg,date.36,t,date.37,trv,date.38,rtx,date.39,wmt,date.40,csco,date.41,intc,date.42,msft,date.43,sam,date.44,c,date.45,aig,date.46,hon,date.47,efc,date.48,mo,date.49,clpr,date.50,ip,date.51,abt,date.52,afl,date.53,apd,date.54,gmed,date.55,aep,date.56,reta,date.57,hes,date.58,adm,date.59,adp,date.60,azo,date.61,avy,date.62,pine,date.63,vrsk,date.64,bll,date.65,mbio,date.66,bk,date.67,bax,date.68,bdx,date.69,brk/b,date.70,bby,date.71,hrb,date.72,bsx,date.73,zts,date.74,bmy,date.75,bf/b,date.76,cog,date.77,cpb,date.78,ccl,date.79,lumn,date.80,bprn,date.81,clf,date.82,clx,date.83,cms,date.84,ceix,date.85,cl,date.86,cma,date.87,cag,date.88,ed,date.89,big,date.90,glw,date.91,cmi,date.92,dhr,date.93,tgt,date.94,de,date.95,d,date.96,dov,date.97,duk,date.98,etn,date.99,ecl,date.100,uihc,date.101,pki,date.102,emr,date.103,eog,date.104,etr,date.105,efx,date.106,eqt,date.107,fdx,date.108,m,date.109,fmc,date.110,f,date.111,lnth,date.112,nee,date.113,catb,date.114,ben,date.115,hasi,date.116,prpl,date.117,fcx,date.118,myok,date.119,fenc,date.120,tgna,date.121,gps,date.122,valu,date.123,gd,date.124,gis,...,date.6928,lcbm,date.6929,2298343q,date.6930,rgx,date.6931,rita,date.6932,ulab,date.6933,0893262d,date.6934,764256q,date.6935,cmkc,date.6936,139663q,date.6937,531621q,date.6938,1619175d,date.6939,2679945q,date.6940,sfam,date.6941,descq,date.6942,nuhc,date.6943,qrsi,date.6944,hncs,date.6945,inmtq,date.6946,0544801d,date.6947,bbr,date.6948,nthwq,date.6949,dryr,date.6950,597902q,date.6951,oei,date.6952,virc,date.6953,735429q,date.6954,3610812q,date.6955,amfh,date.6956,haki,date.6957,iric,date.6958,dian,date.6959,2274839q,date.6960,943454q,date.6961,apwrq,date.6962,aawhq,date.6963,1030770q,date.6964,586928q,date.6965,pzl,date.6966,119240q,date.6967,civ,date.6968,2250091q,date.6969,1742858d,date.6970,rdrtq,date.6971,atisz,date.6972,stlw,date.6973,tcahq,date.6974,1429742d,date.6975,cpnt,date.6976,728560q,date.6977,ausfq,date.6978,jdn,date.6979,264526q,date.6980,562424q,date.6981,0824271d,date.6982,567002q,date.6983,1514711d,date.6984,3683645q,date.6985,3518429q,date.6986,1293318d,date.6987,tmcs,date.6988,137614q,date.6989,2484292q,date.6990,prgnq,date.6991,algxq,date.6992,actrq,date.6993,inrg,date.6994,inkt,date.6995,nxtv,date.6996,aklmq,date.6997,qdhc,date.6998,avgn,date.6999,psta,date.7000,913636q,date.7001,0154604d,date.7002,gsbi,date.7003,lfed,date.7004,944951q,date.7005,fba,date.7006,acrtq,date.7007,vibc,date.7008,727132q,date.7009,cebc,date.7010,mvbi,date.7011,mdbk,date.7012,3129880q,date.7013,0246600q,date.7014,vbnj,date.7015,memh,date.7016,gtn/a,date.7017,qfabq,date.7018,embx,date.7019,9474176q,date.7020,345605q,date.7021,597385q,date.7022,797725q,date.7023,mcsiq,date.7024,atahq,date.7025,dztkq,date.7026,tsicq,date.7027,buca,date.7028,mslv,date.7029,cod,date.7030,trkn,date.7031,tsty,date.7032,cmpp,date.7033,662053q,date.7034,4130338q,date.7035,rmht,date.7036,prbz,date.7037,fmxlq,date.7038,smra,date.7039,moss,date.7040,faooq,date.7041,zomx,date.7042,itxc,date.7043,874896q,date.7044,qvdx,date.7045,1638709d,date.7046,2245546q,date.7047,camz,date.7048,1529158d,date.7049,itru,date.7050,ddicq,date.7051,343829q,date.7052,imny
4628,2020-11-16,6564812,NaT,,2020-11-16,5339095.0,2020-11-16,14289351.0,NaT,,2020-11-16,40562836,NaT,,2020-11-16,3398971,2020-11-16,3902350,2020-11-16,19858562,2020-11-16,18980208,NaT,,NaT,,2020-11-16,14908181,NaT,,2020-11-16,16382489,NaT,,2020-11-16,38596815,2020-11-16,118771994,2020-11-16,11140238,NaT,,2020-11-16,5360160,2020-11-16,5293385,NaT,,2020-11-16,6266384,NaT,,2020-11-16,3702131,NaT,,NaT,,2020-11-16,9068171,NaT,,2020-11-16,2434259.0,2020-11-16,57396669,NaT,,2020-11-16,71660458,2020-11-16,7976828,2020-11-16,49948993,2020-11-16,1163462.0,2020-11-16,12885034,2020-11-16,11520137,2020-11-16,34572647,2020-11-16,36306293,2020-11-16,24953344,2020-11-16,168954.0,2020-11-16,26699359,2020-11-16,6015438.0,2020-11-16,4898808.0,NaT,,2020-11-16,7296003,NaT,,2020-11-16,4005789.0,2020-11-16,4144862,2020-11-16,5341114.0,2020-11-16,2201756.0,NaT,,2020-11-16,4201080.0,NaT,,2020-11-16,3220269.0,2020-11-16,2765600.0,2020-11-16,2322655.0,2020-11-16,346738.0,2020-11-16,513566.0,NaT,,NaT,,2020-11-16,1557312.0,NaT,,2020-11-16,4664856,2020-11-16,3391243.0,2020-11-16,1575182.0,NaT,,2020-11-16,3306999,2020-11-16,4663875,2020-11-16,7849749.0,NaT,,2020-11-16,7199552,2020-11-16,581148.0,2020-11-16,7033889,2020-11-16,2668159.0,NaT,,2020-11-16,9663430.0,NaT,,2020-11-16,13086179,2020-11-16,1587974.0,2020-11-16,2297609.0,NaT,,2020-11-16,3701455.0,2020-11-16,2310539.0,2020-11-16,2513757.0,2020-11-16,2330416.0,2020-11-16,999008.0,2020-11-16,4042923.0,2020-11-16,1313965,2020-11-16,2151247.0,2020-11-16,5035611,2020-11-16,1927580.0,2020-11-16,3697034.0,2020-11-16,944043.0,2020-11-16,2843021.0,2020-11-16,1913599.0,2020-11-16,839639.0,NaT,,2020-11-16,832471,2020-11-16,2742274.0,2020-11-16,6992991.0,2020-11-16,1275282.0,2020-11-16,1023731.0,2020-11-16,5999765.0,2020-11-16,2474834,2020-11-16,38710168,2020-11-16,537690.0,2020-11-16,84853863,NaT,,2020-11-16,7667558,NaT,,2020-11-16,3534666.0,NaT,,NaT,,2020-11-16,20641998,NaT,,NaT,,2020-11-16,2736698.0,2020-11-16,13292918,NaT,,2020-11-16,1605494.0,2020-11-16,3620650.0,...,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,
4629,2020-11-17,5443321,NaT,,2020-11-17,4023241.0,2020-11-17,11698188.0,NaT,,2020-11-17,38299287,NaT,,2020-11-17,2924071,2020-11-17,2111211,2020-11-17,14553245,2020-11-17,14329013,NaT,,NaT,,2020-11-17,14248362,NaT,,2020-11-17,9354936,NaT,,2020-11-17,26521077,2020-11-17,90348624,2020-11-17,12321516,NaT,,2020-11-17,8532219,2020-11-17,4134455,NaT,,2020-11-17,7297736,NaT,,2020-11-17,2443547,NaT,,NaT,,2020-11-17,9073571,NaT,,2020-11-17,2589981.0,2020-11-17,46402309,NaT,,2020-11-17,46984500,2020-11-17,6396807,2020-11-17,28575410,2020-11-17,1217997.0,2020-11-17,11134174,2020-11-17,14237213,2020-11-17,23140911,2020-11-17,31072061,2020-11-17,24154112,2020-11-17,131233.0,2020-11-17,18969994,2020-11-17,4627031.0,2020-11-17,3447826.0,NaT,,2020-11-17,5204776,NaT,,2020-11-17,5626640.0,2020-11-17,3793137,2020-11-17,4739526.0,2020-11-17,2218910.0,NaT,,2020-11-17,3052660.0,NaT,,2020-11-17,2845596.0,2020-11-17,1836380.0,2020-11-17,1249116.0,2020-11-17,293742.0,2020-11-17,556536.0,NaT,,NaT,,2020-11-17,1519468.0,NaT,,2020-11-17,3870854,2020-11-17,3877198.0,2020-11-17,1128959.0,NaT,,2020-11-17,2740898,2020-11-17,3106614,2020-11-17,34678265.0,NaT,,2020-11-17,8610180,2020-11-17,437152.0,2020-11-17,4028532,2020-11-17,1841490.0,NaT,,2020-11-17,9659316.0,NaT,,2020-11-17,8478916,2020-11-17,1433332.0,2020-11-17,1770394.0,NaT,,2020-11-17,3406784.0,2020-11-17,1269371.0,2020-11-17,2779872.0,2020-11-17,3696033.0,2020-11-17,515826.0,2020-11-17,2818579.0,2020-11-17,952446,2020-11-17,2400076.0,2020-11-17,6299877,2020-11-17,1411215.0,2020-11-17,3818286.0,2020-11-17,600426.0,2020-11-17,3178659.0,2020-11-17,1307617.0,2020-11-17,873966.0,NaT,,2020-11-17,1516623,2020-11-17,2269819.0,2020-11-17,5393466.0,2020-11-17,1239838.0,2020-11-17,724102.0,2020-11-17,3879216.0,2020-11-17,2816659,2020-11-17,51175190,2020-11-17,1227851.0,2020-11-17,46754509,NaT,,2020-11-17,5866315,NaT,,2020-11-17,3153809.0,NaT,,NaT,,2020-11-17,17941922,NaT,,NaT,,2020-11-17,2797775.0,2020-11-17,7626720,NaT,,2020-11-17,1241745.0,2020-11-17,3657939.0,...,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,
4630,2020-11-18,6255259,NaT,,2020-11-18,3264263.0,2020-11-18,11698771.0,NaT,,2020-11-18,65990690,NaT,,2020-11-18,4335257,2020-11-18,3165091,2020-11-18,12571217,2020-11-18,10583713,NaT,,NaT,,2020-11-18,15278206,NaT,,2020-11-18,9456229,NaT,,2020-11-18,31068017,2020-11-18,148865809,2020-11-18,9110311,NaT,,2020-11-18,5122219,2020-11-18,4606828,NaT,,2020-11-18,6661008,NaT,,2020-11-18,3095154,NaT,,NaT,,2020-11-18,8280683,NaT,,2020-11-18,2615812.0,2020-11-18,54416692,NaT,,2020-11-18,63442680,2020-11-18,6097357,2020-11-18,29558856,2020-11-18,1333166.0,2020-11-18,13392598,2020-11-18,8312289,2020-11-18,21327707,2020-11-18,32817918,2020-11-18,28372789,2020-11-18,94145.0,2020-11-18,25823970,2020-11-18,6265071.0,2020-11-18,4422376.0,NaT,,2020-11-18,7850456,NaT,,2020-11-18,3022942.0,2020-11-18,4545668,2020-11-18,5550996.0,2020-11-18,2028877.0,NaT,,2020-11-18,2861594.0,NaT,,2020-11-18,2688382.0,2020-11-18,2332547.0,2020-11-18,1326001.0,2020-11-18,182410.0,2020-11-18,480392.0,NaT,,NaT,,2020-11-18,2197098.0,NaT,,2020-11-18,7207393,2020-11-18,3946100.0,2020-11-18,1860237.0,NaT,,2020-11-18,2963118,2020-11-18,2330818,2020-11-18,22512336.0,NaT,,2020-11-18,12455274,2020-11-18,657595.0,2020-11-18,5634686,2020-11-18,2543398.0,NaT,,2020-11-18,8031449.0,NaT,,2020-11-18,9030001,2020-11-18,1910521.0,2020-11-18,2768333.0,NaT,,2020-11-18,4168487.0,2020-11-18,1925937.0,2020-11-18,2775754.0,2020-11-18,2034911.0,2020-11-18,905267.0,2020-11-18,2736607.0,2020-11-18,987755,2020-11-18,2873774.0,2020-11-18,13092322,2020-11-18,1618516.0,2020-11-18,5083691.0,2020-11-18,750133.0,2020-11-18,2949150.0,2020-11-18,1282111.0,2020-11-18,828274.0,NaT,,2020-11-18,938127,2020-11-18,2559466.0,2020-11-18,5104298.0,2020-11-18,1323008.0,2020-11-18,894744.0,2020-11-18,5141751.0,2020-11-18,2874823,2020-11-18,49357285,2020-11-18,687820.0,2020-11-18,77759269,NaT,,2020-11-18,6502247,NaT,,2020-11-18,3580796.0,NaT,,NaT,,2020-11-18,12593082,NaT,,NaT,,2020-11-18,1412983.0,2020-11-18,6304379,NaT,,2020-11-18,1536315.0,2020-11-18,4061821.0,...,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,
4631,2020-11-19,5024584,NaT,,2020-11-19,3249828.0,2020-11-19,12662962.0,NaT,,2020-11-19,34236071,NaT,,2020-11-19,4263568,2020-11-19,2012068,2020-11-19,10202538,2020-11-19,8820183,NaT,,NaT,,2020-11-19,17189549,NaT,,2020-11-19,8892462,NaT,,2020-11-19,26954361,2020-11-19,87177512,2020-11-19,5731894,NaT,,2020-11-19,3929422,2020-11-19,3439648,NaT,,2020-11-19,7987716,NaT,,2020-11-19,2840460,NaT,,NaT,,2020-11-19,6714806,NaT,,2020-11-19,2986012.0,2020-11-19,46422959,NaT,,2020-11-19,43261829,2020-11-19,5284085,2020-11-19,41380974,2020-11-19,1118795.0,2020-11-19,8928957,2020-11-19,6196868,2020-11-19,22177706,2020-11-19,29469084,2020-11-19,24792746,2020-11-19,83835.0,2020-11-19,20262350,2020-11-19,4225237.0,2020-11-19,3263013.0,NaT,,2020-11-19,8198094,NaT,,2020-11-19,3474422.0,2020-11-19,3548144,2020-11-19,3475489.0,2020-11-19,1340474.0,NaT,,2020-11-19,4399203.0,NaT,,2020-11-19,1716411.0,2020-11-19,1603227.0,2020-11-19,1110587.0,2020-11-19,244492.0,2020-11-19,509739.0,NaT,,NaT,,2020-11-19,1827093.0,NaT,,2020-11-19,4173379,2020-11-19,2622816.0,2020-11-19,1439467.0,NaT,,2020-11-19,1728442,2020-11-19,1585420,2020-11-19,14816271.0,NaT,,2020-11-19,9025456,2020-11-19,961325.0,2020-11-19,6767711,2020-11-19,966066.0,NaT,,2020-11-19,9530193.0,NaT,,2020-11-19,8441591,2020-11-19,1558938.0,2020-11-19,2311805.0,NaT,,2020-11-19,4400935.0,2020-11-19,1449928.0,2020-11-19,2622865.0,2020-11-19,1723012.0,2020-11-19,634685.0,2020-11-19,2319015.0,2020-11-19,872594,2020-11-19,2770508.0,2020-11-19,6834937,2020-11-19,1700393.0,2020-11-19,6147937.0,2020-11-19,473281.0,2020-11-19,3368591.0,2020-11-19,2244538.0,2020-11-19,802698.0,NaT,,2020-11-19,767368,2020-11-19,1563387.0,2020-11-19,4186730.0,2020-11-19,1048986.0,2020-11-19,691604.0,2020-11-19,6067105.0,2020-11-19,2374950,2020-11-19,64717647,2020-11-19,366958.0,2020-11-19,56672693,NaT,,2020-11-19,7392277,NaT,,2020-11-19,3678006.0,NaT,,NaT,,2020-11-19,13501311,NaT,,NaT,,2020-11-19,1703370.0,2020-11-19,5229709,NaT,,2020-11-19,1413303.0,2020-11-19,2551564.0,...,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,
4632,2020-11-20,50,NaT,,NaT,,NaT,,NaT,,2020-11-20,11837,NaT,,2020-11-20,44,2020-11-20,6,2020-11-20,804,2020-11-20,382,NaT,,NaT,,2020-11-20,2331,NaT,,2020-11-20,1514,NaT,,2020-11-20,6258,2020-11-20,26508,2020-11-20,1,NaT,,2020-11-20,218,2020-11-20,1,NaT,,2020-11-20,359,NaT,,2020-11-20,284,NaT,,NaT,,2020-11-20,38,NaT,,NaT,,2020-11-20,4234,NaT,,2020-11-20,143468,2020-11-20,282,2020-11-20,2006,NaT,,2020-11-20,1420,2020-11-20,346,2020-11-20,944,2020-11-20,2564,2020-11-20,12137,NaT,,2020-11-20,1420,NaT,,NaT,,NaT,,2020-11-20,1,NaT,,NaT,,2020-11-20,403,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,2020-11-20,6997,NaT,,NaT,,NaT,,2020-11-20,1,2020-11-20,100,NaT,,NaT,,2020-11-20,888,NaT,,2020-11-20,100,NaT,,NaT,,NaT,,NaT,,2020-11-20,4964,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,2020-11-20,100,NaT,,2020-11-20,209,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,2020-11-20,2,NaT,,NaT,,NaT,,NaT,,NaT,,2020-11-20,346,2020-11-20,22790,NaT,,2020-11-20,8261,NaT,,2020-11-20,827,NaT,,NaT,,NaT,,NaT,,2020-11-20,4938,NaT,,NaT,,NaT,,2020-11-20,6,NaT,,NaT,,NaT,,...,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,,NaT,


In [2]:
df = pd.read_parquet('rus_dates_231120.parquet', engine='pyarrow')
#filename = "spx_nosurv_191020.xlsx"
#df = pd.read_excel(filename, sheet_name='rawdates')

num_cols = df.shape[1]

rep_dfs = {}
for x in range(0, num_cols, 2):
    idf = df.iloc[:,x:x+2]
    idf = idf.set_index(idf.columns[0]).dropna()
    idf[idf.columns[0]] = pd.to_datetime(idf[idf.columns[0]].astype(int), format="%Y%m%d")
    idf['rep_date'] = 1
    rep_dfs[idf.columns[0]] = idf.drop_duplicates(subset=idf.columns[0]).set_index(idf.columns[0])

df = pd.read_parquet('rus_close_201120.parquet', engine='pyarrow')
#df = pd.read_excel(filename, sheet_name='price')

pr_dfs = {}
for x in range(0, num_cols, 2):
    idf = df.iloc[:,x:x+2]
    idf = idf.set_index(idf.columns[0]).dropna()
    pr_dfs[idf.columns[0]] = idf
    
df = pd.read_parquet('rus_volume_201120.parquet', engine='pyarrow')
#df = pd.read_excel(filename, sheet_name='volume')

vol_dfs = {}
for x in range(0, num_cols, 2):
    idf = df.iloc[:,x:x+2]
    idf = idf.set_index(idf.columns[0]).dropna()
    vol_dfs[idf.columns[0]] = idf

In [3]:
combined = {}
for k, v in pr_dfs.items():
    try:
        idf = v.copy()
        idf['rep_date'] = rep_dfs[k]
        idf['volume'] = vol_dfs[k]
        idf = idf.dropna(subset=['volume'])
        idf['volume'] = idf['volume'].astype(int)
        #idf['volume'] = np.log(idf['volume'])
        idf['volume'] = (idf['volume']-idf['volume'].cummin())/(idf['volume'].cummax()-idf['volume'].cummin())
        #print(k)
        #print(v.index)
        #idf['vix'] = vix.loc[v.index[0]:v.index[-1]]
#         idf['impvol'] = impvol_dfs[k]
#         idf = idf.dropna(subset=['impvol'])
#         idf['impvol'] = idf['impvol'].astype(float)
#         idf['daymove'] = daymove_dfs[k]
#         idf = idf.dropna(subset=['daymove'])
#         idf['daymove'] = idf['daymove'].astype(float)
        combined[k] = idf
    except:
        print(k)

usp
usc
usbn
usfi
usca
1640755d


In [4]:
b4rep_window = 10
thr = 0.65


In [5]:
def compress(series, sides=0.01):
    
    series = pd.Series(np.where( series > series.quantile(q=(1-sides)) , series.quantile(q=(1-sides)), series ), 
                      index=series.index)
    series = np.where(series < series.quantile(q=sides), series.quantile(q=sides), series)
    return series

def compress_right_tail(series, sides=0.01):
    
    series = pd.Series(np.where( series > series.quantile(q=(1-sides)) , series.quantile(q=(1-sides)), series ), 
                      index=series.index)
    return series

def create_features(dtf, tick, lead=5, ret_lag=[1,2,5,10,20,44,261], tr=0.025, vol_lag=[1,2,5,10,20,44,261], vol_window=100, sides=0.01):
    
    feat_cols = []
#    dtf['volume'] = np.log(dtf['volume'])

#    dtf['volume'] = (dtf['volume']-dtf['volume'].cummin())/(dtf['volume'].cummax()-dtf['volume'].cummin())

    avg_vol_name = 'avg_vol_' + str(vol_window)
    dtf[avg_vol_name] = dtf['volume'].rolling(window=vol_window).mean()
    dtf[avg_vol_name] = np.log(dtf[avg_vol_name])
#     avg_range_name = 'avg_range_' + str(vol_window)
#     dtf[avg_range_name] = np.abs(dtf['high'].rolling(window=vol_window).mean()-dtf['low'].rolling(window=vol_window).mean())
#     dtf[avg_range_name] = compress(dtf[avg_range_name], sides=sides)

# #     avg_low_name = 'avg_low_' + str(vol_window)
# #     dtf[avg_low_name] = dtf[tick]-dtf['low'].rolling(window=vol_window).mean()
# #     avg_high_name = 'avg_high_' + str(vol_window)
# #     dtf[avg_high_name] = dtf['high'].rolling(window=vol_window).mean()-dtf[tick]
    
#     for avg in vol_avgs:

#         name_simple = 'open'+str(avg)
#         dtf[name_simple] = dtf['open'].rolling(window=avg).mean()
#         dtf[name_simple] = np.log(dtf[name_simple])
#         dtf[name_simple] = compress(dtf[name_simple], sides=sides)
#         dtf[name_simple] = (dtf[name_simple]-dtf[name_simple].min()) / (dtf[name_simple].max() - dtf[name_simple].min())
#         feat_cols.append(name_simple)
#         name_simple = 'high'+str(avg)
#         dtf[name_simple] = dtf['high'].rolling(window=avg).mean()
#         dtf[name_simple] = np.log(dtf[name_simple])
#         dtf[name_simple] = compress(dtf[name_simple], sides=sides)
#         dtf[name_simple] = (dtf[name_simple]-dtf[name_simple].min()) / (dtf[name_simple].max() - dtf[name_simple].min())
#         #feat_cols.append(name_simple)
#         name_simple = 'low'+str(avg)
#         dtf[name_simple] = dtf['low'].rolling(window=avg).mean()
#         dtf[name_simple] = np.log(dtf[name_simple])
#         dtf[name_simple] = compress(dtf[name_simple], sides=sides)
#         dtf[name_simple] = (dtf[name_simple]-dtf[name_simple].min()) / (dtf[name_simple].max() - dtf[name_simple].min())
#         #feat_cols.append(name_simple)
# #         name_simple = 'vwap'+str(avg)
# #         dtf[name_simple] = dtf['vwap'].rolling(window=avg).mean()
# #         dtf[name_simple] = np.log(dtf[name_simple])
# #         dtf[name_simple] = compress(dtf[name_simple], sides=sides)
# #         dtf[name_simple] = (dtf[name_simple]-dtf[name_simple].min()) / (dtf[name_simple].max() - dtf[name_simple].min())
# #         feat_cols.append(name_simple)
#         name_simple = 'ranges'+str(avg)
#         dtf[name_simple] = np.abs(dtf['high'+str(avg)]-dtf['low'+str(avg)])
#         dtf[name_simple] = compress(dtf[name_simple], sides=sides)
#         dtf[name_simple] = dtf[name_simple] / dtf[avg_range_name]
#         dtf[name_simple] = compress(dtf[name_simple], sides=sides)
#         #feat_cols.append(name_simple)


    
    
    for lag in vol_lag:
        name_simple = 'vol'+str(lag)
        dtf[name_simple] = dtf['volume'].rolling(window=lag).mean()
        dtf[name_simple] = np.log(dtf[name_simple])
        dtf[name_simple] = dtf[name_simple] / dtf[avg_vol_name]
        dtf[name_simple] = compress(dtf[name_simple], sides=sides)
        dtf[name_simple] = (dtf[name_simple]-dtf[name_simple].cummin()) / (dtf[name_simple].cummax() - dtf[name_simple].cummin())
        feat_cols.append(name_simple)
    for lag in ret_lag:
        name = 'ret'+str(lag)
        dtf[name] = dtf[tick]/dtf[tick].shift(lag)-1
        dtf[name] = compress(dtf[name], sides=sides)
        dtf[name] = (dtf[name] - dtf[name].cummin()) / (dtf[name].cummax() - dtf[name].cummin())
        feat_cols.append(name)

        
        
    dtf['tick'] = tick
    #feat_cols.append(tick)
#    dtf['fwd'] = dtf[tick].shift(-lead+1) / dtf[tick]-1
#    dtf['fwd'] = dtf[tick].shift(-lead) / dtf[tick]-1
    dtf['fwd'] = dtf[tick].shift(-lead-1) / dtf[tick]-1
    dtf['ycol'] = np.where(dtf['fwd'] >= tr, 1, 0)
    feat_cols.append('ycol')
#    dtf['mva_90'] = dtf[tick].rolling(window=90).mean()
#    dtf['y_90'] = np.where(dtf[tick] >= dtf['mva_90'], 1, 0)
#    feat_cols.append('y_90')
    return dtf, feat_cols

In [6]:
def ticker_features(combined_dtfs, ticker, growth_tr=0.02, back_window=28, b4rep_window=10,
                   ret_lag=[1,2,5,10,20,44,261], vol_lag=[1,2,5,10,20,44,261], vol_window=100, sides=0.01):

    idf = combined_dtfs[ticker]
    if combined[ticker].shape[0]==0:
        return
    else:
        idf['idate'] = idf.index
        idf['idate'] = idf['idate'].shift(1)
        idf['b4rep_window'] = idf.index
        idf['b4rep_window'] = idf['b4rep_window'].shift(b4rep_window+1)
        #print(ticker)
        idf['back_window'] = pd.to_datetime(np.where(idf.rep_date == 1, idf['idate'] - \
                                                     pd.Timedelta(days=back_window), pd.to_datetime(np.nan)))
        idf['back_window'] = idf['back_window'].fillna(method='backfill')

        idf['b4rep_window'] = pd.to_datetime(np.where(idf.rep_date == 1, idf['b4rep_window'], pd.to_datetime(np.nan)))
        idf['b4rep_window'] = idf['b4rep_window'].fillna(method='backfill')

        idf['b4rep'] = np.where( idf.idate >= idf.back_window , 1, 0 )
        idf['next_rep'] = pd.to_datetime(np.where( idf.rep_date == 1, idf.idate, pd.to_datetime(np.nan)))
        idf['next_rep'] = idf['next_rep'].fillna(method='backfill')
        idf['price_atrep'] = idf['next_rep'].map(idf[ticker])
        idf['price_b4rep'] = idf['b4rep_window'].map(idf[ticker])
        idf['b4rep_chng'] = idf['price_atrep']/idf['price_b4rep']-1

        #idf = idf[idf.b4rep == 1]#[['meli', 'volume', 'rep_date']]
        #idf['ycol'] = np.where( (idf['b4rep'] == 1) & (idf['b4rep_chng'] > growth_tr), 1, 0 )
        #idf = idf[[ticker, 'volume', 'b4rep', 'b4rep_chng']]

        #idf = create_returns(idf, ticker, lead=b4rep_window, tr=growth_tr)
        idf, feat_cols = create_features(idf, ticker, lead=b4rep_window,ret_lag=ret_lag, vol_lag=vol_lag, tr=growth_tr, 
                            vol_window=vol_window, sides=sides)
        feat_cols.append('fwd')
        feat_cols.append('tick')
        idf = idf[idf.idate == idf.b4rep_window]

        return idf[feat_cols].dropna()

In [7]:
def nasdaq_features(combined, params=None):
    
    if params == None:
        params = {
            'b4rep_window': b4rep_window, 
            'growth_tr': 0.02,
            'back_window': 28, 
            'ret_lag': [1,2,5,10,22,44,51,66,118,132,246,261,375,480,520], 
            'vol_lag': [1,2,5,10,22,44,51,66,118,132], 

            'vol_window': 100, 
            'sides': 0.01
        }
    
    df = pd.DataFrame()
    for i in combined:
        df = df.append(ticker_features(combined, i, **params))
    df = df.sort_index()
    
    return df

In [8]:
%%time
df = nasdaq_features(combined, params=None)
#### ADDED COLUMN FOR TESTING RETURNS
#df['case_id'] = np.linspace(1, df.shape[0], df.shape[0])
df.index = df.index
df['date:tick'] = df.index.astype(str)+':'+df['tick']

#### ALSO AMENDED my_tss function to account for new column
df.tail()

CPU times: user 11min 50s, sys: 43.1 s, total: 12min 33s
Wall time: 12min 35s


Unnamed: 0,vol1,vol2,vol5,vol10,vol22,vol44,vol51,vol66,vol118,vol132,...,ret132,ret246,ret261,ret375,ret480,ret520,ycol,fwd,tick,date:tick
2020-11-04,0.994088,0.95848,0.981387,1.0,1.0,0.603456,0.669694,0.559647,0.722521,0.594559,...,0.70552,0.180702,0.190553,0.306563,0.277464,0.1684,1,0.057067,cub,2020-11-04:cub
2020-11-04,0.563505,0.660507,0.735121,0.802036,0.651312,0.674908,0.748085,0.549566,0.147549,0.158246,...,1.0,0.734899,0.672051,0.436339,0.225403,0.224323,1,0.173591,lb,2020-11-04:lb
2020-11-04,0.407591,0.437258,0.433952,0.530222,0.576903,0.513732,0.498317,0.35829,0.299393,0.224228,...,0.412258,0.191474,0.157903,0.212176,0.284064,0.170022,1,0.156056,tjx,2020-11-04:tjx
2020-11-04,0.404366,0.433344,0.424887,0.487036,0.519652,0.332274,0.322564,0.245384,0.707844,0.597582,...,0.671538,0.419869,0.415999,0.584888,0.805211,0.80405,0,-0.061892,cprt,2020-11-04:cprt
2020-11-04,0.6055,0.599509,0.603569,0.631164,0.608285,0.469109,0.477734,0.339964,0.458427,0.193097,...,0.703509,0.323146,0.326635,0.301669,0.212998,0.182829,1,0.098958,jack,2020-11-04:jack


In [9]:
# save the model to disk
filename = 'rus_inference.sav'
 
# some time later...
 
# load the model from disk
estimatorCopy = pickle.load(open(filename, 'rb'))

In [10]:
estimatorCopy.classes_

array([0, 1])

In [11]:
cols=['vol1', 'vol2', 'vol5', 'vol10', 'vol22', 'vol44', 'vol51', 'vol66',
       'vol118', 'vol132', 'ret1', 'ret2', 'ret5', 'ret10', 'ret22', 'ret44',
       'ret51', 'ret66', 'ret118', 'ret132', 'ret246', 'ret261', 'ret375',
       'ret480', 'ret520']
num_companies=3000
next_reports = df['tick'][-num_companies:]
ticker_proba = {}
existing_reports = []
for ticker in next_reports:
    try:
        pred_features=combined[ticker][-1:][cols]
        ticker_proba[ticker] = estimatorCopy.predict_proba(pred_features)[:,2]
        existing_reports.append(ticker)
    except:
        print('except:')
        print(ticker)

except:
iova
except:
rev
except:
mdgl
except:
siga
except:
atri
except:
alex
except:
tdc
except:
gkos
except:
bll
except:
alco
except:
lsi
except:
prsc
except:
vsto
except:
core
except:
ndls
except:
gden
except:
zg
except:
viac
except:
nwsa
except:
immr
except:
nh
except:
genc
except:
comm
except:
lcut
except:
atrs
except:
gtn
except:
avxl
except:
gts
except:
nfg
except:
hasi
except:
pvac
except:
catm
except:
drh
except:
abmd
except:
fslr
except:
pzza
except:
selb
except:
txmd
except:
tpic
except:
aaon
except:
tast
except:
wlk
except:
roll
except:
bfs
except:
synh
except:
alny
except:
cah
except:
cndt
except:
vtol
except:
oge
except:
hlt
except:
ph
except:
plpc
except:
pcrx
except:
adnt
except:
wldn
except:
mmi
except:
flir
except:
novt
except:
flt
except:
cytk
except:
grpn
except:
swx
except:
aes
except:
bdx
except:
he
except:
amh
except:
cdxc
except:
epam
except:
pbh
except:
gpro
except:
exls
except:
atec
except:
ueic
except:
ten
except:
jll
except:
albo
except:
oled
except:
ffg
exce

except:
adbe
except:
cbrl
except:
fdx
except:
brc
except:
mlhr
except:
apog
except:
cmd
except:
ebf
except:
kbh
except:
nke
except:
neog
except:
scs
except:
ofed
except:
azo
except:
jef
except:
wor
except:
ctas
except:
ful
except:
gis
except:
rad
except:
cost
except:
mtn
except:
air
except:
jbl
except:
acn
except:
fds
except:
dri
except:
kmx
except:
camp
except:
schl
except:
flr
except:
calm
except:
fonr
except:
tho
except:
pcyg
except:
rlgt
except:
mu
except:
prgs
except:
info
except:
ango
except:
mkc
except:
snx
except:
cmtl
except:
etfc
except:
epac
except:
sgh
except:
pep
except:
idt
except:
stz
except:
cag
except:
bbby
except:
payx
except:
lndc
except:
vlgea
except:
rpm
except:
lw
except:
rgp
except:
hele
except:
ayi
except:
grif
except:
pke
except:
dpz
except:
acrx
except:
cnbka
except:
fast
except:
azz
except:
jpm
except:
dal
except:
hifs
except:
frc
except:
enz
except:
jnj
except:
voxx
except:
blk
except:
odc
except:
sbt
except:
c
except:
wina
except:
bac
except:
usb
except:
pn

except:
well
except:
cpf
except:
pb
except:
inva
except:
isbc
except:
spok
except:
essa
except:
fdp
except:
pe
except:
irt
except:
srev
except:
vnda
except:
oii
except:
ma
except:
musa
except:
bku
except:
hzo
except:
chx
except:
cmo
except:
hbnc
except:
din
except:
sigi
except:
cern
except:
jrvr
except:
eght
except:
fdbc
except:
cw
except:
aegn
except:
frta
except:
dbd
except:
grub
except:
sc
except:
amp
except:
mho
except:
etr
except:
txrh
except:
hbmd
except:
axti
except:
brkl
except:
ar
except:
kra
except:
bg
except:
now
except:
pki
except:
ba
except:
blkb
except:
gnmk
except:
wdc
except:
mrc
except:
iart
except:
spwr
except:
extr
except:
afg
except:
sri
except:
grmn
except:
tt
except:
vici
except:
bhe
except:
site
except:
tmhc
except:
ntb
except:
echo
except:
r
except:
cto
except:
caci
except:
slab
except:
nfbk
except:
etsy
except:
cgnx
except:
bgcp
except:
cbtx
except:
rol
except:
res
except:
egov
except:
ssnc
except:
gwb
except:
six
except:
pega
except:
eqc
except:
flow
except:
b

except:
cwh
except:
amrc
except:
ospn
except:
tcfc
except:
med
except:
bwxt
except:
jll
except:
vno
except:
amc
except:
apls
except:
nlsn
except:
extn
except:
leg
except:
pacb
except:
kmpr
except:
wm
except:
omi
except:
argo
except:
door
except:
fn
except:
mdlz
except:
src
except:
ir
except:
rgs
except:
hmn
except:
ste
except:
g
except:
qtnt
except:
ogs
except:
nc
except:
halo
except:
trex
except:
hznp
except:
insp
except:
cvi
except:
rpai
except:
ni
except:
frpt
except:
nsit
except:
cort
except:
lpx
except:
gtes
except:
aaic
except:
jci
except:
opch
except:
lfvn
except:
npo
except:
eaf
except:
fsp
except:
avnt
except:
pru
except:
dar
except:
w
except:
etn
except:
spke
except:
inn
except:
zbra
except:
hsc
except:
fprx
except:
arcb
except:
hy
except:
kar
except:
avns
except:
pfis
except:
gci
except:
anh
except:
cbfv
except:
ande
except:
vpg
except:
jeld
except:
nbr
except:
ke
except:
es
except:
mcy
except:
rhp
except:
akr
except:
ctlt
except:
mcs
except:
trec
except:
fi
except:
trc
exce

except:
ihc
except:
elox
except:
ci
except:
stra
except:
immr
except:
lcut
except:
comm
except:
kelya
except:
tpic
except:
chh
except:
eye
except:
wow
except:
seas
except:
coll
except:
mfa
except:
carg
except:
ths
except:
vtvt
except:
nwsa
except:
ea
except:
ocul
except:
midd
except:
bfam
except:
ttwo
except:
mchp
except:
evrg
except:
icui
except:
hska
except:
cplg
except:
kim
except:
fls
except:
mitk
except:
tds
except:
idcc
except:
bfs
except:
vtol
except:
cbay
except:
ajx
except:
nrg
except:
cpri
except:
lasr
except:
cbb
except:
evc
except:
lgf/a
except:
nymt
except:
rdfn
except:
lyv
except:
vsto
except:
ttd
except:
gden
except:
wms
except:
qtrx
except:
ctmx
except:
fwonk
except:
pbh
except:
ardx
except:
axdx
except:
ceva
except:
env
except:
clnc
except:
sail
except:
gern
except:
nxst
except:
iboc
except:
gm
except:
d
except:
wynn
except:
edit
except:
tell
except:
hmsy
except:
mbio
except:
zbh
except:
amrx
except:
pvac
except:
vhc
except:
prsc
except:
he
except:
petq
except:
aes
exc

In [12]:
{k: v for k, v in sorted(ticker_proba.items(), key=lambda item: item[1],reverse=True)}

{}

In [13]:
import datetime
from datetime import datetime
from yahoo_earnings_calendar import YahooEarningsCalendar
import math

my_custom_delay_s = 0.01

yec = YahooEarningsCalendar(my_custom_delay_s)

# Returns the next earnings date of BOX in Unix timestamp
#print(yec.get_next_earnings_date('box'))
# 1508716800

ticker_date = {}
sml_tickers = []
today=datetime.today()
start=today+timedelta(days=14)
end=today+timedelta(days=16)
#end=today+timedelta(days=18)

for ticker in set(next_reports):
    pred_features=combined[ticker][-1:][cols]
    prediction = estimatorCopy.predict_proba(pred_features)[:,1]
    index = str(ticker)+':'+str(math.floor(100*prediction))

    try:
        date = datetime.fromtimestamp(yec.get_next_earnings_date(ticker))
        ticker_date[index] = date
        if date>start and date<end:
            sml_tickers.append(ticker)
    except:
        ticker_date[index] = datetime(3000, 1, 1, 1, 1)

{k: v for k, v in sorted(ticker_date.items(), key=lambda item: item[1])}


{'rushb:44': datetime.datetime(2020, 2, 11, 6, 0),
 'cwen:24': datetime.datetime(2020, 2, 26, 6, 0),
 'cwen/a:24': datetime.datetime(2020, 2, 26, 6, 0),
 'srne:24': datetime.datetime(2020, 5, 11, 6, 0),
 'nk:44': datetime.datetime(2020, 5, 11, 6, 0),
 'atlc:52': datetime.datetime(2020, 5, 15, 6, 0),
 'nymx:38': datetime.datetime(2020, 5, 15, 6, 0),
 'sieb:31': datetime.datetime(2020, 5, 28, 6, 0),
 'brid:36': datetime.datetime(2020, 5, 29, 6, 0),
 'fcbp:41': datetime.datetime(2020, 7, 21, 6, 0),
 'alex:50': datetime.datetime(2020, 7, 30, 6, 0),
 'tbnk:49': datetime.datetime(2020, 8, 3, 6, 0),
 'rvi:32': datetime.datetime(2020, 8, 5, 6, 0),
 'meet:42': datetime.datetime(2020, 8, 6, 6, 0),
 'mjco:22': datetime.datetime(2020, 8, 10, 6, 0),
 'vhc:36': datetime.datetime(2020, 8, 10, 6, 0),
 'bxg:32': datetime.datetime(2020, 8, 10, 6, 0),
 'ofed:33': datetime.datetime(2020, 8, 14, 6, 0),
 'grif:42': datetime.datetime(2020, 10, 8, 6, 0),
 'taco:44': datetime.datetime(2020, 10, 15, 6, 0),
 'mb

In [14]:
df = pd.DataFrame(sml_tickers)
writer = pd.ExcelWriter('rus_tickers.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='welcome', index=False)
writer.save()

In [15]:
len(df)

150

In [16]:
df

Unnamed: 0,0
0,amkr
1,gt
2,cpsi
3,jll
4,iivi
...,...
98,dva
99,nsp
100,fisv
101,armk
