In [1]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
import os
import yfinance as yf


In [3]:
#We focus on a specific sector to analayze price movements: tech stocks with mcap > 10bn
companies = pd.read_csv('tickers.csv')
techComps = companies[(companies['Sector'] == 'Technology') & (companies['Market Cap'] > 10000000000)]['Symbol']

In [4]:
look_back = 5
period = '5y'
#Ingest Data
all_data = pd.DataFrame()
for symbol in techComps:
    try:
          stock_data = yf.download(symbol, period=period)
          stock_data.columns = stock_data.columns.droplevel(1)
          stock_data.columns.name = None
          stock_data['Ticker'] = symbol
          prices = stock_data
          prices['Date'] = prices.index
          print(f"Downloaded data for {symbol}")

          prices = prices.dropna()

          #Cast numeric data to type float
          prices[['Open', 'High', 'Low', 'Close']] = prices[['Open', 'High', 'Low', 'Close']].astype(float)
          prices['Volume'] = prices['Volume'].astype(int)

          #Create percentage change column for each col to normalize data.
          prices[['Open_pc', 'High_pc', 'Low_pc', 'Close_pc', 'Volume_pc']] = prices[['Open', 'High', 'Low', 'Close', 'Volume']].pct_change()

          #Take the previous 30 days of price data (only for close for this paper)
          for num in range(0,look_back):
            col_name = 'Close_pc' + '_' + str(num)
            prices[col_name] = prices['Close_pc'].shift(num+1)


          #Keep prices on date, movement class, previous -lookback period- days
          cols_to_keep = []
          cols_to_keep = ['Date'] + [col for col in prices.columns if 'Close_pc' in col]
          prices_pattern = prices[cols_to_keep]
          all_data = pd.concat([all_data, prices_pattern], ignore_index=True)
    except Exception as e:
      print(f"Failed to download data for {symbol}: {e}")

all_data = all_data.dropna()
#Expand this to multiple day compound movements in future
all_data['Movement_Class'] = np.where(all_data['Close_pc']>0, 'Positive', 'Negative')
print(len(all_data))
print(all_data.head(10))

  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed


Downloaded data for AAPL
Downloaded data for ADBE


  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for ADI


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for ADP


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for ADSK


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for ALAB


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for AMAT


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for AMD


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for ANSS


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for APH


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for APP


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for ARM


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for ASML


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed

Downloaded data for ASX



  stock_data = yf.download(symbol, period=period)


Downloaded data for AUR


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for AVGO
Downloaded data for BIDU


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed


Downloaded data for BSY
Downloaded data for CACI


  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for CDNS


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for CHKP
Downloaded data for CLS


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for CRM
Downloaded data for CRWD


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for CRWV


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for CTSH


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for CW
Downloaded data for CYBR


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for DDOG
Downloaded data for DELL


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for DOCS
Downloaded data for DOCU


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed


Downloaded data for DT
Downloaded data for DUOL


  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for EA


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for EMR


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for ERIC


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for ETN


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for FDS
Downloaded data for FLEX


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for FLUT
Downloaded data for FSLR


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for FTNT
Downloaded data for GDDY


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for GE


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for GEN
Downloaded data for GFS


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for GOOG
Downloaded data for GOOGL


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for GWRE
Downloaded data for HPE


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for HPQ


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for HUBB
Downloaded data for HUBS


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for IBM


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for INFY


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for INTC


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for INTU


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for IOT


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for JBL


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for JKHY


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for KLAC


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for LDOS


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for LOGI


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed


Downloaded data for LRCX
Downloaded data for MANH


  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for MBLY


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for MCHP


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for MCHPP
Downloaded data for MDB


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for META
Downloaded data for MNDY


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed


Downloaded data for MPWR
Downloaded data for MRVL


  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for MSFT


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for MSI


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for MSTR


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for MU


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for NEE


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for NET


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for NOK
Downloaded data for NOW


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for NTAP


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed


Downloaded data for NTES
Downloaded data for NTNX


  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for NVDA


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for NXPI
Downloaded data for OKTA


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for ON


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for ORCL


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for OTIS


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for PANW


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for PAYC
Downloaded data for PCTY


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed


Downloaded data for PINS
Downloaded data for PLTR


  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for PSTG
Downloaded data for PTC


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for QCOM
Downloaded data for RBLX


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for RBRK
Downloaded data for RDDT


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for SAP


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed


Downloaded data for SHOP
Downloaded data for SMCI


  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for SNAP
Downloaded data for SNOW


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for SNPS


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for SSNC


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for STM
Downloaded data for STRF


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for STRK


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed


Downloaded data for STX
Downloaded data for TEAM


  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed


Downloaded data for TEL
Downloaded data for TOST


  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for TSM
Downloaded data for TTAN


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed


Downloaded data for TTD
Downloaded data for TTWO


  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for TWLO


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for TXN
Downloaded data for TYL


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for UI


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for UMC
Downloaded data for VEEV


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed


Downloaded data for VRSK
Downloaded data for VRSN


  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for VRT
Downloaded data for WDAY


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for WDC


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for WIT
Downloaded data for XYZ


[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)
[*********************100%***********************]  1 of 1 completed
  stock_data = yf.download(symbol, period=period)


Downloaded data for YMM
Downloaded data for ZM


[*********************100%***********************]  1 of 1 completed


Downloaded data for ZS
157662
         Date  Close_pc  Close_pc_0  Close_pc_1  Close_pc_2  Close_pc_3  \
6  2020-08-11 -0.029740    0.014535   -0.022736    0.034889    0.003625   
7  2020-08-12  0.033234   -0.029740    0.014535   -0.022736    0.034889   
8  2020-08-13  0.017698    0.033234   -0.029740    0.014535   -0.022736   
9  2020-08-14 -0.000891    0.017698    0.033234   -0.029740    0.014535   
10 2020-08-17 -0.002611   -0.000891    0.017698    0.033234   -0.029740   
11 2020-08-18  0.008333   -0.002611   -0.000891    0.017698    0.033234   
12 2020-08-19  0.001255    0.008333   -0.002611   -0.000891    0.017698   
13 2020-08-20  0.022190    0.001255    0.008333   -0.002611   -0.000891   
14 2020-08-21  0.051532    0.022190    0.001255    0.008333   -0.002611   
15 2020-08-24  0.011960    0.051532    0.022190    0.001255    0.008333   

    Close_pc_4 Movement_Class  
6     0.006678       Negative  
7     0.003625       Positive  
8     0.034889       Positive  
9    -0.022736  

In [10]:
all_data.shape
print('Negative points: ',(all_data.Movement_Class=='Negative').sum())
print('Positive points: ',(all_data.Movement_Class=='Positive').sum())

Negative points:  76101
Positive points:  81561


In [11]:
all_data.to_csv('techStockPrices.csv', index=False)