# 0. Reading Dataset

In [1]:
import yfinance as yf
import pandas as pd
from datetime import datetime

tickers = ['AAPL', 'MMM']
# Ticker -> sector lookup, built directly as a dict so the name `sectors`
# never holds two different kinds of object.
sectors = dict(zip(tickers, ['Tech', 'Industrials']))

# Column order of the combined price table.
price_columns = ['Date', 'Ticker', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']

# Collect one frame per ticker and concatenate once at the end; calling
# pd.concat inside the loop re-copies all previously loaded rows each time.
frames = []

for ticker in tickers:
    data = (
        yf.download(tickers=[ticker])
        .assign(Ticker=ticker)
        .reset_index(drop=False)
    )
    # Keep dates as plain strings here; the cells below time the parsing.
    data['Date'] = data['Date'].astype('str')
    frames.append(data[price_columns])

stocks = pd.concat(frames, ignore_index=True)

print(sectors)
display(stocks)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
{'AAPL': 'Tech', 'MMM': 'Industrials'}


Unnamed: 0,Date,Ticker,Open,High,Low,Close,Adj Close,Volume
0,1980-12-12,AAPL,0.128348,0.128906,0.128348,0.128348,0.100039,469033600
1,1980-12-15,AAPL,0.122210,0.122210,0.121652,0.121652,0.094820,175884800
2,1980-12-16,AAPL,0.113281,0.113281,0.112723,0.112723,0.087861,105728000
3,1980-12-17,AAPL,0.115513,0.116071,0.115513,0.115513,0.090035,86441600
4,1980-12-18,AAPL,0.118862,0.119420,0.118862,0.118862,0.092646,73449600
...,...,...,...,...,...,...,...,...
25780,2022-08-22,MMM,143.279999,143.679993,141.029999,141.589996,141.589996,10183400
25781,2022-08-23,MMM,141.000000,142.419998,140.710007,141.750000,141.750000,15358400
25782,2022-08-24,MMM,140.910004,141.589996,139.710007,141.039993,141.039993,15430500
25783,2022-08-25,MMM,141.070007,143.270004,140.639999,142.759995,142.759995,47355600


# 1. Reading Dates

### Original

In [2]:
%%timeit -r 3 -n 100

# Row-by-row baseline: parse every date string with datetime.strptime.
dates = []

for date in stocks['Date']:
    # '%m' is the month directive. '%M' means *minute*: it would read the
    # month digits as minutes and leave the month at its default of January.
    dt = datetime.strptime(date, '%Y-%m-%d')
    dates.append(dt)

stocks['New_Date'] = dates

213 ms ± 979 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)


### Optimized

In [2]:
%%timeit -r 3 -n 100

# Vectorized parse of the whole 'Date' column in a single call.
parsed_dates = pd.to_datetime(stocks['Date'])
stocks['New_Date'] = parsed_dates

10.1 ms ± 214 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)


# 2. Indexing

### Original

In [4]:
%%timeit -r 3 -n 100

# Baseline on purpose: one label lookup per row, keeping the 2022 closes.
closes = [
    stocks['Close'][row]
    for row in range(len(stocks))
    if stocks['New_Date'][row].year == 2022
]

258 ms ± 1.9 ms per loop (mean ± std. dev. of 3 runs, 100 loops each)


### Optimized

In [3]:
%%timeit -r 3 -n 100

# Filter the rows with a boolean mask first, then pick the column
# from the filtered frame.
in_2022 = stocks['New_Date'].dt.year == 2022
closes = stocks[in_2022]['Close']

1.52 ms ± 189 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)


In [4]:
%%timeit -r 3 -n 100

# Single .loc call: the row mask and the column label are resolved together.
in_2022 = stocks['New_Date'].dt.year == 2022
closes = stocks.loc[in_2022, 'Close']

1.34 ms ± 86.3 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)


# 3. Reassigning at Index

### Original

In [5]:
%%timeit -r 3 -n 100

# Baseline on purpose: label each row by inspecting its date one at a time.
years = [
    'This Year' if stocks['New_Date'][row].year == 2022 else 'Not This Year'
    for row in range(len(stocks))
]

stocks['Year'] = years

259 ms ± 2.22 ms per loop (mean ± std. dev. of 3 runs, 100 loops each)


### Optimized

In [5]:
%%timeit -r 3 -n 100

# Fill the column with the default label, then overwrite only the 2022 rows.
in_2022 = stocks['New_Date'].dt.year == 2022
stocks['Year'] = 'Not This Year'
stocks.loc[in_2022, 'Year'] = 'This Year'

1.77 ms ± 324 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)


# 4. Mapping Dictionaries

### Original

In [6]:
%%timeit -r 3 -n 100

# Baseline on purpose: look up each row's ticker in the `sectors` dict
# one row at a time.
sector = [sectors[stocks['Ticker'][row]] for row in range(len(stocks))]

stocks['Sector'] = sector

112 ms ± 497 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)


### Optimized

In [6]:
%%timeit -r 3 -n 100

# Series.map translates every ticker through the `sectors` dict in one call.
ticker_column = stocks['Ticker']
stocks['Sector'] = ticker_column.map(sectors)

1.53 ms ± 206 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)


# 5. Applying Functions

### Original

In [7]:
%%timeit -r 3 -n 100

# Baseline on purpose: compute (Close - Open) / (Open + 1e-5) with
# per-row label lookups.
p_changes = [
    (stocks['Close'][row] - stocks['Open'][row]) / (stocks['Open'][row] + 1e-5)
    for row in range(len(stocks))
]

stocks['%Change'] = p_changes

344 ms ± 3.12 ms per loop (mean ± std. dev. of 3 runs, 100 loops each)


### Optimized

In [8]:
%%timeit -r 3 -n 100

# Row-wise apply: each row arrives as a Series labelled 'Open' / 'Close'.
# Read the fields by label; integer keys such as x[0] on a string-labelled
# Series depend on a positional fallback that pandas has deprecated.
stocks['%Change'] = stocks[['Open', 'Close']].apply(
    lambda row: (row['Close'] - row['Open']) / (row['Open'] + 1e-5), axis=1)

218 ms ± 3.73 ms per loop (mean ± std. dev. of 3 runs, 100 loops each)


In [7]:
%%timeit -r 3 -n 100

def percent_change(x):
    """Return the relative open-to-close change for one row.

    `x` is a row Series carrying 'Open' and 'Close' labels. Fields are read
    by label rather than by integer position, because integer keys on a
    string-labelled Series rely on a deprecated positional fallback.
    The 1e-5 keeps the denominator non-zero when Open is exactly 0.
    """
    return (x['Close'] - x['Open']) / (x['Open'] + 1e-5)

stocks['%Change'] = stocks[['Open', 'Close']].apply(percent_change, axis=1)

223 ms ± 7.89 ms per loop (mean ± std. dev. of 3 runs, 100 loops each)


In [9]:
%%timeit -r 3 -n 100

# Whole-column arithmetic: no Python-level loop and no per-row apply.
open_price = stocks['Open']
close_price = stocks['Close']
stocks['%Change'] = (close_price - open_price) / (open_price + 1e-5)

491 µs ± 115 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)


# 6. Binning

### Original

In [8]:
%%timeit -r 3 -n 100

def _bucket(change):
    """Map one '%Change' value to its label using strict '<' thresholds."""
    if change < -2:
        return 'down alot'
    if change < 0:
        return 'down'
    if change < 2:
        return 'up'
    # Anything that is not below 2 ends up here.
    return 'up alot'

# Baseline on purpose: fetch and classify the values one row at a time.
bins = [_bucket(stocks['%Change'][row]) for row in range(len(stocks))]

stocks['Bins'] = bins

297 ms ± 891 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)


### Optimized

In [10]:
%%timeit -r 3 -n 100

# right=False makes each bin closed on the left and open on the right
# ([-2, 0), [0, 2), ...), which matches the strict `<` comparisons used by
# the loop version; with the default right-closed bins a change of exactly
# 0 would be labelled 'down' instead of 'up'.
# Infinite outer edges ensure no finite value falls outside the bins.
stocks['Bins'] = pd.cut(stocks['%Change'],
                        [float('-inf'), -2, 0, 2, float('inf')],
                        right=False,
                        labels=['down alot', 'down', 'up', 'up alot'])

1.26 ms ± 216 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)
