# A Possible Data-Caching Technique for Streamlit

What happens in Streamlit

1. Streamlit re-runs your script top-to-bottom on every interaction (select box change, button click, etc.).

2. To avoid re-downloading data on every rerun, you use caching.

3. When the user really wants fresh data, you bust the cache (manually or with a time-to-live).

Put three small functions in a module (preprocess.py), then add one small wrapper in the app that Streamlit caches.

In [None]:
# preprocess.py (functional, pure)
import pandas as pd
import yfinance as yf

def fetch_prices(tickers, start, end):
    """Download price history for ``tickers`` and return it ordered by index.

    The request is made through ``yf.download`` with ``auto_adjust=False`` and
    ``group_by="ticker"``. The index of the result is passed through
    ``pd.to_datetime`` before the frame is sorted on it.
    """
    frame = yf.download(
        tickers,
        start=start,
        end=end,
        auto_adjust=False,
        group_by="ticker",
    )
    frame.index = pd.to_datetime(frame.index)
    ordered = frame.sort_index()
    return ordered

def to_long_with_sector(raw, sectors):
    """Reshape a ticker-grouped price frame into one long, sector-labelled table.

    Args:
        raw: DataFrame whose first column level holds ticker symbols and whose
            index holds the dates (e.g. a download made with
            ``group_by="ticker"``).
        sectors: Mapping of sector name to an iterable of ticker symbols.

    Returns:
        DataFrame with the columns Date, Ticker, Sector, Open, High, Low,
        Close, Volume, sorted by Ticker then Date with a fresh integer index.
        Tickers that are absent from ``raw`` are skipped. When none of the
        requested tickers is present, an empty frame with those same columns
        is returned instead of raising.
    """
    fields = ["Open", "High", "Low", "Close", "Volume"]
    out_cols = ["Date", "Ticker", "Sector"] + fields
    # Resolve the available tickers once rather than on every iteration.
    available = set(raw.columns.get_level_values(0))
    rows = []
    for sector, tkr_list in sectors.items():
        for tkr in tkr_list:
            if tkr not in available:
                continue
            df = raw[tkr].copy()
            df["Ticker"] = tkr
            df["Sector"] = sector
            df["Date"] = df.index
            rows.append(df[out_cols].reset_index(drop=True))
    if not rows:
        # pd.concat raises ValueError on an empty list; hand back an empty
        # table with the expected schema so callers can test `.empty`.
        return pd.DataFrame(columns=out_cols)
    long_df = pd.concat(rows, ignore_index=True)
    # (light clean: ffill/interp if you need)
    return long_df.sort_values(["Ticker", "Date"]).reset_index(drop=True)

def to_wide(long_df, field):
    """Pivot the long table into a Date x Ticker grid holding one field.

    Rows are ordered by Date and missing cells are forward-filled.
    """
    grid = long_df.pivot(index="Date", columns="Ticker", values=field)
    grid = grid.sort_index()
    return grid.ffill()


Use st.cache_data to cache the result of your pure function(s). Add a TTL (e.g., 1 hour) and a manual refresh button to bust the cache on demand.

In [None]:
# app.py
import time
import pandas as pd
import streamlit as st
from datetime import date, timedelta
from preprocess import fetch_prices, to_long_with_sector, to_wide

# Sector name -> ticker symbols in that sector. Feeds the sector and ticker
# select boxes and, flattened, the list of tickers handed to load_data.
SECTORS = {
    "Tech": ["AAPL","MSFT","NVDA"],
    "Finance": ["JPM","BAC","MA"],
    "Healthcare": ["JNJ","PFE","ABBV"],
    "Consumer Goods": ["PEP","MCD","NKE"],
    "Energy": ["XOM","SHEL","CVX"],
}

# ---- 1) Cached loader ----
@st.cache_data(ttl=3600)  # cached results expire after one hour
def load_data(tickers, start, end, sectors, _nonce=None):
    """Fetch prices and return ``(long_df, close_df)``.

    ``long_df`` is the sector-labelled long table and ``close_df`` is the
    wide Date x Ticker table of the "Close" field derived from it.

    ``_nonce`` is accepted but never read by the body.
    NOTE(review): Streamlit does not hash parameters whose names start with
    an underscore, so changing ``_nonce`` presumably does not change the
    cache key -- confirm against the st.cache_data documentation.
    """
    long_df = to_long_with_sector(fetch_prices(tickers, start, end), sectors)
    return long_df, to_wide(long_df, "Close")

# ---- 2) UI controls ----
st.title("Stalking Stocks (demo)")
# Flatten the sector -> tickers mapping into one download list.
all_tickers = [tkr for tkr_list in SECTORS.values() for tkr in tkr_list]
default_start = date.today() - timedelta(days=365*3)
start = st.date_input("Start date", value=default_start)
end   = st.date_input("End date", value=date.today())
selected_sector = st.selectbox("Sector", list(SECTORS.keys()))
selected_ticker = st.selectbox("Ticker", SECTORS[selected_sector])

# An empty or inverted range cannot produce any rows; stop this run with a
# message instead of letting the download/transform fail with a traceback.
if start >= end:
    st.error("Start date must be before end date.")
    st.stop()

col1, col2 = st.columns(2)
with col1:
    do_refresh = st.button("🔄 Refresh data now")
with col2:
    st.caption("Data auto-refreshes every hour (TTL).")

# ---- 3) Force-refresh mechanism ----
# Streamlit leaves underscore-prefixed arguments out of the cache key, so a
# changing `_nonce` never triggers a refetch. Even a hashed nonce would fall
# back to the stale entry on the next rerun. Clearing the cached function's
# entries makes this run and all later runs see fresh data.
if do_refresh:
    load_data.clear()

# ---- 4) Load data (cached unless refresh) ----
with st.spinner("Loading data..."):
    long_df, close_df = load_data(all_tickers, start, end, SECTORS)

# ---- 5) Use data
st.line_chart(close_df[[selected_ticker]])
st.dataframe(long_df[long_df["Ticker"] == selected_ticker].tail(10))


# A simple way to call yfinance and add sectors for your own analysis/exploration

In [5]:
# Yahoo Finance API to pull the data
import yfinance as yf
# Pandas and NumPy for data manipulation
import pandas as pd
import numpy as np

# Example tickers (one per sector)
tickers = ['AAPL', 'JPM', 'PFE', 'PG', 'XOM']
# Ticker -> sector label; fetch_stock_data looks each ticker up in this dict.
sectors = {
    'AAPL': 'Technology',
    'JPM': 'Finance',
    'PFE': 'Healthcare',
    'PG': 'Consumer Goods',
    'XOM': 'Energy'
}

# Function to fetch and flatten stock data
def fetch_stock_data(tickers, start_date='2023-01-01', end_date='2025-01-01'):
    """Download prices for ``tickers`` and stack them into one flat table.

    Args:
        tickers: Iterable of ticker symbols; each must be a key of the
            module-level ``sectors`` dict.
        start_date: Start of the range passed to ``yf.download``.
        end_date: End of the range passed to ``yf.download``.

    Returns:
        DataFrame with one row per ticker and date: the downloaded columns
        for that ticker (index reset into a column) plus ``Ticker`` and
        ``Sector``.

    Raises:
        KeyError: If a ticker is missing from ``sectors`` or from the
            downloaded frame.
    """
    # auto_adjust is spelled out rather than left to the library default.
    # NOTE(review): newer yfinance releases changed this default and warn
    # when it is omitted; True matches the sample output, which has no
    # separate "Adj Close" column -- confirm against the installed version.
    df = yf.download(
        tickers,
        start=start_date,
        end=end_date,
        group_by='ticker',
        auto_adjust=True,
        progress=False,
    )
    flat_data = [
        df[ticker].reset_index().assign(Ticker=ticker, Sector=sectors[ticker])
        for ticker in tickers
    ]
    return pd.concat(flat_data, ignore_index=True)

# Fetch data
# Uses the default date range of fetch_stock_data, then previews the first rows.
df = fetch_stock_data(tickers)
df.head()


  df = yf.download(tickers, start=start_date, end=end_date, group_by='ticker', progress=False)


Price,Date,Open,High,Low,Close,Volume,Ticker,Sector
0,2023-01-03,128.468178,129.079551,122.44315,123.330635,112117500,AAPL,Technology
1,2023-01-04,125.125335,126.870724,123.340509,124.602707,89113600,AAPL,Technology
2,2023-01-05,125.361983,125.993082,123.024948,123.281326,80962700,AAPL,Technology
3,2023-01-06,124.257586,128.478056,123.153159,127.817375,87754700,AAPL,Technology
4,2023-01-09,128.655523,131.554638,128.083587,128.339966,70790800,AAPL,Technology


In [6]:
# Show the column labels of the combined frame.
print(df.columns)

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Ticker', 'Sector'], dtype='object', name='Price')


# Next note

# Next note