In [2]:
import pandas as pd
import yfinance as yf

# Daily SMCI bars for the requested window (auto_adjust=True asks yfinance for
# adjusted prices); reset_index() turns the date index into a regular "Date" column.
SMCI = yf.download(
    "SMCI",
    start="2017-01-01",
    end="2026-01-01",
    auto_adjust=True,
).reset_index()

[*********************100%***********************]  1 of 1 completed


In [4]:
# Size check: (number of rows, number of columns) of the downloaded frame.
SMCI.shape

(2222, 6)

In [6]:
# Show the first rows (head() defaults to 5) to inspect the column layout.
SMCI.head()

Price,Date,Close,High,Low,Open,Volume
Ticker,Unnamed: 1_level_1,SMCI,SMCI,SMCI,SMCI,SMCI
0,2017-01-03,2.845,2.875,2.813,2.825,1808000
1,2017-01-04,2.885,2.9,2.84,2.85,2532000
2,2017-01-05,2.855,2.9,2.82,2.87,1196000
3,2017-01-06,2.845,2.9,2.83,2.875,1413000
4,2017-01-09,2.825,2.89,2.825,2.845,1360000


In [8]:
# Per-column summary statistics: count, mean, min, quartiles, max, std.
SMCI.describe()

Price,Date,Close,High,Low,Open,Volume
Ticker,Unnamed: 1_level_1,SMCI,SMCI,SMCI,SMCI,SMCI
count,2222,2222.0,2222.0,2222.0,2222.0,2222.0
mean,2021-06-02 05:22:05.292529408,15.638361,16.156527,15.133826,15.641389,19682860.0
min,2017-01-03 00:00:00,1.165,1.216,0.85,1.155,13000.0
25%,2019-03-20 06:00:00,2.32,2.35,2.27575,2.30775,2229000.0
50%,2021-06-02 12:00:00,3.566,3.6145,3.5235,3.559,4227000.0
75%,2023-08-16 18:00:00,25.441751,26.370251,24.816,25.530001,24813980.0
max,2025-11-03 00:00:00,118.806999,122.900002,112.234001,121.199997,369735000.0
std,,23.076197,23.954901,22.233726,23.137707,35234950.0


In [10]:
# Print column dtypes, non-null counts, and memory usage.
SMCI.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2222 entries, 0 to 2221
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   (Date, )        2222 non-null   datetime64[ns]
 1   (Close, SMCI)   2222 non-null   float64       
 2   (High, SMCI)    2222 non-null   float64       
 3   (Low, SMCI)     2222 non-null   float64       
 4   (Open, SMCI)    2222 non-null   float64       
 5   (Volume, SMCI)  2222 non-null   int64         
dtypes: datetime64[ns](1), float64(4), int64(1)
memory usage: 104.3 KB


In [20]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Expects `SMCI` from the download cell: yf.download("SMCI", ...).reset_index()

# ---- feature prep ----
df = SMCI.copy()
if df.columns.nlevels > 1:
    # yfinance labels columns as (field, ticker); keep only the field level so
    # df["Close"], df["Volume"], ... are plain Series rather than 1-column frames.
    df.columns = df.columns.get_level_values(0)

df["Return"] = df["Close"].pct_change()                 # simple daily return
df["MA50"] = df["Close"].rolling(50).mean()
df["MA200"] = df["Close"].rolling(200).mean()
df["VolMA30"] = df["Volume"].rolling(30).mean()
df["Volatility30"] = df["Return"].rolling(30).std() * np.sqrt(252)  # annualized

FIG_DIR = Path("figures")
FIG_DIR.mkdir(exist_ok=True)
saved_files = []  # filenames actually written, used for the closing status line


def _finish_figure(fig, ax, title, xlabel, ylabel, filename):
    """Label `ax`, write `fig` to FIG_DIR / filename, close it, and record the name.

    Closing the figure keeps it from rendering inline and frees its memory;
    the PNG on disk is the output of this cell.
    """
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    fig.tight_layout()
    fig.savefig(FIG_DIR / filename, dpi=220, bbox_inches="tight")
    plt.close(fig)
    saved_files.append(filename)


# ---- Figure 1: Close with 50/200-day moving averages ----
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(df["Date"], df["Close"], label="Close")
ax.plot(df["Date"], df["MA50"], label="MA50")
ax.plot(df["Date"], df["MA200"], label="MA200")
ax.legend()
_finish_figure(
    fig, ax,
    "SMCI Close with 50/200-Day Moving Averages",
    "Date", "Price",
    "smci_close_ma.png",
)

# ---- Figure 2: Histogram of daily returns ----
mu, sigma = df["Return"].mean(), df["Return"].std()
fig, ax = plt.subplots(figsize=(10, 4))
ax.hist(df["Return"].dropna(), bins=60)  # first row has no return (NaN)
_finish_figure(
    fig, ax,
    f"SMCI Daily Returns Histogram (μ={mu:.4f}, σ={sigma:.4f})",
    "Daily Return", "Count",
    "smci_returns_hist.png",
)

# ---- Figure 3: 30-Day Rolling Volatility (annualized) ----
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(df["Date"], df["Volatility30"])
_finish_figure(
    fig, ax,
    "SMCI 30-Day Rolling Volatility (Annualized)",
    "Date", "Volatility",
    "smci_volatility30.png",
)

# ---- Figure 4: Daily volume line with its 30-day moving average ----
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(df["Date"], df["Volume"], label="Volume")
ax.plot(df["Date"], df["VolMA30"], label="VolMA30")
ax.legend()
_finish_figure(
    fig, ax,
    "SMCI Daily Volume",
    "Date", "Shares",
    "smci_volume_line.png",
)

# Status line is built from the files actually written so it cannot drift
# from the savefig calls above.
f"Saved {len(saved_files)} figures → {FIG_DIR}/ ({', '.join(saved_files)})"


'Saved 4 figures → figures/ (smci_close_ma.png, smci_returns_hist.png, smci_volatility30.png, smci_volume.png)'

In [28]:
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Builds on `SMCI` from the download cell; only Date, Close and Volume are used.

# Work on a flat-column copy: yfinance labels columns as (field, ticker), and the
# plain labels used below ("Date", "Close", "Volume") should each select one column.
prices = SMCI.copy()
if prices.columns.nlevels > 1:
    prices.columns = prices.columns.get_level_values(0)

# Every feature is a callable so it is evaluated on the date-sorted frame,
# not on the frame as it was before sorting.
df = (
    prices.sort_values("Date")
        .assign(
            ret1=lambda x: np.log(x["Close"]).diff(),       # daily log return
            ret5=lambda x: x["ret1"].rolling(5).mean(),     # 5-day mean log return
            ret21=lambda x: x["ret1"].rolling(21).mean(),   # 21-day mean log return
            vol21=lambda x: x["ret1"].rolling(21).std(),    # 21-day std of log returns
            lv_chg=lambda x: np.log(x["Volume"]).diff(),    # daily change in log volume
            y_next=lambda x: x["ret1"].shift(-1),           # target: next row's log return
        )
        .dropna()  # drops the rolling warm-up rows and the last row (no next-day target)
)

feat_cols = ["ret1", "ret5", "ret21", "vol21", "lv_chg"]
X = df[feat_cols].to_numpy()
y = df["y_next"].to_numpy()

# Chronological split: first 80% of rows for training, remaining 20% for testing.
n = len(df)
n_tr = int(0.8 * n)

model = Ridge(alpha=1.0)
model.fit(X[:n_tr], y[:n_tr])
y_pred = model.predict(X[n_tr:])

mae = mean_absolute_error(y[n_tr:], y_pred)
# Take the square root explicitly instead of passing squared=False, which newer
# scikit-learn releases no longer accept.
rmse = np.sqrt(mean_squared_error(y[n_tr:], y_pred))
r2 = r2_score(y[n_tr:], y_pred)

print(f"Train:{n_tr} Test:{n-n_tr}")
print(f"MAE {mae:.6f}  RMSE {rmse:.6f}  R^2 {r2:.6f}")
print("Coeffs:", dict(zip(feat_cols, np.round(model.coef_, 6))))


Train:1760 Test:440
MAE 0.045200  RMSE 0.066439  R^2 -0.002139
Coeffs: {'ret1': 0.003716, 'ret5': 0.003647, 'ret21': 0.001877, 'vol21': 0.04401, 'lv_chg': 0.000425}


