In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime, timedelta
from joblib import load
import os

# --------------------------------
# CSV Path
# --------------------------------
csv_path = "data/Coca-Cola_stock_updated.csv"

# --------------------------------
# Load Existing Data (if available)
# --------------------------------
if os.path.exists(csv_path):
    df = pd.read_csv(csv_path)
    # Unparseable dates become NaT and are discarded; timestamps are kept in UTC.
    df["Date"] = pd.to_datetime(df["Date"], errors="coerce", utc=True)
    df = df.dropna(subset=["Date"]).set_index("Date").sort_index()
    print(f"✅ Loaded existing data with {len(df)} rows.")
else:
    df = pd.DataFrame()
    print("📁 No existing CSV found. Starting fresh.")

# --------------------------------
# Update Stock Data
# --------------------------------
today = datetime.now().date()
if not df.empty:
    last_date = df.index[-1].date()
else:
    # No history yet: seed the file with roughly the last six months.
    last_date = today - timedelta(days=180)

print(f"🔄 Updating data from {last_date} to {today}...")

fetch_start = last_date + timedelta(days=1)
fetch_end = today + timedelta(days=1)  # one day past today so today's bar is requested

if fetch_start < fetch_end:
    try:
        new_data = yf.download("KO", start=fetch_start, end=fetch_end, progress=False, auto_adjust=False)
    except Exception as e:
        # Chain the original exception so the real traceback is not lost.
        raise RuntimeError(f"❌ yfinance download failed: {e}") from e
    if new_data is None:
        new_data = pd.DataFrame()
else:
    # The stored data already reaches today; there is nothing to request.
    new_data = pd.DataFrame()

if not new_data.empty:
    # Recent yfinance releases return MultiIndex columns such as ("Close", "KO")
    # even for a single ticker. Left as-is they never match the flat names in
    # `expected`, so every new row would be all-NaN after column selection and
    # silently removed. Keep only the level holding the price field names.
    if isinstance(new_data.columns, pd.MultiIndex):
        price_level = next(
            (
                level
                for level in range(new_data.columns.nlevels)
                if "Close" in new_data.columns.get_level_values(level)
            ),
            0,
        )
        new_data.columns = new_data.columns.get_level_values(price_level)

    new_data.index = pd.to_datetime(new_data.index, utc=True)
    df = new_data if df.empty else pd.concat([df, new_data])
    df = df[~df.index.duplicated()]
    print(f"✅ Added {len(new_data)} new rows.")
else:
    print("⚠️ No new data fetched from Yahoo Finance.")

# --------------------------------
# Keep required columns and calculate extra feature
# --------------------------------
expected = ["Open", "High", "Low", "Close", "Adj Close", "Volume"]
available = [col for col in expected if col in df.columns]
if "Close" not in available:
    raise ValueError("❌ No 'Close' column available; cannot compute Daily_Return.")

# .copy() avoids assigning into a slice of the previous frame.
df = df[available].copy()
# Only rows without a closing price are unusable. A blanket dropna() here would
# also delete the oldest row on every run (its return is always NaN), so the
# saved history would shrink by one row each time the notebook is executed.
df = df.dropna(subset=["Close"])
df["Daily_Return"] = df["Close"].pct_change()

# --------------------------------
# Save Clean CSV
# --------------------------------
csv_dir = os.path.dirname(csv_path)
if csv_dir:
    os.makedirs(csv_dir, exist_ok=True)
# The loader above reads the index back from a column named "Date".
df.index.name = "Date"
df.to_csv(csv_path)
print(f"📁 Data saved to: {csv_path}")

✅ Loaded existing data with 6421 rows.
🔄 Updating data from 2022-03-11 to 2025-06-15...
✅ Added 817 new rows.
📁 Data saved to: data/Coca-Cola_stock_updated.csv


  df["Daily_Return"] = df["Close"].pct_change()


In [2]:
# ----------------------------
# Feature Engineering
# ----------------------------
# The indicator columns are written onto df, but rows are NOT dropped from df.
# Dropping in place made this cell non-idempotent: each re-run recomputed the
# 50-day window on an already truncated frame and removed 49 more rows.
df["Daily_Return"] = df["Close"].pct_change()
df["MA20"] = df["Close"].rolling(window=20).mean()
df["MA50"] = df["Close"].rolling(window=50).mean()
df["Volatility"] = df["Daily_Return"].rolling(window=20).std()

model_path = "outputs/linear_model.pkl"
feature_cols = ["Open", "High", "Low", "Volume", "MA20", "MA50", "Volatility"]

# Only the columns the prediction actually reads must be present; a NaN in an
# unrelated column (e.g. "Adj Close") should not discard the newest row.
model_df = df.dropna(subset=feature_cols + ["Close"])

# ----------------------------
# Check Minimum Data
# ----------------------------
if len(model_df) < 60:
    raise ValueError(f"❌ Only {len(model_df)} valid rows after cleaning. Need at least 60 to proceed.")

# ----------------------------
# Prepare Latest Row for Prediction
# ----------------------------
latest_row = model_df.iloc[-1]
X_latest = pd.DataFrame([{col: latest_row[col] for col in feature_cols}])

# ----------------------------
# Load Model and Predict
# ----------------------------
if not os.path.exists(model_path):
    raise FileNotFoundError(f"❌ Model file not found at: {model_path}")

# NOTE: joblib.load unpickles the file and can execute arbitrary code;
# only load model files that come from a trusted source.
model = load(model_path)

if model.n_features_in_ != X_latest.shape[1]:
    raise ValueError(f"❌ Model expects {model.n_features_in_} features, but got {X_latest.shape[1]}")

# If the model recorded its training column names, feed the features in that
# exact order instead of relying on the order they are listed in above.
trained_names = getattr(model, "feature_names_in_", None)
if trained_names is not None:
    missing = [name for name in trained_names if name not in X_latest.columns]
    if missing:
        raise ValueError(f"❌ Model expects features not present in input: {missing}")
    X_latest = X_latest[list(trained_names)]

predicted_price = model.predict(X_latest)[0]
latest_close = latest_row["Close"]
change = predicted_price - latest_close
direction = "📈 Up" if change > 0 else "📉 Down"

# ----------------------------
# Save Latest Features for Debugging
# ----------------------------
X_latest["Predicted_Close"] = predicted_price
X_latest["Actual_Close"] = latest_close
X_latest["Change"] = change
X_latest["Direction"] = direction
X_latest["Date"] = latest_row.name  # index label (timestamp) of the row used
X_latest.set_index("Date", inplace=True)
X_latest.to_csv("outputs/latest_prediction_input.csv")

# ----------------------------
# Output
# ----------------------------
print("\n📊 Prediction Summary")
print(f"Current Close:     ${latest_close:.2f}")
print(f"Predicted Close:   ${predicted_price:.2f}")
print(f"Expected Change:   ${change:.2f} → {direction}")


📊 Prediction Summary
Current Close:     $56.65
Predicted Close:   $57.36
Expected Change:   $0.71 → 📈 Up


In [3]:
# ----------------------------
# Feature Engineering
# ----------------------------
# Indicators are recomputed on df, but NaN warm-up rows are filtered into a
# separate frame rather than dropped from df in place. The in-place drop cut
# another 49 rows off df on every execution of this cell, so repeated runs
# eventually tripped the minimum-data check below.
df["Daily_Return"] = df["Close"].pct_change()
df["MA20"] = df["Close"].rolling(window=20).mean()
df["MA50"] = df["Close"].rolling(window=50).mean()
df["Volatility"] = df["Daily_Return"].rolling(window=20).std()

required_features = ["Open", "High", "Low", "Volume", "MA20", "MA50", "Volatility"]

# Require only the columns that are read below, so a gap in an unused column
# cannot push the prediction back to an older row.
usable_rows = df.dropna(subset=required_features + ["Close"])

# ----------------------------
# Check Minimum Data
# ----------------------------
if len(usable_rows) < 60:
    raise ValueError(f"❌ Only {len(usable_rows)} valid rows after cleaning. Need at least 60 to proceed.")

# ----------------------------
# Prepare Latest Row for Prediction
# ----------------------------
latest_row = usable_rows.iloc[-1]
X_latest = pd.DataFrame([{name: latest_row[name] for name in required_features}])

# ----------------------------
# Load Model and Predict
# ----------------------------
# `model_path` is not defined in this cell; it must already exist in the
# kernel from an earlier cell.
if not os.path.exists(model_path):
    raise FileNotFoundError(f"❌ Model file not found at: {model_path}")

# NOTE: joblib.load unpickles the file and can execute arbitrary code;
# only load model files that come from a trusted source.
model = load(model_path)

if model.n_features_in_ != X_latest.shape[1]:
    raise ValueError(f"❌ Model expects {model.n_features_in_} features, but got {X_latest.shape[1]}")

# When the model exposes the column names it was fitted on, present the
# features in that order rather than the order listed above.
fitted_names = getattr(model, "feature_names_in_", None)
if fitted_names is not None:
    absent = [name for name in fitted_names if name not in X_latest.columns]
    if absent:
        raise ValueError(f"❌ Model expects features not present in input: {absent}")
    X_latest = X_latest[list(fitted_names)]

predicted_price = model.predict(X_latest)[0]
latest_close = latest_row["Close"]
change = predicted_price - latest_close
direction = "📈 Up" if change > 0 else "📉 Down"

# ----------------------------
# Save Latest Features for Debugging
# ----------------------------
X_latest["Predicted_Close"] = predicted_price
X_latest["Actual_Close"] = latest_close
X_latest["Change"] = change
X_latest["Direction"] = direction
X_latest["Date"] = latest_row.name  # index label (timestamp) of the row used
X_latest.set_index("Date", inplace=True)
# NOTE(review): this writes to the working directory, not "outputs/" —
# confirm that location is intended.
X_latest.to_csv("latest_prediction_input.csv")

# ----------------------------
# Output
# ----------------------------
print("\n📊 Prediction Summary")
print(f"Current Close:     ${latest_close:.2f}")
print(f"Predicted Close:   ${predicted_price:.2f}")
print(f"Expected Change:   ${change:.2f} → {direction}")


📊 Prediction Summary
Current Close:     $56.65
Predicted Close:   $57.36
Expected Change:   $0.71 → 📈 Up
