In [1]:
!pip install yfinance autogen scikit-learn pandas numpy

Collecting autogen
  Downloading autogen-0.7.5-py3-none-any.whl.metadata (19 kB)
Collecting pyautogen==0.7.5 (from autogen)
  Downloading pyautogen-0.7.5-py3-none-any.whl.metadata (27 kB)
Collecting asyncer==0.0.8 (from pyautogen==0.7.5->autogen)
  Downloading asyncer-0.0.8-py3-none-any.whl.metadata (6.7 kB)
Collecting diskcache (from pyautogen==0.7.5->autogen)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Collecting docker (from pyautogen==0.7.5->autogen)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting fast-depends<3,>=2.4.12 (from pyautogen==0.7.5->autogen)
  Downloading fast_depends-2.4.12-py3-none-any.whl.metadata (7.6 kB)
Collecting python-dotenv (from pyautogen==0.7.5->autogen)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting tiktoken (from pyautogen==0.7.5->autogen)
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading autogen-0.7.5-py3-none-an

In [4]:
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler

# Ticker symbols used as the construction-sector universe for this notebook.
construction_stocks = [
    "CAT",  # Caterpillar
    "VMC",  # Vulcan Materials
    "XHB",  # Homebuilders ETF
]

# Function to fetch stock data
def fetch_stock_data(stock_symbol, period="3mo", interval="1d"):
    """Download price history for one ticker and derive return/volatility columns.

    Args:
        stock_symbol: Ticker symbol passed to ``yf.Ticker``.
        period: Look-back window forwarded to ``Ticker.history``.
        interval: Bar size forwarded to ``Ticker.history``.

    Returns:
        The history DataFrame with three extra columns:
        ``Return`` (percentage change of ``Close``), ``Volatility`` (5-row
        rolling standard deviation of ``Return``) and ``Stock`` (the symbol).
        Rows containing any NaN are dropped, which removes the warm-up rows of
        the rolling window. If the download yields no rows, the empty frame is
        returned unchanged so callers can test ``.empty``.
    """
    history = yf.Ticker(stock_symbol).history(period=period, interval=interval)

    if history.empty:
        # Nothing to derive from; callers check `.empty` and retry or skip.
        return history

    history["Return"] = history["Close"].pct_change()  # Daily return
    history["Volatility"] = history["Return"].rolling(window=5).std()  # 5-row rolling std of returns
    history["Stock"] = stock_symbol  # Identify stock
    return history.dropna()

# Fetch data for all stocks
stock_data = {stock: fetch_stock_data(stock) for stock in construction_stocks}

# Combine into a single dataset, tagging every row with the ticker it came from
dataframes = [frame.assign(Stock=symbol) for symbol, frame in stock_data.items()]
final_df = pd.concat(dataframes)

# Feature Engineering
# The moving averages are computed per ticker: a rolling window over the
# concatenated frame would blend the last prices of one ticker into the first
# rows of the next one.
closes_by_stock = final_df.groupby("Stock")["Close"]
moving_avg_10 = closes_by_stock.transform(lambda closes: closes.rolling(window=10).mean())
moving_avg_5 = closes_by_stock.transform(lambda closes: closes.rolling(window=5).mean())
# Fall back to the 5-row average where the 10-row window is not yet full
final_df["Moving_Avg_10"] = moving_avg_10.fillna(moving_avg_5)

# Risk label: 1 when volatility is above the 50th percentile of all rows, else 0.
# NOTE(review): the label is derived from "Volatility", which is also a model
# feature, so the classifier is effectively learning this threshold.
final_df["Risk_Level"] = np.where(final_df["Volatility"] > final_df["Volatility"].quantile(0.50), 1, 0)

# Prepare for model training
features = ["Close", "Return", "Volatility", "Moving_Avg_10"]
# The first rows of each ticker have no 5- or 10-row average yet; leave those
# rows out of the training matrix so the estimator never receives NaN.
train_df = final_df.dropna(subset=features)
X = train_df[features]
y = train_df["Risk_Level"]

# Scale data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train a Random Forest model (fixed seed for reproducible fits)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_scaled, y)

import time

def predict_risk():
    """Predict a High/Low risk label for the latest row of every tracked ticker.

    For each symbol in ``construction_stocks`` the function fetches six months
    of daily data (retrying once with one year if nothing comes back), builds
    the same four features the model was trained on, scales them with the
    fitted ``scaler`` and classifies the most recent row with ``model``.

    Returns:
        A list of dicts with keys ``"Stock"`` and ``"Predicted Risk"``
        (``"High"`` when the model predicts 1, otherwise ``"Low"``). Tickers
        with no data after the retry are reported and left out.
    """
    # Must match the column order used when the scaler and model were fitted.
    feature_columns = ["Close", "Return", "Volatility", "Moving_Avg_10"]

    results = []
    for stock in construction_stocks:
        history = fetch_stock_data(stock, period="6mo", interval="1d")

        if history.empty:
            print(f"No recent stock data available for {stock}. Retrying with longer period...")
            time.sleep(2)  # Short delay before retrying
            history = fetch_stock_data(stock, period="1y", interval="1d")  # Try fetching 1 year data

        if history.empty:
            print(f"Skipping {stock}, still no data after retry.")
            continue

        # Compute the moving average on the full history BEFORE selecting the
        # last row: a 10-row rolling mean over a single row is always NaN.
        closes = history["Close"]
        moving_avg = closes.rolling(window=10).mean().fillna(closes.rolling(window=5).mean())
        latest = history.assign(Moving_Avg_10=moving_avg)[feature_columns].tail(1)

        risk_pred = model.predict(scaler.transform(latest))
        results.append({"Stock": stock, "Predicted Risk": "High" if risk_pred[0] == 1 else "Low"})

    return results

# Score the most recent trading day for every tracked ticker.
risk_predictions = predict_risk()

# Report one line per ticker that produced a prediction.
print("\nFinal Risk Predictions:")
for prediction in risk_predictions:
    symbol = prediction["Stock"]
    label = prediction["Predicted Risk"]
    print(f"Stock: {symbol} | Predicted Risk Level: {label}")



Final Risk Predictions:
Stock: CAT | Predicted Risk Level: Low
Stock: VMC | Predicted Risk Level: High
Stock: XHB | Predicted Risk Level: High


In [5]:
# Sanity check: show the first five rows of the combined per-ticker frame,
# including the derived Return / Volatility columns.
print(final_df.head())  # Confirms that data was retrieved and combined

                                 Open        High         Low       Close  \
Date                                                                        
2024-12-02 00:00:00-05:00  405.971696  406.659178  399.326076  401.039795   
2024-12-03 00:00:00-05:00  402.046085  404.048753  395.151379  397.801666   
2024-12-04 00:00:00-05:00  397.114182  400.003583  394.683092  398.050751   
2024-12-05 00:00:00-05:00  399.037119  399.963715  392.770086  393.537292   
2024-12-06 00:00:00-05:00  396.695696  398.279885  391.305453  393.587097   

                            Volume  Dividends  Stock Splits    Return  \
Date                                                                    
2024-12-02 00:00:00-05:00  1548500        0.0           0.0 -0.008865   
2024-12-03 00:00:00-05:00  1716200        0.0           0.0 -0.008074   
2024-12-04 00:00:00-05:00  1193800        0.0           0.0  0.000626   
2024-12-05 00:00:00-05:00  1291400        0.0           0.0 -0.011339   
2024-12-06 00:00:00-05

In [6]:
import pandas as pd

# Re-run the prediction so this cell does not depend on an earlier cell's result
risk_predictions = predict_risk()

# Convert predictions into a DataFrame. Declaring the columns explicitly keeps
# the frame well-formed (and the next line from raising KeyError) when every
# ticker was skipped and the prediction list is empty.
stock_risk_df = pd.DataFrame(risk_predictions, columns=["Stock", "Predicted Risk"])
# Numeric encoding of the label: 1 for "High", 0 for anything else
stock_risk_df["Risk_Score"] = stock_risk_df["Predicted Risk"].eq("High").astype(int)

# Save to CSV (written to the current working directory)
stock_risk_df.to_csv("stock_risk.csv", index=False)

print("Stock risk results saved!")

Stock risk results saved!
