# In [None]:
import pandas as pd
import numpy as np
import glob
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
# Results file written at the end of the run. It is excluded from the inputs
# so that re-running the script does not try to model its own output.
OUTPUT_FILE = "stock_next_day_returns.csv"
# Columns every input CSV must provide; files lacking any of them are skipped.
REQUIRED_COLS = ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
# Model inputs, all known at the close of a given day.
FEATURE_COLS = ['Close', 'MA7', 'MA30']
# Schema of the results table; fixed up front so an empty run still sorts
# and exports cleanly instead of raising KeyError.
RESULT_COLS = ['Stock', 'Predicted_NextDay_Return', 'RMSE', 'R2']
# Minimum number of usable rows (after rolling-window warm-up) to fit a model.
MIN_ROWS = 50


def prepare_features(df):
    """Build the per-day feature table and next-day target for one stock.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw price data as read from a CSV file. It is not modified.

    Returns
    -------
    pandas.DataFrame or None
        ``None`` when any column in ``REQUIRED_COLS`` is missing. Otherwise a
        frame sorted by ``Date`` with ``Return`` (daily percentage change of
        ``Close``), ``MA7`` and ``MA30`` (rolling means of ``Close``) and
        ``Target`` added. Rows containing NaN in any column before ``Target``
        is added are dropped. ``Target`` is the ``Return`` of the following
        row, so it is NaN on the last row only; that row is kept because it
        is the input for the forecast.
    """
    if not all(col in df.columns for col in REQUIRED_COLS):
        return None

    frame = df.assign(Date=pd.to_datetime(df['Date'])).sort_values('Date')
    close = frame['Close']
    frame = frame.assign(
        Return=close.pct_change(),
        MA7=close.rolling(7).mean(),
        MA30=close.rolling(30).mean(),
    )
    frame = frame.dropna()
    # The label for day t is the return realised on day t+1. Fitting on the
    # same-day return would make the "next-day" forecast merely re-estimate
    # the return that is already known for the latest row.
    return frame.assign(Target=frame['Return'].shift(-1))


def evaluate_and_forecast(frame):
    """Fit a random forest on one stock and forecast its next-day return.

    Parameters
    ----------
    frame : pandas.DataFrame
        Output of ``prepare_features``.

    Returns
    -------
    tuple
        ``(next_day_return, rmse, r2)``. ``rmse`` and ``r2`` are measured on
        the chronologically last 20% of the labelled rows;
        ``next_day_return`` is the model's prediction for the latest row.
    """
    # Only rows whose next-day return is already known can be used for
    # training and evaluation; this excludes the latest row.
    labelled = frame.dropna(subset=['Target'])
    # shuffle=False keeps the split chronological, so the model is always
    # evaluated on data that comes after what it was trained on.
    X_train, X_test, y_train, y_test = train_test_split(
        labelled[FEATURE_COLS], labelled['Target'],
        test_size=0.2, shuffle=False,
    )
    rf = RandomForestRegressor(n_estimators=100, random_state=42)
    rf.fit(X_train, y_train)
    y_pred = rf.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    # Forecast from the most recent day's features (the unlabelled row).
    next_day_return = rf.predict(frame[FEATURE_COLS].iloc[-1:])[0]
    return next_day_return, rmse, r2


# Sorted so the processing (and printing) order does not depend on the
# filesystem's directory ordering.
files = sorted(f for f in glob.glob("*.csv") if f != OUTPUT_FILE)
predictions_list = []
for file in files:
    # Strip only the final extension so dotted tickers ("BRK.B.csv") survive.
    stock_name = file.rsplit(".", 1)[0]
    df = prepare_features(pd.read_csv(file))
    # Skip files with a missing column or too little history.
    if df is None or len(df) < MIN_ROWS:
        continue
    next_day_return, rmse, r2 = evaluate_and_forecast(df)
    print(f"{stock_name} -> RMSE: {rmse:.6f}, R2: {r2:.6f}")
    predictions_list.append({
        'Stock': stock_name,
        'Predicted_NextDay_Return': next_day_return,
        'RMSE': rmse,
        'R2': r2
    })

# Explicit columns keep the table well-formed even when no file qualified.
predictions_df = pd.DataFrame(predictions_list, columns=RESULT_COLS)
predictions_df = predictions_df.sort_values('Predicted_NextDay_Return', ascending=False)
print("\nPredicted Next-Day Returns for All Stocks:")
print(predictions_df)
predictions_df.to_csv(OUTPUT_FILE, index=False)
