In [95]:
!pip install yfinance pandas numpy scikit-learn xgboost matplotlib



In [96]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt



In [97]:
import yfinance as yf
import pandas as pd

# Function to fetch stock prices using an alternative approach
def get_tech_stock_data(tickers, start="2019-01-01"):
    all_data = []  # List to store individual stock DataFrames

    for ticker in tickers:
        print(f"Fetching data for {ticker}...")

        try:
            #  Use .history() instead of yf.download()
            stock = yf.Ticker(ticker)
            stock_data = stock.history(start=start, interval="1d")[["Close"]]

            if stock_data.empty:
                print(f" No data found for {ticker}. Skipping...")
                continue

            stock_data.rename(columns={"Close": ticker}, inplace=True)  # Rename column to stock ticker
            all_data.append(stock_data)  # Store DataFrame

        except Exception as e:
            print(f"Error fetching {ticker}: {e}")

    #  Combine all stock data using pd.concat()
    if all_data:
        df = pd.concat(all_data, axis=1)  # Merge all stocks along columns
        return df.dropna()  # Drop any rows with missing values
    else:
        raise ValueError(" No valid stock data found.")

# List of tech stocks
tech_stocks = ["AAPL", "MSFT", "GOOGL", "NVDA", "TSLA", "AMZN", "META", "AMD"]

# Fetch historical stock data
tech_df = get_tech_stock_data(tech_stocks)

# Display first few rows
print(tech_df.head())


Fetching data for AAPL...
Fetching data for MSFT...
Fetching data for GOOGL...
Fetching data for NVDA...
Fetching data for TSLA...
Fetching data for AMZN...
Fetching data for META...
Fetching data for AMD...
                                AAPL       MSFT      GOOGL      NVDA  \
Date                                                                   
2019-01-02 00:00:00-05:00  37.667187  95.119827  52.543530  3.378090   
2019-01-03 00:00:00-05:00  33.915264  91.620537  51.088299  3.173995   
2019-01-04 00:00:00-05:00  35.363075  95.881752  53.708801  3.377346   
2019-01-07 00:00:00-05:00  35.284359  96.004051  53.601688  3.556145   
2019-01-08 00:00:00-05:00  35.956982  96.700142  54.072483  3.467613   

                                TSLA       AMZN        META        AMD  
Date                                                                    
2019-01-02 00:00:00-05:00  20.674667  76.956497  135.163666  18.830000  
2019-01-03 00:00:00-05:00  20.024000  75.014000  131.238663  17.0499

In [98]:
def get_nasdaq_data(start="2019-01-01"):
    """
    Fetches Nasdaq-100 index (^NDX) historical closing prices.
    """
    print("Fetching Nasdaq-100 Index data...")
    try:
        # Fetch Nasdaq-100 historical data
        nasdaq = yf.Ticker("^NDX")
        nasdaq_data = nasdaq.history(start=start, interval="1d")[["Close"]]

        # If no data, raise an error
        if nasdaq_data.empty:
            raise ValueError(" No data found for Nasdaq-100 (^NDX).")

        nasdaq_data.rename(columns={"Close": "Nasdaq"}, inplace=True)  # Rename column for clarity
        return nasdaq_data

    except Exception as e:
        print(f" Error fetching Nasdaq data: {e}")
        return None


In [99]:
# Fetch tech stock data
tech_df = get_tech_stock_data(tech_stocks)

# Fetch Nasdaq-100 data
nasdaq_df = get_nasdaq_data()

# Ensure Nasdaq data exists before merging
if nasdaq_df is not None:
    tech_df = tech_df.join(nasdaq_df, how="inner")  # Merge Nasdaq data with stock data
    print(" Nasdaq data successfully merged!")
else:
    print(" Skipping Nasdaq data merge due to missing data.")

# Display the first few rows
print(tech_df.head())


Fetching data for AAPL...
Fetching data for MSFT...
Fetching data for GOOGL...
Fetching data for NVDA...
Fetching data for TSLA...
Fetching data for AMZN...
Fetching data for META...
Fetching data for AMD...
Fetching Nasdaq-100 Index data...
 Nasdaq data successfully merged!
                                AAPL       MSFT      GOOGL      NVDA  \
Date                                                                   
2019-01-02 00:00:00-05:00  37.667183  95.119812  52.543530  3.378090   
2019-01-03 00:00:00-05:00  33.915253  91.620544  51.088303  3.173996   
2019-01-04 00:00:00-05:00  35.363075  95.881737  53.708797  3.377346   
2019-01-07 00:00:00-05:00  35.284363  96.004036  53.601692  3.556145   
2019-01-08 00:00:00-05:00  35.956989  96.700119  54.072483  3.467614   

                                TSLA       AMZN        META        AMD  \
Date                                                                     
2019-01-02 00:00:00-05:00  20.674667  76.956497  135.163666  18.830000 

## **Feature Engineering and function indicators**

---







In [100]:
print(tech_df.head())  # Display first few rows


                                AAPL       MSFT      GOOGL      NVDA  \
Date                                                                   
2019-01-02 00:00:00-05:00  37.667183  95.119812  52.543530  3.378090   
2019-01-03 00:00:00-05:00  33.915253  91.620544  51.088303  3.173996   
2019-01-04 00:00:00-05:00  35.363075  95.881737  53.708797  3.377346   
2019-01-07 00:00:00-05:00  35.284363  96.004036  53.601692  3.556145   
2019-01-08 00:00:00-05:00  35.956989  96.700119  54.072483  3.467614   

                                TSLA       AMZN        META        AMD  \
Date                                                                     
2019-01-02 00:00:00-05:00  20.674667  76.956497  135.163666  18.830000   
2019-01-03 00:00:00-05:00  20.024000  75.014000  131.238678  17.049999   
2019-01-04 00:00:00-05:00  21.179333  78.769501  137.425034  19.000000   
2019-01-07 00:00:00-05:00  22.330667  81.475502  137.524673  20.570000   
2019-01-08 00:00:00-05:00  22.356667  82.829002  14

In [101]:
def add_technical_indicators(df):
    """
    Adds key technical indicators to the DataFrame:
    - 10-day & 50-day Moving Averages
    - 10-day Volatility
    - 10-day Price Momentum
    """
    df = df.copy()

    #  Compute Moving Averages (Trends)
    df["Nasdaq_10d_MA"] = df["Nasdaq"].rolling(window=10).mean()  # 10-day Moving Average
    df["Nasdaq_50d_MA"] = df["Nasdaq"].rolling(window=50).mean()  # 50-day Moving Average

    #  Compute Volatility (Risk Measurement)
    df["Nasdaq_Volatility"] = df["Nasdaq"].pct_change().rolling(window=10).std()  # 10-day Standard Deviation of % Change

    #  Compute Momentum (Price Speed)
    df["Nasdaq_Momentum"] = df["Nasdaq"].diff(periods=10)  # Price difference over 10 days

    #  Drop missing values created by rolling calculations
    df.dropna(inplace=True)

    return df


In [102]:
if 'tech_df' in locals():
    print(" Displaying the first few rows of the dataset with technical indicators:")
    print(tech_df.head())  # Print the first few rows for review
else:
    print(" tech_df is not defined. Please ensure data fetching and processing are completed.")


 Displaying the first few rows of the dataset with technical indicators:
                                AAPL       MSFT      GOOGL      NVDA  \
Date                                                                   
2019-01-02 00:00:00-05:00  37.667183  95.119812  52.543530  3.378090   
2019-01-03 00:00:00-05:00  33.915253  91.620544  51.088303  3.173996   
2019-01-04 00:00:00-05:00  35.363075  95.881737  53.708797  3.377346   
2019-01-07 00:00:00-05:00  35.284363  96.004036  53.601692  3.556145   
2019-01-08 00:00:00-05:00  35.956989  96.700119  54.072483  3.467614   

                                TSLA       AMZN        META        AMD  \
Date                                                                     
2019-01-02 00:00:00-05:00  20.674667  76.956497  135.163666  18.830000   
2019-01-03 00:00:00-05:00  20.024000  75.014000  131.238678  17.049999   
2019-01-04 00:00:00-05:00  21.179333  78.769501  137.425034  19.000000   
2019-01-07 00:00:00-05:00  22.330667  81.475502  137

**## Training the data**





In [103]:
# Define target variable: Market Movement (1 = Bullish, 0 = Bearish)
tech_df["Market_Trend"] = (tech_df["Nasdaq"].shift(-1) > tech_df["Nasdaq"]).astype(int)


In [116]:
# ... (Previous code remains unchanged) ...

# Fetch tech stock data
tech_df = get_tech_stock_data(tech_stocks)

# Fetch Nasdaq-100 data
nasdaq_df = get_nasdaq_data()

# Ensure Nasdaq data exists before merging
if nasdaq_df is not None:
    tech_df = tech_df.join(nasdaq_df, how="inner")  # Merge Nasdaq data with stock data
    print(" Nasdaq data successfully merged!")
else:
    print(" Skipping Nasdaq data merge due to missing data.")

# Add technical indicators *here* before defining target variable and features
tech_df = add_technical_indicators(tech_df)

# Display the first few rows
print(tech_df.head())

# ... (Rest of the code remains unchanged) ...

Fetching data for AAPL...
Fetching data for MSFT...
Fetching data for GOOGL...
Fetching data for NVDA...
Fetching data for TSLA...
Fetching data for AMZN...
Fetching data for META...
Fetching data for AMD...
Fetching Nasdaq-100 Index data...
 Nasdaq data successfully merged!
                                AAPL        MSFT      GOOGL      NVDA  \
Date                                                                    
2019-03-14 00:00:00-04:00  44.011356  108.250893  59.411129  4.109918   
2019-03-15 00:00:00-04:00  44.583858  109.497871  59.300034  4.215423   
2019-03-18 00:00:00-04:00  45.038986  111.066040  59.212849  4.194072   
2019-03-19 00:00:00-04:00  44.682072  111.141617  59.905838  4.361885   
2019-03-20 00:00:00-04:00  45.072536  111.018799  61.100006  4.329365   

                                TSLA       AMZN        META        AMD  \
Date                                                                     
2019-03-14 00:00:00-04:00  19.330667  84.310997  169.522446  22.

In [117]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# ✅ Step 1: Define Feature Engineering Function with RSI, MACD, and Trading Volume
def add_technical_indicators(df):
    """
    Adds key technical indicators:
    - Moving Averages (10-day, 50-day)
    - Volatility (10-day standard deviation)
    - Momentum (10-day price difference)
    - Relative Strength Index (RSI)
    - Moving Average Convergence Divergence (MACD, MACD Signal)
    - Trading Volume (Simulated)
    """
    df = df.copy()

    # Compute Moving Averages (Trend)
    df["Nasdaq_10d_MA"] = df["Nasdaq"].rolling(window=10).mean()
    df["Nasdaq_50d_MA"] = df["Nasdaq"].rolling(window=50).mean()

    # Compute Volatility (Risk)
    df["Nasdaq_Volatility"] = df["Nasdaq"].pct_change().rolling(window=10).std()

    # Compute Momentum (Price Speed)
    df["Nasdaq_Momentum"] = df["Nasdaq"].diff(periods=10)

    # Compute Relative Strength Index (RSI)
    delta = df["Nasdaq"].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df["RSI"] = 100 - (100 / (1 + rs))

    # Compute Moving Average Convergence Divergence (MACD)
    short_ema = df["Nasdaq"].ewm(span=12, adjust=False).mean()
    long_ema = df["Nasdaq"].ewm(span=26, adjust=False).mean()
    df["MACD"] = short_ema - long_ema  # MACD Line
    df["MACD_Signal"] = df["MACD"].ewm(span=9, adjust=False).mean()  # MACD Signal Line

    # Simulate Trading Volume
    np.random.seed(42)
    df["Trading_Volume"] = np.random.randint(1000000, 5000000, df.shape[0])

    # Drop missing values caused by rolling calculations
    df.dropna(inplace=True)

    return df

# ✅ Step 2: Create Synthetic Nasdaq Data (Replace with real stock data if available)
date_range = pd.date_range(start="2023-01-01", periods=100, freq='D')
np.random.seed(42)
tech_df = pd.DataFrame({
    "Date": date_range,
    "Nasdaq": np.random.uniform(10000, 12000, 100)
})

# Convert "Date" column to index
tech_df.set_index("Date", inplace=True)

# ✅ Step 3: Apply Feature Engineering to Dataset
tech_df = add_technical_indicators(tech_df)

# ✅ Step 4: Define Target Variable (Bullish or Bearish)
tech_df["Market_Trend"] = (tech_df["Nasdaq"].shift(-1) > tech_df["Nasdaq"]).astype(int)

# ✅ Step 5: Select Features for Model Training
rf_features = [
    "Nasdaq_10d_MA", "Nasdaq_50d_MA", "Nasdaq_Volatility",
    "Nasdaq_Momentum", "RSI", "MACD", "MACD_Signal", "Trading_Volume"
]



In [118]:
tech_df["Market_Trend"] = (tech_df["Nasdaq"].shift(-1) > tech_df["Nasdaq"]).astype(int)

In [119]:
# Define input features (independent variables)
features = ["Nasdaq_10d_MA", "Nasdaq_50d_MA", "Nasdaq_Volatility", "Nasdaq_Momentum"]

# Select input features (X) and target variable (y)
X = tech_df[features]
y = tech_df["Market_Trend"]


In [120]:
print(tech_df.columns)


Index(['Nasdaq', 'Nasdaq_10d_MA', 'Nasdaq_50d_MA', 'Nasdaq_Volatility',
       'Nasdaq_Momentum', 'RSI', 'MACD', 'MACD_Signal', 'Trading_Volume',
       'Market_Trend'],
      dtype='object')


In [121]:
from sklearn.model_selection import train_test_split

# Split data (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)


In [122]:
from sklearn.ensemble import RandomForestClassifier

# Train a Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


In [123]:
from sklearn.metrics import accuracy_score, classification_report

# Make predictions on test data
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f" Model Accuracy: {accuracy:.2f}")

# Display classification report
report = classification_report(y_test, y_pred, target_names=["Bearish (0)", "Bullish (1)"])
print("\n🔍 Classification Report:\n", report)


 Model Accuracy: 0.64

🔍 Classification Report:
               precision    recall  f1-score   support

 Bearish (0)       0.62      0.83      0.71         6
 Bullish (1)       0.67      0.40      0.50         5

    accuracy                           0.64        11
   macro avg       0.65      0.62      0.61        11
weighted avg       0.64      0.64      0.62        11



In [125]:
import joblib

# Save the trained model to a file
joblib.dump(model, 'random_forest_model.pkl')


['random_forest_model.pkl']