<a href="https://colab.research.google.com/github/Harshit-Chouksey/CMPE257_Project_Market_Trends/blob/main/Random_Forest/Hyperparameter_Exploration/Tesla_with_parameter_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import yfinance as yf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Function to get historical stock data using Yahoo Finance API
def get_stock_data(ticker, start_date, end_date):
    """Download historical price data for ``ticker`` through ``yf.download``.

    ``start_date`` and ``end_date`` are forwarded as the ``start`` and ``end``
    keyword arguments, and whatever ``yf.download`` returns is handed back
    unchanged.
    """
    return yf.download(ticker, start=start_date, end=end_date)

# Function to create features for prediction
def create_features(data):
    """Return a new DataFrame with calendar features derived from the index.

    Args:
        data: DataFrame whose index exposes ``.year``, ``.month`` and ``.day``
            (for example a ``DatetimeIndex``).

    Returns:
        A new DataFrame holding the columns of ``data`` plus 'Year', 'Month'
        and 'Day'. A column named 'Date' is never part of the result. The
        input DataFrame is left unmodified.
    """
    # Build the features on a fresh frame so the caller's DataFrame does not
    # silently gain extra columns as a side effect of this call.
    features = data.drop(columns=['Date'], errors='ignore').copy()

    # Read the date parts straight from the index; no temporary column needed.
    dates = features.index
    features['Year'] = dates.year
    features['Month'] = dates.month
    features['Day'] = dates.day
    return features

# Function to train and evaluate the Random Forest model
def train_random_forest(data, n_estimators, max_depth, min_samples_split, min_samples_leaf, max_features):
    """Fit a RandomForestRegressor on ``data`` and report its test-set MSE.

    Args:
        data: DataFrame containing a 'Close' column, which is the target.
            Every other column is used as an input feature.
        n_estimators: Forwarded to ``RandomForestRegressor``.
        max_depth: Forwarded to ``RandomForestRegressor``.
        min_samples_split: Forwarded to ``RandomForestRegressor``.
        min_samples_leaf: Forwarded to ``RandomForestRegressor``.
        max_features: Forwarded to ``RandomForestRegressor``.

    Returns:
        The mean squared error of the fitted model on the held-out 30% of the
        rows. The same value is also printed together with the
        hyperparameters, so callers can compare configurations
        programmatically instead of parsing the output.
    """
    X = data.drop(['Close'], axis=1)
    y = data['Close']

    # Split the data into training and testing sets (70% train, 30% test).
    # NOTE(review): train_test_split shuffles rows by default; if ``data`` is
    # time-ordered this puts later rows in the training set - confirm that
    # this is intended for the experiment.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Initialize the Random Forest model; fixed seed keeps runs comparable
    model = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        random_state=42
    )

    # Train the model
    model.fit(X_train, y_train)

    # Make predictions on the test set
    predictions = model.predict(X_test)

    # Evaluate the model
    mse = mean_squared_error(y_test, predictions)
    print(f'Mean Squared Error (n_estimators={n_estimators}, max_depth={max_depth}, min_samples_split={min_samples_split}, min_samples_leaf={min_samples_leaf}, max_features={max_features}): {mse}')

    # Hand the score back so a sweep can rank configurations
    return mse

if __name__ == "__main__":
    # Local import: only the sweep below needs it
    import itertools

    # Set the stock symbol, start date, and end date (last 5 years)
    stock_symbol = 'TSLA'
    end_date = pd.to_datetime('today')
    start_date = end_date - pd.DateOffset(years=5)

    # Get historical stock data
    stock_data = get_stock_data(stock_symbol, start_date, end_date)

    # Create features for prediction
    stock_data_features = create_features(stock_data)

    # Define variations of hyperparameters to test
    n_estimators_values = [100, 200]
    max_depth_values = [None, 10, 20]
    min_samples_split_values = [2, 5]
    min_samples_leaf_values = [1, 2]
    max_features_values = [1.0, 'sqrt', 0.5]

    # Train/evaluate the model for every combination. itertools.product
    # varies the last iterable fastest, so the visiting order matches nested
    # loops written in this same order (max_features changes quickest).
    for n_estimators, max_depth, min_samples_split, min_samples_leaf, max_features in itertools.product(
        n_estimators_values,
        max_depth_values,
        min_samples_split_values,
        min_samples_leaf_values,
        max_features_values,
    ):
        train_random_forest(
            stock_data_features,
            n_estimators=n_estimators,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            max_features=max_features
        )


[*********************100%%**********************]  1 of 1 completed
Mean Squared Error (n_estimators=100, max_depth=None, min_samples_split=2, min_samples_leaf=1, max_features=1.0): 0.8006238359940882
Mean Squared Error (n_estimators=100, max_depth=None, min_samples_split=2, min_samples_leaf=1, max_features=sqrt): 7.510508757491216
Mean Squared Error (n_estimators=100, max_depth=None, min_samples_split=2, min_samples_leaf=1, max_features=0.5): 2.449629064419471
Mean Squared Error (n_estimators=100, max_depth=None, min_samples_split=2, min_samples_leaf=2, max_features=1.0): 0.7373177183942361
Mean Squared Error (n_estimators=100, max_depth=None, min_samples_split=2, min_samples_leaf=2, max_features=sqrt): 7.655069115443085
Mean Squared Error (n_estimators=100, max_depth=None, min_samples_split=2, min_samples_leaf=2, max_features=0.5): 2.3983456093697204
Mean Squared Error (n_estimators=100, max_depth=None, min_samples_split=5, min_samples_leaf=1, max_features=1.0): 0.9535904732430501
