<a href="https://colab.research.google.com/github/Harshit-Chouksey/CMPE257_Project_Market_Trends/blob/main/Random_Forest/Predicting_movement/Just_Predict_the_Movement.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
import time

def get_stock_data(ticker, start_date, end_date):
    """Download data for ``ticker`` over a date range using yfinance.

    Args:
        ticker: Symbol passed straight through to ``yf.download``.
        start_date: Forwarded as the ``start`` keyword.
        end_date: Forwarded as the ``end`` keyword.

    Returns:
        Whatever ``yf.download`` returns for the request.
    """
    return yf.download(ticker, start=start_date, end=end_date)

def create_features(data):
    """Return a new frame with ``Year``/``Month``/``Day`` columns from the index.

    The calendar columns are read from ``data.index`` and appended after the
    existing columns (an existing column of the same name is overwritten in
    place). The caller's frame is not modified.

    Args:
        data: DataFrame whose index exposes ``.year``, ``.month`` and
            ``.day`` (e.g. a ``DatetimeIndex``).

    Returns:
        A new DataFrame holding the input columns plus ``Year``, ``Month``
        and ``Day``. A column named ``Date`` is never present in the result.

    Raises:
        AttributeError: If the index is not datetime-like.
    """
    # ``drop`` always returns a new frame, so the assignments below cannot
    # leak helper columns back into the caller's DataFrame.
    # ``errors='ignore'`` keeps the historical contract that the result has
    # no ``Date`` column even when the input happened to carry one.
    features = data.drop(columns=['Date'], errors='ignore')

    calendar_index = data.index
    features['Year'] = calendar_index.year
    features['Month'] = calendar_index.month
    features['Day'] = calendar_index.day
    return features

def train_random_forest(data):
    """Fit a random-forest regressor that predicts ``Close`` from all other columns.

    The elapsed fitting time is printed to stdout.

    Args:
        data: DataFrame containing a ``Close`` column; every remaining
            column is used as a model input.

    Returns:
        A ``(model, feature_columns)`` tuple: the fitted
        ``RandomForestRegressor`` and the column index of the feature frame
        it was trained on.
    """
    features = data.drop(columns=['Close'])
    target = data['Close']

    # Fixed seed so repeated runs build the same forest.
    hyperparams = {
        'n_estimators': 200,
        'max_depth': 10,
        'min_samples_split': 2,
        'min_samples_leaf': 1,
        'max_features': None,
        'random_state': 42,
    }
    forest = RandomForestRegressor(**hyperparams)

    # Time only the fit call itself.
    fit_started = time.time()
    forest.fit(features, target)
    training_time = time.time() - fit_started
    print(f'Training Time: {training_time} seconds')

    return forest, features.columns


def predict_price_movement(model, data, feature_columns, target_date):
    """Predict a value for ``target_date`` from the last row of ``data``.

    A single-row feature frame is built by copying the ``feature_columns``
    values from the final row of ``data`` and then setting ``Year``,
    ``Month`` and ``Day`` to the components of ``target_date``.

    Args:
        model: Object exposing ``predict(frame)`` that returns an indexable
            result.
        data: DataFrame whose last row supplies the feature values.
        feature_columns: Iterable of column names to copy from that row.
        target_date: Anything ``pd.to_datetime`` accepts.

    Returns:
        The first element of ``model.predict`` for the assembled row.
    """
    when = pd.to_datetime(target_date)
    latest_row = data.iloc[-1]

    row = {}
    for name in feature_columns:
        row[name] = latest_row[name]
    # Overwrite (or append) the calendar fields with the target date's parts.
    row.update({'Year': when.year, 'Month': when.month, 'Day': when.day})

    single_row_frame = pd.DataFrame([row])
    predictions = model.predict(single_row_frame)
    return predictions[0]

if __name__ == "__main__":
    # Script entry point: download five years of MSFT data, fit the model,
    # then compare the predicted and actual direction of the close between
    # two fixed dates.
    stock_symbol = 'MSFT'
    end_date = pd.to_datetime('today')
    start_date = end_date - pd.DateOffset(years=5)

    stock_data = get_stock_data(stock_symbol, start_date, end_date)
    stock_data_features = create_features(stock_data)

    # NOTE(review): the model is fitted on every downloaded row, which can
    # include target_date itself, and predict_price_movement reads the last
    # downloaded row rather than the row for previous_date. Consider
    # restricting both to rows on or before previous_date to avoid
    # look-ahead leakage.
    model, feature_columns = train_random_forest(stock_data_features)

    target_date = pd.Timestamp('2023-12-08')
    predicted_price = predict_price_movement(model, stock_data_features, feature_columns, target_date)

    previous_date = pd.Timestamp('2023-12-07')
    actual_price_previous = stock_data_features.loc[previous_date, 'Close'] if previous_date in stock_data_features.index else None
    actual_price_target = stock_data_features.loc[target_date, 'Close'] if target_date in stock_data_features.index else None

    # Compare against None explicitly: a truthiness test would treat a
    # legitimate 0.0 price as "missing".
    if actual_price_previous is not None and actual_price_target is not None:
        actual_movement = "up" if actual_price_target > actual_price_previous else "down"
        predicted_movement = "up" if predicted_price > actual_price_previous else "down"
        print(f"Actual movement from {previous_date.date()} to {target_date.date()} was {actual_movement}.")
        print(f"Predicted movement for {target_date.date()} was {predicted_movement}.")
    else:
        print("Insufficient data for the required dates.")


[*********************100%%**********************]  1 of 1 completed
Training Time: 1.2555952072143555 seconds
Actual movement from 2023-12-07 to 2023-12-08 was up.
Predicted movement for 2023-12-08 was up.
