# MarketPredictorX with Historical Analysis and Forecasting

This notebook contains a Python-based stock price predictor. The app provides real-time stock prices, makes ML predictions for the next day's price with a reported accuracy (R² score) exceeding 98%, analyzes historical data, calculates moving averages, measures volatility, and offers a comparison between two stocks. This tool can be used to explore stock trends and potential movements. The project was built using the `yfinance` library for data retrieval and `matplotlib` for visualizations.


## Import required libraries

In [31]:
import sqlite3
import yfinance as yf
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import os
import getpass
import bcrypt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import numpy as np

## Sign-in/sign-up functions and user data storage in a database (SQLite)

In [3]:
# Open (or create) the SQLite database file and obtain a cursor. Both `conn`
# and `cursor` are module-level and are used by the account functions below.
conn = sqlite3.connect('market_predictor.db')
cursor = conn.cursor()

# Create the users table if it does not exist yet, so re-running this cell is
# harmless. `favorite_stocks` holds a comma-separated list of ticker symbols.
cursor.execute('''
CREATE TABLE IF NOT EXISTS users (
    username TEXT PRIMARY KEY,
    password TEXT NOT NULL,
    favorite_stocks TEXT
)
''')
conn.commit()

def hash_password(password):
    """Hash a plaintext password with bcrypt using a freshly generated salt.

    Args:
        password: The plaintext password as a str.

    Returns:
        The value returned by ``bcrypt.hashpw`` for the UTF-8 encoded password.
    """
    encoded = password.encode('utf-8')
    return bcrypt.hashpw(encoded, bcrypt.gensalt())

def verify_password(plain_password, hashed_password):
    """Check a plaintext password against a stored bcrypt hash.

    Args:
        plain_password: The password typed by the user (str).
        hashed_password: The stored hash, passed to ``bcrypt.checkpw`` as-is.

    Returns:
        The result of ``bcrypt.checkpw`` (truthy when the password matches).
    """
    candidate = plain_password.encode('utf-8')
    return bcrypt.checkpw(candidate, hashed_password)

def sign_up():
    """Interactively register a new user and store it in the users table.

    Prompts for a username until one is entered that is non-blank and not
    already taken, then for a non-empty password (stored only as a bcrypt
    hash) and a comma-separated list of favorite stock symbols.

    Returns:
        None. The outcome is reported on stdout.
    """
    # Loop instead of recursing so that repeated retries cannot grow the
    # call stack without bound.
    while True:
        username = input("Create a username: ")
        if not username.strip():
            print("Username cannot be empty. Try another.")
            continue

        cursor.execute("SELECT 1 FROM users WHERE username=?", (username,))
        if cursor.fetchone() is None:
            break
        print("Username already exists. Try another.")

    password = getpass.getpass("Create a password: ")
    while not password:
        print("Password cannot be empty.")
        password = getpass.getpass("Create a password: ")
    hashed_pw = hash_password(password)

    raw_symbols = input(
        "Enter the stock symbols you're interested in (comma-separated): "
    ).upper().replace(' ', '')
    # Drop empty entries ("AAPL,,MSFT", trailing commas) so later look-ups
    # never receive a blank ticker symbol.
    favorite_stocks = ','.join(symbol for symbol in raw_symbols.split(',') if symbol)

    cursor.execute(
        "INSERT INTO users (username, password, favorite_stocks) VALUES (?, ?, ?)",
        (username, hashed_pw, favorite_stocks),
    )
    conn.commit()
    print("Sign up successful!")

def sign_in():
    """Prompt for credentials and authenticate against the users table.

    Returns:
        The username when the password matches the stored hash; None when
        the username is unknown or the password is wrong.
    """
    username = input("Enter username: ")

    cursor.execute("SELECT password FROM users WHERE username=?", (username,))
    row = cursor.fetchone()

    # Unknown user: bail out before asking for a password.
    if not row:
        print("Username not found. Please sign up first.")
        return None

    entered = getpass.getpass("Enter password: ")
    if not verify_password(entered, row[0]):
        print("Incorrect password.")
        return None

    print(f"Welcome {username}!")
    return username

## 1. Welcome Message and Date Display

In [4]:
def welcome(model_accuracy):
    """Print the app greeting and the model's R^2 score as a percentage.

    Args:
        model_accuracy: R^2 score as a fraction (e.g. 0.98 prints 98.00%).
    """
    accuracy_pct = model_accuracy * 100
    print("Welcome to the MarketPredictorX App!")
    print(f"The prediction model's accuracy (R^2 score) is: {accuracy_pct:.2f}%\n")

def print_today_date():
    """Print the current local date as YYYY-MM-DD, followed by a blank line."""
    now = datetime.now()
    date_text = now.strftime("%Y-%m-%d")
    print(f"Today's date is: {date_text}\n")
    
# Show the current date as soon as this cell runs.
print_today_date()

## 2. Fetch and Display Stock Prices

In [5]:
def list_favorite_stocks(username):
    """Print and return the latest closing price of each of a user's favorites.

    Args:
        username: Key of the row to read from the users table.

    Returns:
        A dict mapping each favorite symbol to its most recent closing
        price, or to None when no price data came back. An empty dict is
        returned when the user is unknown or has no favorites stored.
    """
    cursor.execute("SELECT favorite_stocks FROM users WHERE username=?", (username,))
    result = cursor.fetchone()

    # The column is nullable and may also hold an empty string or stray
    # commas; normalise all of those to "no symbols" instead of crashing on
    # None.split or looking up a blank ticker.
    stored = (result[0] or '') if result else ''
    favorite_stocks = [symbol.strip() for symbol in stored.split(',') if symbol.strip()]

    if not favorite_stocks:
        print(f"No favorite stocks found for {username}")
        return {}

    stock_prices = {}
    for stock in favorite_stocks:
        hist = yf.Ticker(stock).history(period="1d")
        stock_prices[stock] = None if hist.empty else hist['Close'].iloc[-1]

    print("Here are your favorite stocks and their current prices:")
    for stock, price in stock_prices.items():
        if price is not None:
            print(f"{stock}: ${price:.2f}")
        else:
            print(f"{stock}: Price not available")
    return stock_prices

## 3. Stock Price Prediction 

In [6]:
def predict_next_price_ml(model, X_test, current_price):
    """Predict the next price from the current one, rounded to 2 decimals.

    Args:
        model: Object exposing ``predict`` that accepts a 2-D list of features.
        X_test: Unused; kept so existing callers keep working.
        current_price: The single feature value fed to the model.

    Returns:
        The first element of the model's prediction, rounded to 2 decimals.
    """
    features = [[current_price]]
    prediction = model.predict(features)[0]
    return round(prediction, 2)

In [28]:
def predict_prices_for_days(model, current_price, days):
    """Roll the one-step model forward to forecast several days.

    Each (unrounded) prediction is fed back in as the input for the
    following day.

    Args:
        model: Object exposing ``predict`` that accepts a 2-D list of features.
        current_price: Starting price for the first prediction.
        days: Number of consecutive predictions to produce.

    Returns:
        A list of ``days`` predicted prices, each rounded to 2 decimals.
    """
    forecast = []
    seed = current_price
    for _step in range(days):
        output = model.predict([[seed]])
        # A model may hand back a sequence or a bare scalar; take the first
        # element only in the sequence case.
        seed = output[0] if isinstance(output, (list, np.ndarray)) else output
        forecast.append(round(seed, 2))
    return forecast

## 4. Fetch historical prices of a stock for the last N days

In [8]:
def fetch_last_n_days(stock, days):
    """Download, print and return recent closing prices for a stock.

    Args:
        stock: Ticker symbol passed to ``yf.Ticker``.
        days: Size of the look-back window, sent to yfinance as ``"<days>d"``.

    Returns:
        A DataFrame with columns ``Date`` and ``Price``, or None when
        yfinance returned no rows.
    """
    history = yf.Ticker(stock).history(period=f"{days}d")

    if history.empty:
        print(f"No data available for {stock} in the last {days} days.")
        return None

    prices = history[['Close']].reset_index()
    prices.columns = ['Date', 'Price']
    print(f"\nLast {days} days of prices for {stock}:")
    print(prices)
    return prices

## 5. Train the linear regression model on stock data

In [9]:
def train_model(stock_symbol):
    """Fit a linear model that predicts the next close from today's close.

    Downloads ten years of history for ``stock_symbol`` and trains on the
    oldest 80% of the (close, next-day close) pairs. The newest 20% is held
    out to compute the R^2 score.

    Args:
        stock_symbol: Ticker symbol passed to ``yf.Ticker``.

    Returns:
        A tuple ``(model, X_test, model_accuracy)``, or ``(None, None, None)``
        when there is no data or too little data to train and evaluate.
    """
    ticker = yf.Ticker(stock_symbol)
    hist = ticker.history(period="10y")

    if hist.empty:
        print("No data available for training.")
        return None, None, None

    # Work on a copy holding only the column we need, so NaNs in unrelated
    # columns cannot drop rows and the downloaded frame is left untouched.
    data = hist[['Close']].copy()
    data['PriceShifted'] = data['Close'].shift(-1)
    data = data.dropna()

    # With test_size=0.2, ten rows is the smallest set that leaves two test
    # samples; R^2 is undefined for fewer than two.
    if len(data) < 10:
        print("Not enough data available for training.")
        return None, None, None

    X = data[['Close']]  # Current price
    y = data['PriceShifted']  # Next day's price

    # Chronological split: shuffling a time series lets the model train on
    # days that come after the ones it is evaluated on, which inflates the
    # reported score.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    model = LinearRegression()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    model_accuracy = r2_score(y_test, y_pred)

    return model, X_test, model_accuracy

## 6. Plot stock prices

In [10]:
def plot_stock_prices(df, stock, days):
    """Show a line chart of the ``Price`` column against ``Date``.

    Args:
        df: DataFrame with ``Date`` and ``Price`` columns.
        stock: Ticker symbol, used in the chart title.
        days: Number of days covered, used in the chart title.
    """
    line_style = {'marker': 'o', 'linestyle': '-', 'color': 'b'}

    plt.figure(figsize=(10, 6))
    plt.plot(df['Date'], df['Price'], **line_style)

    # Labelling calls are independent of one another.
    plt.xlabel('Date')
    plt.ylabel('Price (USD)')
    plt.title(f'{stock} Stock Prices - Last {days} Days')
    plt.grid(True)
    plt.xticks(rotation=45)

    plt.tight_layout()
    plt.show()

## 7. Calculate and display moving averages

In [11]:
def display_moving_averages(df, stock, days):
    """Add 10- and 30-day simple moving averages to ``df``, then print and plot.

    The leading rows of each average, where the rolling window is not yet
    full, are filled with that average's overall mean. ``df`` is modified
    in place: columns ``SMA10`` and ``SMA30`` are added or overwritten.

    Args:
        df: DataFrame with ``Date`` and ``Price`` columns.
        stock: Ticker symbol, used in the printed header and chart title.
        days: Number of days covered, used in the header and chart title.
    """
    for column, window in (('SMA10', 10), ('SMA30', 30)):
        sma = df['Price'].rolling(window=window).mean()
        # Assign the filled series back explicitly. Calling
        # fillna(inplace=True) on df[column] acts on a temporary and does
        # not update df when pandas Copy-on-Write is active.
        df[column] = sma.fillna(sma.mean())

    print(f"\n{stock} Moving Averages for Last {days} Days (with NaN values filled with averages):")
    print(df[['Date', 'Price', 'SMA10', 'SMA30']])

    plt.figure(figsize=(10, 6))
    plt.plot(df['Date'], df['Price'], marker='o', linestyle='-', color='b', label='Price')
    plt.plot(df['Date'], df['SMA10'], linestyle='--', color='g', label='10-Day SMA')
    plt.plot(df['Date'], df['SMA30'], linestyle='--', color='r', label='30-Day SMA')
    plt.title(f'{stock} Stock Prices and Moving Averages - Last {days} Days')
    plt.xlabel('Date')
    plt.ylabel('Price (USD)')
    plt.xticks(rotation=45)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

## 8. Calculate volatility (standard deviation of % change)


In [12]:
def _six_month_volatility(stock_symbol):
    """Fetch six months of history and return its volatility in percent, or None."""
    hist = yf.Ticker(stock_symbol).history(period="6mo")

    if hist.empty:
        print("\nNo data available for the last 6 months.")
        return None

    six_month_volatility = hist['Close'].pct_change().std() * 100
    if pd.isna(six_month_volatility):
        print("\nNot enough data to calculate volatility over the last 6 months.")
        return None

    print(f"\nVolatility (Standard Deviation of % Change) over the last 6 months: {six_month_volatility:.2f}%")
    return six_month_volatility


def calculate_volatility(df, days, stock_symbol):
    """Print and return the volatility of ``df['Price']`` over the last ``days`` rows.

    Volatility is the sample standard deviation of the day-over-day
    percentage change, expressed in percent. A ``PctChange`` column is added
    to ``df`` as a side effect. When ``df`` holds too little data, the user
    is offered a six-month calculation fetched from yfinance instead.

    Args:
        df: DataFrame with a ``Price`` column, oldest row first.
        days: Number of most recent price rows to consider.
        stock_symbol: Ticker symbol, used only for the six-month fallback.

    Returns:
        The volatility in percent, or None when it could not be computed or
        the user declined the fallback.
    """
    df['PctChange'] = df['Price'].pct_change()

    # N prices yield N-1 returns, so a window of `days` prices is the last
    # `days - 1` returns. Comparing the return count against `days` itself
    # would reject every frame of exactly `days` rows.
    recent_returns = df['PctChange'].dropna().tail(max(days - 1, 0))

    if len(recent_returns) >= 2:
        latest_volatility = recent_returns.std() * 100
        if pd.notna(latest_volatility):
            print(f"\nVolatility (Standard Deviation of % Change) over {days} days: {latest_volatility:.2f}%")
            return latest_volatility

    print(f"\nNot enough data to calculate volatility over {days} days.")

    user_choice = input("Would you like to calculate volatility over the last 6 months? (yes/no): ").lower()
    if user_choice == 'yes':
        return _six_month_volatility(stock_symbol)

    print("Skipping extended volatility calculation.")
    return None



## 9. Compare two stocks' performance


In [13]:
def compare_two_stocks(stock1, stock2, days):
    """Plot the recent prices of two stocks on one chart.

    Nothing is plotted unless price data is available for both symbols.

    Args:
        stock1: First ticker symbol (drawn in blue).
        stock2: Second ticker symbol (drawn in red).
        days: Look-back window passed to ``fetch_last_n_days``.
    """
    first = fetch_last_n_days(stock1, days)
    second = fetch_last_n_days(stock2, days)

    if first is None or second is None:
        return

    plt.figure(figsize=(10, 6))
    for frame, symbol, colour in ((first, stock1, 'b'), (second, stock2, 'r')):
        plt.plot(frame['Date'], frame['Price'], label=f'{symbol} Price',
                 marker='o', linestyle='-', color=colour)
    plt.title(f'{stock1} vs {stock2} Stock Prices - Last {days} Days')
    plt.xlabel('Date')
    plt.ylabel('Price (USD)')
    plt.xticks(rotation=45)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


## 10. Export data to CSV


In [14]:
def export_to_csv(df, stock, days):
    """Write ``df`` to ``<stock>_last_<days>_days.csv`` in the working directory.

    Args:
        df: DataFrame to export; the index is not written.
        stock: Ticker symbol, used in the file name.
        days: Number of days covered, used in the file name.
    """
    csv_name = f"{stock}_last_{days}_days.csv"
    df.to_csv(csv_name, index=False)
    print(f"\nData for {stock} exported to {csv_name}")

## Main function to run the app

In [29]:
def _prompt_prediction_days():
    """Ask until the user enters 7 or 30, and return that number as an int."""
    while True:
        raw = input("Enter the number of days to predict (7 or 30): ")
        try:
            prediction_days = int(raw)
        except ValueError:
            # Non-numeric input is treated like any other invalid choice
            # instead of crashing the app.
            prediction_days = None
        if prediction_days in (7, 30):
            return prediction_days
        print("Invalid input. Please enter either 7 or 30.")


def _show_extended_prediction(model, stock_choice, current_price):
    """Print a 7- or 30-day forecast and plot it after the last 7 real prices."""
    prediction_days = _prompt_prediction_days()
    predicted_prices = predict_prices_for_days(model, current_price, prediction_days)
    print(f"Predicted prices for the next {prediction_days} days: {predicted_prices}")

    df_last_days = fetch_last_n_days(stock_choice, 7)
    if df_last_days is None:
        return

    future_dates = pd.date_range(
        start=df_last_days['Date'].iloc[-1] + pd.Timedelta(days=1),
        periods=prediction_days,
    )
    df_predictions = pd.DataFrame({'Date': future_dates, 'Price': predicted_prices})
    df_combined = pd.concat([df_last_days, df_predictions], ignore_index=True)
    plot_stock_prices(df_combined, stock_choice, 7 + prediction_days)


def _show_history(stock_choice):
    """Run the historical-prices menu; return the chosen window in days, or None."""
    print("1. 7 days (One Week)")
    print("2. 14 days (Two Week)")
    print("3. 30 days (One Month)")
    print("4. 90 days (Three Months)")
    print("5. 180 days (Six Months)")
    print("6. 270 days (Nine Months)")
    print("7. 365 days (One Year)")
    print("8. 730 days (Two Years)")
    print("9. 1,095 days (Three Years)")
    days_choice = input("Choose the duration (Write number of your choice): ").strip()
    days = {
        "1": 7,
        "2": 14,
        "3": 30,
        "4": 90,
        "5": 180,
        "6": 270,
        "7": 365,
        "8": 730,
        "9": 1095,
    }.get(days_choice)

    if not days:
        print("Please enter a valid option (1-9).")
        return None

    df = fetch_last_n_days(stock_choice, days)
    if df is None:
        # fetch_last_n_days has already reported the missing data.
        return days

    plot_stock_prices(df, stock_choice, days)

    show_moving_averages = input("Would you like to see moving averages? (yes/no): ").lower()
    if show_moving_averages == 'yes':
        display_moving_averages(df, stock_choice, days)

    show_volatility = input("Would you like to see volatility? (yes/no): ").lower()
    if show_volatility == 'yes':
        calculate_volatility(df, int(days), stock_choice)

    export_option = input("Would you like to export this data to CSV? (yes/no): ").lower()
    if export_option == 'yes':
        export_to_csv(df, stock_choice, days)

    return days


def _run_user_session(username):
    """Loop over predict / history / compare prompts until the user types 'exit'."""
    while True:
        list_favorite_stocks(username)
        stock_choice = input(
            "\nEnter a stock symbol to predict the price for (or type 'exit' to quit): "
        ).strip().upper()
        if stock_choice == 'EXIT':
            print("Logging out...")
            break
        if not stock_choice:
            print("Please enter a stock symbol.")
            continue

        # Train the model for the chosen stock
        model, X_test, model_accuracy = train_model(stock_choice)
        if model is None:
            print(f"Unable to train the model for {stock_choice}. Please try a different stock symbol.")
            continue

        # Fetch current stock price
        current_data = yf.Ticker(stock_choice).history(period="1d")
        if not current_data.empty:
            current_price = current_data['Close'].iloc[-1]
            predicted_price = predict_next_price_ml(model, X_test, current_price)
            print(f"Current price of {stock_choice}: ${current_price:.2f}")
            print(f"Predicted price for tomorrow: ${predicted_price:.2f}")

            extended_prediction = input(
                "Would you like to predict prices for the next 7 or 30 days? (yes/no): "
            ).lower()
            if extended_prediction == 'yes':
                _show_extended_prediction(model, stock_choice, current_price)
        else:
            print(f"No data available for {stock_choice}.")

        # Reset on every iteration so the comparison never reuses a window
        # chosen for a previous stock.
        days = None
        view_history = input("\nWould you like to see historical prices? (yes/no): ").lower()
        if view_history == 'yes':
            days = _show_history(stock_choice)

        compare = input("Would you like to compare with another stock? (yes/no): ").lower()
        if compare == 'yes':
            stock2 = input("Enter the symbol of another stock: ").strip().upper()
            # Fall back to 30 days when no valid history window was chosen;
            # otherwise `days` would be None here.
            compare_days = days or 30
            compare_two_stocks(stock_choice, stock2, compare_days)


def run_stock_predictor():
    """Run the interactive MarketPredictorX console app.

    Trains a model on AAPL to report an R^2 score in the welcome banner, then
    loops over the sign-up / sign-in / exit menu. A successful sign-in starts
    a session in which the user can request predictions, historical data and
    two-stock comparisons.

    Returns:
        None.
    """
    stock_symbol = "AAPL"
    model, _, model_accuracy = train_model(stock_symbol)

    if model is None:
        print("Failed to train model. Exiting.")
        return

    # Welcome user and show accuracy
    welcome(model_accuracy)

    print_today_date()

    while True:
        user_action = input(
            "1. Sign up \n2. Sign in \n3. Exit \nDo you want to sign up or sign in? (1 or 2 or 3): "
        ).strip().lower()
        if user_action == "1":
            sign_up()
        elif user_action == "2":
            username = sign_in()
            if username:
                _run_user_session(username)
        elif user_action == "3":
            print("Thank you for using MarketPredictorX")
            break
        else:
            print("Invalid choice. Please enter 1, 2, or 3.")


## Run the app

In [32]:
# Start the interactive app when this cell/script is executed directly.
if __name__ == "__main__":
    run_stock_predictor()