In [None]:
import numpy as np
import pandas as pd
import os
import json
import matplotlib.pyplot as plt
import yfinance as yf
from datetime import datetime, timedelta

### Define Monte Carlo model for simulations

In [None]:
class MonteCarlo():
    """Zero-drift Monte Carlo simulation of a stock's closing price.

    Constructing an instance downloads the ticker's price history from Yahoo
    Finance, estimates the daily volatility from the log returns of the
    ``Close`` column and immediately runs the simulation. Call ``results()``
    or ``plot()`` afterwards to inspect it.

    Args:
        ticker: Yahoo Finance ticker symbol, e.g. ``"MSFT"``.
        start_date: Start of the history window, forwarded to
            ``Ticker.history(start=...)``.
        end_date: End of the history window, forwarded to
            ``Ticker.history(end=...)``. yfinance documents ``end`` as
            exclusive, so a bar dated ``end_date`` itself is not included.
        days: Number of rows in every simulated path. Row 0 holds the last
            observed close, so each path contains ``days - 1`` simulated steps.
        sim: Number of independent paths to simulate.
        seed: Optional seed for the random generator. ``None`` (the default)
            draws fresh entropy, so repeated runs differ.

    Raises:
        ValueError: If the download yields fewer than two ``Close`` prices.
    """

    def __init__(self , ticker , start_date , end_date , days , sim , seed=None):
        self.ticker = ticker
        self.start = start_date
        self.end = end_date
        self.noOfSimulations = sim
        self.noOfDays = days
        self.seed = seed

        self.import_stock_data()
        self.calc_log_returns()
        self.volatility_calc()
        self.run_monteCarlo()

    def import_stock_data(self):
        """Download the price history for ``self.ticker`` into ``self.data``."""
        # using Yahoo Finance to get stock data
        stock = yf.Ticker(self.ticker)
        self.data = stock.history(start=self.start, end=self.end)

        # Fail early with a clear message instead of an IndexError further down.
        if 'Close' not in self.data.columns or len(self.data) < 2:
            raise ValueError(
                f"Not enough price data for {self.ticker} between {self.start} and {self.end}"
            )

    def calc_log_returns(self):
        """Calculate daily log returns of the closing price.

        For small price changes, log returns and simple returns are very
        similar. For larger moves, log returns are additive over time, which
        makes them convenient for analytical and computational methods.
        """
        close = self.data['Close']
        # log(P_t / P_{t-1}); the first element has no predecessor and is dropped.
        self.log_returns = np.log(close / close.shift(1)).dropna()

    def volatility_calc(self):
        """Volatility is the population standard deviation of the daily log returns."""
        # ddof=0 matches what np.std computes.
        self.daily_volatility = float(self.log_returns.std(ddof=0))

    def run_monteCarlo(self):
        """Run the Monte Carlo simulation and store it in ``self.simulation_df``.

        - Uses the last recorded closing price as the starting point (row 0).
        - Every following row multiplies the previous price by
          ``1 + N(0, daily_volatility)``; no drift term is applied.
        - The resulting DataFrame has one row per day and one column per
          simulated path.
        """
        # Get the last day's stock price
        last_price = self.data['Close'].iloc[-1]
        self.last_price = last_price

        rng = np.random.default_rng(getattr(self, 'seed', None))
        n_steps = max(int(self.noOfDays) - 1, 0)

        # Draw every shock at once: one row per step, one column per path.
        shocks = rng.normal(0.0, self.daily_volatility,
                            size=(n_steps, self.noOfSimulations))

        paths = np.empty((n_steps + 1, self.noOfSimulations))
        paths[0] = last_price
        paths[1:] = last_price * np.cumprod(1.0 + shocks, axis=0)

        self.simulation_df = pd.DataFrame(paths)

    def results(self):
        """Print and return the mean simulated price at the end of the horizon.

        Also prints the 2.5th and 97.5th percentiles of the final prices as a
        95% interval.

        Returns:
            float: Mean of the last row of ``self.simulation_df``.
        """
        # Final simulated price of every path (last row of the simulation)
        prices = self.simulation_df.iloc[-1]

        # Calculate the 95% confidence interval
        lower_bound = np.percentile(prices, 2.5)
        upper_bound = np.percentile(prices, 97.5)

        # Calculate the mean (expected) price
        mean_price = float(np.mean(prices))

        # Print the results
        print(f"{self.ticker} expected price for {self.noOfDays} days later is: {mean_price}")
        print(f"{self.ticker} 95% confidence interval for the price {self.noOfDays} days later is: ({lower_bound}, {upper_bound})")
        print("\n")
        return mean_price

    def plot(self):
        """Plot every simulated path with a red line at the last observed price."""
        self.simulation_df.plot(legend=False)
        plt.suptitle(f'Monte Carlo Simulation for: {self.ticker}')
        plt.axhline(y = self.last_price, color = 'r', linestyle= '-')
        plt.xlabel('Day')
        plt.ylabel('Price')



In [44]:
tickers = ["MSFT", "MARA", "JNJ", "KO", "TSLA","WMT","JPM","IBM","V"]

# History window: the ten years ending today. The clock is sampled once so
# both dates derive from the same instant.
_now = datetime.now()
end_date = _now.strftime('%Y-%m-%d')  # Today's date in YYYY-MM-DD format
start_date = (_now - timedelta(days=10*365)).strftime('%Y-%m-%d')  # ten years before today

simulation_results = []

# time periods
# NOTE(review): these horizons are calendar-day counts, but every simulated
# step is drawn from day-to-day returns of the downloaded price history.
# Confirm whether a trading-day count was intended.
num_of_days = {
    "Quarterly": round(365/4),
    "Yearly": 365,
    "2 Years": 2*365
}

# Monte Carlo simulations: one row per ticker, one column per horizon
for ticker in tickers:
    row = {"Stock": ticker}

    for column, days in num_of_days.items():
        simulation = MonteCarlo(ticker, start_date, end_date, days, 1000)
        row[column] = simulation.results()

    simulation_results.append(row)


df_results = pd.DataFrame(simulation_results)

# Make sure the output directory exists before writing (fresh checkouts may lack it).
os.makedirs("./data", exist_ok=True)
df_results.to_csv("./data/monte_carlo_results.csv", index=False)



MSFT expected price for 91 days later is: 377.8977722005491
MSFT 95% confidence interval for the price 91 days later is: (277.33515451572293, 516.2154458843105)


MSFT expected price for 365 days later is: 383.77066904806935
MSFT 95% confidence interval for the price 365 days later is: (185.09567716199896, 703.1365163138344)


MSFT expected price for 730 days later is: 379.9766693256789
MSFT 95% confidence interval for the price 730 days later is: (129.77124483304797, 802.0472827202843)


MARA expected price for 91 days later is: 11.713589319654297
MARA 95% confidence interval for the price 91 days later is: (1.7737882870985873, 36.92280049165959)


MARA expected price for 365 days later is: 11.25160942979585
MARA 95% confidence interval for the price 365 days later is: (0.16081205602156473, 70.75455956207995)


MARA expected price for 730 days later is: 11.850236712092475
MARA 95% confidence interval for the price 730 days later is: (0.009208765965731454, 100.87079004114075)


JNJ exp

### Generate stock price predictions for previous year, current day, next year and next 2 years

In [None]:
# Tickers included in the year-ago / current / projected price table
tickers = [
    "MSFT", "MARA", "JNJ",
    "KO", "TSLA", "WMT",
    "JPM", "IBM", "V",
]

# Today and the day 365 days earlier, both as YYYY-MM-DD strings
end_date = datetime.today().date().isoformat()
start_date_yearago = (datetime.today() - timedelta(days=365)).date().isoformat()

# import stock data for multiple tickers and join with Monte Carlo simulations
def get_stock_data_with_simulation(tickers, start_yearago, end):
    """Collect year-ago, current and simulated future prices per ticker.

    NOTE: a later cell in this notebook defines another function with the
    same name but a different signature; running that cell shadows this one.

    Args:
        tickers: Iterable of Yahoo Finance ticker symbols.
        start_yearago: Start date (``YYYY-MM-DD``) of the price history used
            for the year-ago and current closes.
        end: End date passed on to the ``MonteCarlo`` simulations.

    Returns:
        dict: ``{ticker: {"yearago", "current", "1y", "2y"}}`` with plain
        floats. Tickers for which no price history is returned are omitted.
    """
    # Loop-invariant: every simulation is calibrated on the last ten years.
    start_date_montecarlo = (datetime.now() - timedelta(days=10 * 365)).strftime('%Y-%m-%d')

    stock_data = {}

    for ticker in tickers:
        # fetch historical data for the year-ago and current price
        df_yearago = yf.Ticker(ticker).history(start=start_yearago)

        # ensure data exists; report the gap instead of dropping it silently
        if df_yearago.empty:
            print(f"No price data returned for {ticker}; skipping")
            continue

        closes = df_yearago["Close"]

        # run Monte Carlo simulations for 1-year and 2-years ahead
        mc_1y = MonteCarlo(ticker, start_date_montecarlo, end, 365, 1000)
        mc_2y = MonteCarlo(ticker, start_date_montecarlo, end, 2 * 365, 1000)

        # Store results as built-in floats so the dict serialises cleanly
        stock_data[ticker] = {
            "yearago": float(closes.iloc[0]),
            "current": float(closes.iloc[-1]),
            "1y": float(mc_1y.results()),  # 1-year projection
            "2y": float(mc_2y.results())   # 2-year projection
        }

    return stock_data

# get stock data with Monte Carlo results
data_dict = get_stock_data_with_simulation(tickers, start_date_yearago, end_date)

# save to JSON (create the output directory first so a fresh checkout works)
os.makedirs("./data", exist_ok=True)
with open("./data/final_stock_data.json", "w") as f:
    json.dump(data_dict, f, indent=4)

print("Data saved to final_stock_data.json")

MSFT expected price for 365 days later is: 373.6857647708509
MSFT 95% confidence interval for the price 365 days later is: (186.14224621565313, 656.1113910203934)


MSFT expected price for 730 days later is: 382.275386274378
MSFT 95% confidence interval for the price 730 days later is: (143.3581339390859, 842.464647942273)


MARA expected price for 365 days later is: 11.344959327468839
MARA 95% confidence interval for the price 365 days later is: (0.1575664933353725, 75.604555801709)


MARA expected price for 730 days later is: 10.284468640948305
MARA 95% confidence interval for the price 730 days later is: (0.016170698081019848, 81.31538045039812)


JNJ expected price for 365 days later is: 163.89079456610344
JNJ 95% confidence interval for the price 365 days later is: (103.62004878565206, 244.8240338360956)


JNJ expected price for 730 days later is: 160.00802830299048
JNJ 95% confidence interval for the price 730 days later is: (87.13931989341484, 265.32292200564484)


KO expected p

### Compute compound interest

In [None]:
def calculate_percentage_changes(filename):
    """Summarise step-to-step percentage changes of a ``Prediction`` column.

    Args:
        filename: Path to a CSV file with a ``Prediction`` column. The ticker
            is taken from the file's base name, up to the first underscore.

    Returns:
        dict | None: ``{'Ticker', 'Avg Increase', 'Avg Decrease'}`` where the
        averages are whole-number percent strings (truncated toward zero by
        ``int``), or ``None`` when the file is missing, lacks the column or
        holds fewer than two non-missing values.
    """
    if not os.path.exists(filename):
        print(f"File not found: {filename}")
        return None

    # Read the CSV file
    df = pd.read_csv(filename)

    # Check if 'Prediction' column exists
    if 'Prediction' not in df.columns:
        print(f"'Prediction' column not found in {filename}. Columns available: {df.columns}")
        return None

    # Missing predictions carry no information; compare the remaining neighbours.
    values = df['Prediction'].dropna().tolist()

    # Ensure there are enough values to calculate percentage changes
    if len(values) < 2:
        print(f"Not enough data in {filename}")
        return None

    # Percentage change between consecutive values; a zero base has no
    # defined percentage change, so those pairs are skipped.
    percentage_changes = [
        (current - previous) / previous * 100
        for previous, current in zip(values, values[1:])
        if previous != 0
    ]

    # Separate increases and decreases
    increases = [p for p in percentage_changes if p > 0]
    decreases = [p for p in percentage_changes if p < 0]

    avg_increase = int(sum(increases) / len(increases)) if increases else 0
    avg_decrease = int(sum(decreases) / len(decreases)) if decreases else 0

    # Extract ticker from the file name only, ignoring any directory part
    ticker = os.path.basename(filename).split('_')[0]

    return {
        'Ticker': ticker,
        'Avg Increase': f"{avg_increase}%",
        'Avg Decrease': f"{avg_decrease}%"
    }


In [None]:
# Tickers to analyse
tickers = [
    "MSFT", "MARA", "JNJ",
    "KO", "TSLA", "WMT",
    "JPM", "IBM", "V",
]

# The four calendar years before the current one, oldest first (4Y, 3Y, 2Y, 1Y ago)
end_years = [datetime.today().year - offset for offset in (4, 3, 2, 1)]

# Label -> nominal last day of that year
end_dates = {}
for year in end_years:
    end_dates[f"{year} End"] = f"{year}-12-31"

# Monte Carlo calibration window starts 10 * 365 days before today
monte_carlo_start = (datetime.today() - timedelta(days=10 * 365)).strftime('%Y-%m-%d')

# Function to get stock data and run Monte Carlo simulations
def get_stock_data_with_simulation(tickers, end_dates, end_date):
    """Build a table of year-end closes plus 1- and 2-year simulated prices.

    Args:
        tickers: Iterable of Yahoo Finance ticker symbols.
        end_dates: Mapping whose keys look like ``"<year> End"``. The year is
            parsed from each key and the key becomes a column name.
        end_date: End date (``YYYY-MM-DD``) for the price history and for the
            ``MonteCarlo`` simulations.

    Returns:
        pandas.DataFrame: One row per ticker with ``Ticker``, one column per
        ``end_dates`` label (``None`` where that year has no data),
        ``1Y Prediction`` and ``2Y Prediction``. Tickers without any price
        history are omitted.
    """
    label_years = {label: int(label.split()[0]) for label in end_dates}

    # Start at the beginning of the earliest requested year. Starting on its
    # Dec 31 would leave that year empty whenever Dec 31 is not a trading day.
    history_start = f"{min(label_years.values())}-01-01"

    all_results = []

    for ticker in tickers:
        stock = yf.Ticker(ticker)
        df = stock.history(start=history_start, end=end_date)  # Fetch historical data

        if df.empty:
            print(f"No price data returned for {ticker}; skipping")
            continue

        # Find the last available trading day for each past year
        prices = {"Ticker": ticker}

        for label, year in label_years.items():
            df_year = df[df.index.year == year]  # Filter for that year's data

            if not df_year.empty:
                last_trading_day = df_year.index.max()  # Last available trading day
                prices[label] = float(df_year.loc[last_trading_day, "Close"])
            else:
                prices[label] = None  # If no data available

        # Monte Carlo simulations calibrated on the window starting at monte_carlo_start
        mc_1y = MonteCarlo(ticker, monte_carlo_start, end_date, 365, 1000)
        mc_2y = MonteCarlo(ticker, monte_carlo_start, end_date, 2 * 365, 1000)

        # Add predictions
        prices["1Y Prediction"] = mc_1y.results()
        prices["2Y Prediction"] = mc_2y.results()

        all_results.append(prices)

    return pd.DataFrame(all_results)

# Get stock data and save results
stock_df = get_stock_data_with_simulation(tickers, end_dates, datetime.today().strftime('%Y-%m-%d'))

# Create the output directory if needed, then write the table
os.makedirs("./data", exist_ok=True)
stock_df.to_csv("./data/stock_data.csv", index=False)

# Report the file that was actually written
print("Data saved to stock_data.csv")


MSFT expected price for 365 days later is: 375.4170610528344
MSFT 95% confidence interval for the price 365 days later is: (184.59036179500515, 657.352916277537)


MSFT expected price for 730 days later is: 386.737524132061
MSFT 95% confidence interval for the price 730 days later is: (137.47204274286727, 829.4313438653675)


MARA expected price for 365 days later is: 9.974196333848912
MARA 95% confidence interval for the price 365 days later is: (0.16691564622496202, 52.516540347110016)


MARA expected price for 730 days later is: 10.811814175193026
MARA 95% confidence interval for the price 730 days later is: (0.013405552480108628, 88.46181900273358)


JNJ expected price for 365 days later is: 163.59514641426986
JNJ 95% confidence interval for the price 365 days later is: (105.80424838954885, 239.8509995043796)


JNJ expected price for 730 days later is: 165.2145255871897
JNJ 95% confidence interval for the price 730 days later is: (88.06233922717037, 286.7153597701964)


KO expected

### Calculate average increases/decreases

In [None]:
# Single file containing all tickers as rows
filename = "./data/stock_data.csv"

def calculate_percentage_changes_from_file(filename):
    """Compute average percentage increase and decrease per ticker.

    The value sequence of a row is every ``"<year> End"`` column in ascending
    year order, followed by ``"1Y Prediction"`` and ``"2Y Prediction"``. Year
    columns are discovered from the file, so the function keeps working when
    the calendar year, and with it the column names, changes.

    Args:
        filename: Path to a CSV with a ``Ticker`` column, at least one
            ``"<year> End"`` column and both prediction columns.

    Returns:
        pandas.DataFrame | None: Columns ``Ticker``, ``Avg Increase`` and
        ``Avg Decrease`` (whole-number percent strings, truncated toward zero
        by ``int``). ``None`` if the file or a required column is missing.
    """
    if not os.path.exists(filename):
        print(f"File not found: {filename}")
        return None

    # Read the CSV file
    df = pd.read_csv(filename)

    # Discover the year-end columns and order them chronologically
    suffix = " End"
    year_columns = sorted(
        (col for col in df.columns
         if col.endswith(suffix) and col[:-len(suffix)].isdigit()),
        key=lambda col: int(col[:-len(suffix)]),
    )
    prediction_columns = ["1Y Prediction", "2Y Prediction"]

    # Ensure required columns exist
    missing_columns = [col for col in ["Ticker"] + prediction_columns if col not in df.columns]
    if not year_columns:
        missing_columns.append("<year> End")

    if missing_columns:
        print(f"Missing columns in {filename}: {missing_columns}")
        return None

    value_columns = year_columns + prediction_columns

    # Create a list to store results
    all_results = []

    for _, row in df.iterrows():
        ticker = row["Ticker"]
        values = [row[col] for col in value_columns if pd.notna(row[col])]  # Filter out NaN values

        # Ensure there are enough values to compute percentage changes
        if len(values) < 2:
            print(f"Not enough data for {ticker}")
            continue

        # Percentage change between consecutive values; a zero base has no
        # defined percentage change, so those pairs are skipped.
        percentage_changes = [
            (current - previous) / previous * 100
            for previous, current in zip(values, values[1:])
            if previous != 0
        ]

        # Separate increases and decreases
        increases = [p for p in percentage_changes if p > 0]
        decreases = [p for p in percentage_changes if p < 0]

        avg_increase = int(sum(increases) / len(increases)) if increases else 0
        avg_decrease = int(sum(decreases) / len(decreases)) if decreases else 0

        all_results.append({
            'Ticker': ticker,
            'Avg Increase': f"{avg_increase}%",
            'Avg Decrease': f"{avg_decrease}%"
        })

    return pd.DataFrame(all_results)

# Process the file and save results
results_df = calculate_percentage_changes_from_file(filename)

if results_df is not None and not results_df.empty:
    results_df.to_csv('./data/percentage_changes.csv', index=False)
    print("Results saved to 'percentage_changes.csv'")
    print(results_df)
else:
    print("No valid data found.")


Results saved to 'percentage_changes.csv'
  Ticker Avg Increase Avg Decrease
0   MSFT          24%         -19%
1   MARA         297%         -52%
2    JNJ           6%          -6%
3     KO           7%          -4%
4   TSLA          82%         -38%
5    WMT          29%          -3%
6    JPM          25%          -8%
7    IBM          21%           0%
8      V          17%          -2%


### Calculate compound interest for each portfolio

In [None]:
# Which tickers make up each portfolio
portfolios = dict([
    ("Short-Term (Volatile Portfolio)", ["MSFT", "TSLA", "MARA"]),
    ("Moderate-Term (Stable Portfolio)", ["TSLA", "JNJ", "IBM", "WMT", "JPM"]),
    ("Long-Term (Moderate Portfolio)", ["KO", "WMT", "V"]),
])

# Starting capital per portfolio (same keys as `portfolios`)
investment_budgets = dict([
    ("Short-Term (Volatile Portfolio)", 500),
    ("Moderate-Term (Stable Portfolio)", 8000),
    ("Long-Term (Moderate Portfolio)", 30000),
])

# Target amounts, grouped by the term prefix of the portfolio name
goals = {
    "Short-Term": {"iPhone 16": 1300, "Vacation Trip": 3500},
    "Moderate-Term": {"Car": 41950, "House": 330000},
    "Long-Term": {"Retirement": 1000000},
}

# Per-ticker average increase/decrease strings, keyed by ticker.
# Relies on `results_df` produced by the percentage-change cell.
all_results = results_df.set_index("Ticker").to_dict("index")

# Compound interest growth years
def calculate_years_to_goal(initial, growth_rate, goal):
    """Years of annual compounding needed for ``initial`` to reach ``goal``.

    Solves ``initial * (1 + growth_rate) ** years == goal`` for ``years``.

    Args:
        initial: Starting amount.
        growth_rate: Annual growth as a fraction (``0.08`` means 8 %).
        goal: Target amount.

    Returns:
        ``0.0`` when the goal is already met, the string ``"∞"`` when the
        goal can never be reached (non-positive growth or non-positive
        starting amount), otherwise the number of years rounded to two
        decimals.
    """
    # Already there: no waiting time (the formula would give a negative value).
    if goal <= initial:
        return 0.0
    # Without positive growth or a positive base the amount never reaches the goal.
    if growth_rate <= 0 or initial <= 0:
        return "∞"
    years = np.log(goal / initial) / np.log(1 + growth_rate)
    return round(years, 2)

# Dividend yield fetcher
def get_dividend_yield(ticker):
    """Return the ``dividendYield`` field Yahoo Finance reports for ``ticker``.

    Best effort: any failure while fetching or reading the field is reported
    and treated as "no dividend".

    NOTE(review): callers add this value to a fractional growth rate. The
    portfolio table printed under this cell shows average yields of roughly
    29-182 %, which suggests the field arrives in percent units. Confirm the
    unit for the installed yfinance version and divide by 100 if so.

    Args:
        ticker: Yahoo Finance ticker symbol.

    Returns:
        The reported dividend yield, or ``0`` when it is missing, falsy or
        could not be fetched.
    """
    try:
        stock = yf.Ticker(ticker)
        return stock.info.get("dividendYield", 0) or 0
    except Exception as exc:
        # Keep the notebook running, but do not hide the failure. Unlike a
        # bare except, KeyboardInterrupt and SystemExit still propagate.
        print(f"Could not fetch dividend yield for {ticker}: {exc}")
        return 0



# Compile all portfolio data
portfolio_results = []

for portfolio_name, stocks in portfolios.items():
    investment = investment_budgets[portfolio_name]

    # Only tickers with computed percentage changes can contribute. Report
    # the rest instead of failing with a KeyError.
    known_stocks = [stock for stock in stocks if stock in all_results]
    unknown_stocks = [stock for stock in stocks if stock not in all_results]
    if unknown_stocks:
        print(f"{portfolio_name}: no percentage-change data for {unknown_stocks}; ignoring them")
    if not known_stocks:
        print(f"{portfolio_name}: no usable stocks; skipping portfolio")
        continue

    total_growth = 0
    total_dividends = 0

    for stock in known_stocks:
        # "24%" -> 0.24; decreases are stored as negative percentages
        avg_increase = float(all_results[stock]["Avg Increase"].replace("%", "")) / 100
        avg_decrease = float(all_results[stock]["Avg Decrease"].replace("%", "")) / 100
        net_growth = (avg_increase + avg_decrease) / 2
        dividend_yield = get_dividend_yield(stock)

        total_growth += net_growth
        total_dividends += dividend_yield

    avg_net_growth = total_growth / len(known_stocks)
    avg_dividend_yield = total_dividends / len(known_stocks)
    effective_growth = avg_net_growth + avg_dividend_yield

    # Determine goals per category: first goal group whose name occurs in the portfolio name
    goal_set = next(
        (group for term, group in goals.items() if term in portfolio_name),
        {},
    )

    goal_years = {
        goal_name: calculate_years_to_goal(investment, effective_growth, goal_value)
        for goal_name, goal_value in goal_set.items()
    }

    portfolio_results.append({
        "Portfolio": portfolio_name,
        "Investment Amount": investment,
        "Avg Growth (no dividend)": f"{avg_net_growth*100:.2f}%",
        "Avg Dividend Yield": f"{avg_dividend_yield*100:.2f}%",
        "Effective Growth Rate": f"{effective_growth*100:.2f}%",
        **goal_years,
    })

# Create final DataFrame
df = pd.DataFrame(portfolio_results)
pd.set_option('display.max_columns', None)
print(df)

# Save to file (create the output directory first so a fresh checkout works)
os.makedirs("./data", exist_ok=True)
df.to_csv("./data/portfolio_goals_with_feedback.csv", index=False)
print("\nFile saved: portfolio_goals_with_feedback.csv")


                          Portfolio  Investment Amount  \
0   Short-Term (Volatile Portfolio)                500   
1  Moderate-Term (Stable Portfolio)               8000   
2    Long-Term (Moderate Portfolio)              30000   

  Avg Growth (no dividend) Avg Dividend Yield Effective Growth Rate  \
0                   49.00%             29.33%                78.33%   
1                   10.80%            181.80%               192.60%   
2                    7.33%            158.67%               166.00%   

   iPhone 16  Vacation Trip   Car  House  Retirement  
0       1.65           3.36   NaN    NaN         NaN  
1        NaN            NaN  1.54   3.46         NaN  
2        NaN            NaN   NaN    NaN        3.58  

File saved: portfolio_goals_with_feedback.csv


### Compute portfolios per persona

In [None]:
# Per-persona simulation inputs: starting balance, monthly contribution,
# annual return and the fractional loss applied in the crash scenario.
initials = {
    'Risky': dict(initial=500.0, monthly=0.0, annual_return=0.12, crash_drop=0.105),        # 10.5%
    'Average': dict(initial=8000.0, monthly=50.0, annual_return=0.08, crash_drop=0.0625),   # 6.25%
    'LongTerm': dict(initial=20000.0, monthly=100.0, annual_return=0.08, crash_drop=0.125), # 12.5%
}

# Target balances per persona, in ascending order.
# Note: this rebinds the name `goals` used by the portfolio cell above.
goals = dict(
    Risky=[1000.0, 10000.0, 30000.0],
    Average=[30000.0, 150000.0, 300000.0],
    LongTerm=[300000.0, 500000.0],
)



def months_to_goals(portfolio_key, crash=True):
    """Simulate a portfolio month by month and report when each goal is hit.

    Each month the balance grows by ``annual_return / 12`` and then receives
    the monthly contribution. With ``crash=True`` the balance is additionally
    reduced by ``crash_drop`` at the end of month 36. The simulation stops
    once every goal is reached, or after 600 months (50 years).

    Returns a list aligned with ``goals[portfolio_key]``: the first month in
    which the balance met or exceeded that goal, or ``None`` if it never did
    within the limit.
    """
    params = initials[portfolio_key]
    targets = goals[portfolio_key]

    rate = params['annual_return'] / 12.0
    drop = params['crash_drop']

    # One slot per goal; None means "not reached yet"
    reached = [None] * len(targets)
    balance = params['initial']

    month = 0
    while month < 600:
        month += 1

        # growth first, then the monthly contribution
        balance = balance * (1 + rate) + params['monthly']

        # one-off crash at the end of month 36
        if month == 36 and crash:
            balance *= (1 - drop)

        # record the first month each outstanding goal is met
        for idx, target in enumerate(targets):
            if reached[idx] is None and balance >= target:
                reached[idx] = month

        # nothing left to wait for
        if None not in reached:
            break

    return reached

def format_years(m):
    """Render a month count as years with one decimal; ``None`` becomes "N/A"."""
    return "N/A" if m is None else f"{m/12:.1f} yrs"

# Time to each goal per persona, once without and once with the crash
results = {}
for pkey in ('Risky', 'Average', 'LongTerm'):
    no_crash_months, crash_months = (
        months_to_goals(pkey, crash=flag) for flag in (False, True)
    )
    results[pkey] = dict(
        no_crash=list(map(format_years, no_crash_months)),
        crash=list(map(format_years, crash_months)),
    )

# Show both timelines for every persona
for pkey, timeline in results.items():
    print(f"{pkey} Portfolio:")
    print("  No Crash:", timeline['no_crash'])
    print("  Crash   :", timeline['crash'])
    print()

Risky Portfolio:
  No Crash: ['5.8 yrs', '25.2 yrs', '34.3 yrs']
  Crash   : ['6.8 yrs', '26.1 yrs', '35.2 yrs']

Average Portfolio:
  No Crash: ['11.1 yrs', '29.1 yrs', '37.5 yrs']
  Crash   : ['11.6 yrs', '29.6 yrs', '38.0 yrs']

LongTerm Portfolio:
  No Crash: ['27.6 yrs', '33.8 yrs']
  Crash   : ['28.7 yrs', '34.8 yrs']

