<a href="https://colab.research.google.com/github/Parthi1212-dotcom/Investment-Portfolio-through-Evolutionary-algorithms/blob/main/All_in_One_Yahoo_Finance_GA_Portfolio_Optimizer_15_Jul%2C_14_11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import yfinance as yf
import pandas as pd
import numpy as np

# --- 1. Configuration & Parameters ---

# ==> You can change the list of stocks you want to analyze here.
EQUITY_TICKERS = [
    'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'NVDA', 'TSLA',
    'JPM', 'V', 'JNJ', 'WMT', 'PG', 'MA'
]

In [3]:
# ==> Define the date range for the historical data.
START_DATE = '2016-01-01'
END_DATE = '2023-12-31'

In [4]:
# Genetic Algorithm Parameters
POPULATION_SIZE = 100  # Number of portfolios in each generation
NUM_GENERATIONS = 50   # How many generations to run the algorithm for
MUTATION_RATE = 0.01   # Probability of a small random change in a portfolio's weights
TOURNAMENT_SIZE = 5    # Number of portfolios selected to compete for breeding

# Financial Parameters
RISK_FREE_RATE = 0.02  # Annual risk-free rate (e.g., 2%)

In [5]:
# --- 2. Data Acquisition and Preparation ---

def get_price_data(tickers, start_date, end_date):
    """Downloads historical closing prices from Yahoo Finance."""
    print(f"Downloading historical data for {len(tickers)} equities...")

    data = yf.download(tickers, start=start_date, end=end_date)

    # Isolate the 'Close' prices
    price_matrix = data['Close']

    # Handle any missing values
    price_matrix.fillna(method='ffill', inplace=True)
    price_matrix.fillna(method='bfill', inplace=True)

    print("Data download and cleaning complete.")
    return price_matrix

In [6]:
# --- 3. Calculate Financial Inputs ---

def calculate_financial_inputs(price_matrix):
    """Calculates annualized returns and covariance from the price matrix."""
    # Calculate daily returns
    daily_returns = price_matrix.pct_change().dropna()

    # Calculate mean annualized returns for each stock
    mean_returns = daily_returns.mean()
    annualized_returns = mean_returns * 252 # 252 trading days in a year

    # Calculate the annualized covariance matrix
    cov_matrix = daily_returns.cov()
    annualized_cov_matrix = cov_matrix * 252

    print("Calculated annualized returns and covariance matrix.")
    return annualized_returns, annualized_cov_matrix

# --- 4. Genetic Algorithm Core Functions ---

def create_individual(num_tickers):
    """Creates a single random portfolio (an array of weights)."""
    weights = np.random.random(num_tickers)
    return weights / np.sum(weights) # Normalize weights to sum to 1

def create_population(pop_size, num_tickers):
    """Creates the initial population of random portfolios."""
    return [create_individual(num_tickers) for _ in range(pop_size)]

def calculate_fitness(weights, mean_returns, cov_matrix, risk_free_rate):
    """Calculates the fitness (Sharpe Ratio) of a single portfolio."""
    portfolio_return = np.sum(mean_returns * weights)
    portfolio_std_dev = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))

    if portfolio_std_dev == 0: return 0

    sharpe_ratio = (portfolio_return - risk_free_rate) / portfolio_std_dev
    return sharpe_ratio

def selection(population, fitness_scores):
    """Selects a parent from the population using tournament selection."""
    tournament_indices = np.random.choice(len(population), TOURNAMENT_SIZE, replace=False)
    tournament_fitness = [fitness_scores[i] for i in tournament_indices]
    winner_index = tournament_indices[np.argmax(tournament_fitness)]
    return population[winner_index]

def crossover(parent1, parent2):
    """Creates a child portfolio from two parents using one-point crossover."""
    crossover_point = np.random.randint(1, len(parent1) - 1)
    child_weights = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
    return child_weights / np.sum(child_weights) # Re-normalize

def mutate(individual, mutation_rate):
    """Applies random mutations to an individual's weights."""
    for i in range(len(individual)):
        if np.random.random() < mutation_rate:
            individual[i] += np.random.uniform(-0.05, 0.05)
            if individual[i] < 0: individual[i] = 0
    return individual / np.sum(individual) # Re-normalize

# --- 5. Main Execution Block ---

if __name__ == '__main__':
    # Step 1: Get Data
    price_matrix = get_price_data(EQUITY_TICKERS, START_DATE, END_DATE)

    # Step 2: Calculate Financial Inputs
    mean_returns, cov_matrix = calculate_financial_inputs(price_matrix)
    num_tickers = len(EQUITY_TICKERS)

    # Step 3: Initialize Population
    population = create_population(POPULATION_SIZE, num_tickers)

    print("\n--- Starting Genetic Algorithm ---")

    # Step 4: Run Generations
    for gen in range(NUM_GENERATIONS):
        fitness_scores = [calculate_fitness(ind, mean_returns, cov_matrix, RISK_FREE_RATE) for ind in population]

        next_generation = []

        # Keep the best individual (elitism)
        best_index = np.argmax(fitness_scores)
        next_generation.append(population[best_index])

        # Create the rest of the new population
        while len(next_generation) < POPULATION_SIZE:
            parent1 = selection(population, fitness_scores)
            parent2 = selection(population, fitness_scores)
            child = crossover(parent1, parent2)
            child = mutate(child, MUTATION_RATE)
            next_generation.append(child)

        population = next_generation

        print(f"Generation {gen + 1}/{NUM_GENERATIONS} | Best Sharpe Ratio: {max(fitness_scores):.4f}")

    # Step 5: Display Final Results
    print("\n--- Genetic Algorithm Finished ---")

    final_fitness_scores = [calculate_fitness(ind, mean_returns, cov_matrix, RISK_FREE_RATE) for ind in population]
    best_portfolio_index = np.argmax(final_fitness_scores)
    best_portfolio_weights = population[best_portfolio_index]

    final_return = np.sum(mean_returns * best_portfolio_weights)
    final_std_dev = np.sqrt(np.dot(best_portfolio_weights.T, np.dot(cov_matrix, best_portfolio_weights)))
    final_sharpe = final_fitness_scores[best_portfolio_index]

    print("\nOptimal Portfolio Found:")
    print("---------------------------------")
    print(f"Annualized Return: {final_return:.2%}")
    print(f"Annualized Volatility (Risk): {final_std_dev:.2%}")
    print(f"Sharpe Ratio: {final_sharpe:.4f}")
    print("---------------------------------")
    print("\nPortfolio Allocation:")

    results = pd.Series(best_portfolio_weights, index=EQUITY_TICKERS)
    results = results[results > 0.001] # Filter out tiny weights
    results.sort_values(ascending=False, inplace=True)

    for ticker, weight in results.items():
        print(f"{ticker}: {weight:.2%}")

Downloading historical data for 12 equities...


  data = yf.download(tickers, start=start_date, end=end_date)
[*********************100%***********************]  12 of 12 completed
  price_matrix.fillna(method='ffill', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  price_matrix.fillna(method='ffill', inplace=True)
  price_matrix.fillna(method='bfill', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  price_matrix.fillna(method='bfill', inplace=True)


Data download and cleaning complete.
Calculated annualized returns and covariance matrix.

--- Starting Genetic Algorithm ---
Generation 1/50 | Best Sharpe Ratio: 1.2467
Generation 2/50 | Best Sharpe Ratio: 1.2857
Generation 3/50 | Best Sharpe Ratio: 1.2857
Generation 4/50 | Best Sharpe Ratio: 1.3012
Generation 5/50 | Best Sharpe Ratio: 1.3131
Generation 6/50 | Best Sharpe Ratio: 1.3270
Generation 7/50 | Best Sharpe Ratio: 1.3296
Generation 8/50 | Best Sharpe Ratio: 1.3314
Generation 9/50 | Best Sharpe Ratio: 1.3363
Generation 10/50 | Best Sharpe Ratio: 1.3391
Generation 11/50 | Best Sharpe Ratio: 1.3456
Generation 12/50 | Best Sharpe Ratio: 1.3490
Generation 13/50 | Best Sharpe Ratio: 1.3495
Generation 14/50 | Best Sharpe Ratio: 1.3519
Generation 15/50 | Best Sharpe Ratio: 1.3527
Generation 16/50 | Best Sharpe Ratio: 1.3530
Generation 17/50 | Best Sharpe Ratio: 1.3533
Generation 18/50 | Best Sharpe Ratio: 1.3541
Generation 19/50 | Best Sharpe Ratio: 1.3544
Generation 20/50 | Best Shar