<a href="https://colab.research.google.com/github/Murarin8877/FinancialBigDataAnalysis-homework/blob/main/week13%E4%BD%9C%E6%A5%AD_%E5%9F%BA%E5%9B%A0%E6%BC%94%E7%AE%97%E6%B3%95.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import yfinance as yf
import pandas as pd
import numpy as np

# 定义股票列表
start_date = '2020-01-01'
end_date = '2023-01-01'

stock_symbols = ['2330.TW', '1201.TW', '2454.TW', '2357.TW', '4426.TW', '8271.TW']

# 下載股票資料
def get_stock_data(symbols, start, end):
    stock_data = yf.download(symbols, start=start, end=end)['Adj Close']
    return stock_data

# 計算年化報酬率
def annualized_return(stock_data):
    daily_return = stock_data.pct_change().dropna()
    annual_return = daily_return.mean() * 252  # 每年有252個交易日
    return annual_return

# 計算年化波動率
def annualized_volatility(stock_data):
    daily_return = stock_data.pct_change().dropna()
    annual_volatility = daily_return.std() * np.sqrt(252)  # 每年有252個交易日
    return annual_volatility

# 計算投資組合的年化報酬率和年化波動率
def portfolio_performance(weights, returns, covariance_matrix):
    portfolio_return = np.dot(weights, returns)
    portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(covariance_matrix, weights)))
    return portfolio_return, portfolio_volatility

def max_drawdown(cumulative_returns):
    # 計算累積回報的最大回撤
    peak = cumulative_returns[0]
    max_dd = 0
    for value in cumulative_returns:
        peak = max(peak, value)
        drawdown = (peak - value) / peak
        max_dd = max(max_dd, drawdown)
    return max_dd

def management_complexity(chromosome):
    # 計算選擇的股票數量（即基因中為1的個數）
    selected_stocks = np.sum(chromosome)
    return selected_stocks

def transaction_cost(current_chromosome, previous_chromosome, cost_per_trade=0.001):
    # 計算交易成本：即基因變化的數量，並乘以每次交易的成本
    changes = np.sum(current_chromosome != previous_chromosome)  # 計算基因的變化數量
    return changes * cost_per_trade

# 更新fitness函數，使其只返回適應度值
def fitness_function(chromosome, returns, covariance_matrix, stock_data, previous_chromosome=None, cost_per_trade=0.001):
    selected_returns = returns * chromosome
    selected_covariance = covariance_matrix * (chromosome[:, None] * chromosome[None, :])

    portfolio_return, portfolio_volatility = portfolio_performance(chromosome, selected_returns, selected_covariance)

    if np.isnan(portfolio_return) or np.isnan(portfolio_volatility):
        return 0  # 如果報酬率或波動率為NaN，返回0

    if portfolio_volatility == 0:
        sharpe_ratio = 0  # 避免分母為零
    else:
        sharpe_ratio = portfolio_return / portfolio_volatility

    cumulative_returns = (1 + selected_returns).cumprod()
    max_dd = max_drawdown(cumulative_returns)

    complexity = management_complexity(chromosome)

    transaction_cost_val = 0
    if previous_chromosome is not None:
        transaction_cost_val = transaction_cost(chromosome, previous_chromosome, cost_per_trade)

    # 最終適應度是單一數值
    fitness = 0.4 * sharpe_ratio + 0.2 * (1 - max_dd) + 0.2 * complexity - 0.1 * transaction_cost_val

    return fitness





# 初始化族群
def initialize_population(pop_size, num_stocks):
    # 使用隨機生成0或1的基因組合，這裡設定一定比例的0和1，以增加多樣性
    population = np.random.randint(2, size=(pop_size, num_stocks))
    return population


# 選擇父母
def selection(population, fitness):
    fitness = fitness - np.min(fitness) if np.min(fitness) < 0 else fitness
    total_fitness = np.sum(fitness)

    # 如果適應度總和為零，均勻選擇
    if total_fitness == 0:
        prob = np.ones(len(fitness)) / len(fitness)
    else:
        prob = fitness / total_fitness

    # 使用概率選擇
    selected_parents = population[np.random.choice(population.shape[0], size=2, p=prob)]
    return selected_parents

# 交叉操作
def crossover(parent1, parent2):
    crossover_point = np.random.randint(1, len(parent1))
    offspring1 = np.concatenate([parent1[:crossover_point], parent2[crossover_point:]])
    offspring2 = np.concatenate([parent2[:crossover_point], parent1[crossover_point:]])
    return offspring1, offspring2

# 突變操作
def mutation(offspring, mutation_rate):
    for i in range(len(offspring)):
        if np.random.rand() < mutation_rate:
            offspring[i] = 1 - offspring[i]  # 翻轉基因
    return offspring

# 替換操作
def replace(population, offspring, fitness):
    sorted_indices = np.argsort(fitness)[::-1]
    population[:len(offspring)] = offspring
    return population

In [4]:
# 基因演算法參數
population_size = 20 # 族群的大小
num_generations = 200 # 世代數
mutation_rate = 0.8 # 突變率
num_stocks = len(stock_symbols) # 有幾支股票

In [6]:
# 基因演算法主體
def genetic_algorithm(stock_symbols, start_date, end_date, pop_size=100, generations=100, mutation_rate=0.01, cost_per_trade=0.001):
    # 下載股票數據
    stock_data = get_stock_data(stock_symbols, start_date, end_date)

    # 計算每支股票的年化報酬率和年化波動率
    returns = annualized_return(stock_data)
    volatility = annualized_volatility(stock_data)

    # 計算股票之間的協方差矩陣
    daily_return = stock_data.pct_change().dropna()
    covariance_matrix = daily_return.cov() * 252  # 協方差矩陣年化

    # 初始化族群
    population = initialize_population(pop_size, len(stock_symbols))

    best_fitness = -np.inf  # 初始化最佳適應度
    best_portfolio = None   # 初始化最佳投資組合
    previous_population = None  # 初始代無前一代

    for generation in range(generations):
        # 計算每個染色體的適應度
        fitness_scores = [fitness_function(chromosome, returns, covariance_matrix, stock_data, previous_chromosome=None, cost_per_trade=cost_per_trade) for chromosome in population]

        # 選擇兩個父母進行交配
        parents = selection(population, fitness_scores)

        # 進行交叉
        offspring1, offspring2 = crossover(parents[0], parents[1])

        # 進行突變
        offspring1 = mutation(offspring1, mutation_rate)
        offspring2 = mutation(offspring2, mutation_rate)

        # 形成新一代族群
        offspring = np.array([offspring1, offspring2])

        # 替換舊族群
        population = replace(population, offspring, fitness_scores)

        # 更新最好的結果
        max_fitness_idx = np.argmax(fitness_scores)
        if fitness_scores[max_fitness_idx] > best_fitness:
            best_fitness = fitness_scores[max_fitness_idx]
            best_portfolio = population[max_fitness_idx]

        # 打印每代最好的適應度
        print(f"Generation {generation + 1}, Best Fitness: {best_fitness}")

        # 記錄當前族群
        previous_population = population.copy()

    return best_portfolio, best_fitness



# # 執行基因演算法
# best_portfolio, best_fitness = genetic_algorithm(stock_symbols, start_date, end_date, generations=100)

# print("\nBest Portfolio:", best_portfolio)
# print("Best Fitness Score:", best_fitness)


In [7]:
# 添加总回报、总风险和夏普比率计算
def calculate_portfolio_metrics(portfolio, returns, covariance_matrix):
    """根据最佳投资组合计算总回报、总风险和夏普比率"""
    portfolio_weights = portfolio / np.sum(portfolio)  # 确保权重总和为1
    portfolio_return = np.dot(portfolio_weights, returns)
    portfolio_volatility = np.sqrt(np.dot(portfolio_weights.T, np.dot(covariance_matrix, portfolio_weights)))
    risk_free_rate = 0.01  # 假设无风险收益率为1%
    sharpe_ratio = (portfolio_return - risk_free_rate) / portfolio_volatility
    return portfolio_return, portfolio_volatility, sharpe_ratio

# 在遗传算法执行完成后，计算最佳组合的相关指标
best_portfolio, best_fitness = genetic_algorithm(stock_symbols, start_date, end_date, generations=100)

# 计算最佳组合的总回报、总风险和夏普比率
annual_returns = annualized_return(get_stock_data(stock_symbols, start_date, end_date))  # 年化收益率
cov_matrix = get_stock_data(stock_symbols, start_date, end_date).pct_change().dropna().cov() * 252  # 年化协方差矩阵

portfolio_return, portfolio_volatility, sharpe_ratio = calculate_portfolio_metrics(
    best_portfolio, annual_returns, cov_matrix
)

# 输出最佳组合和其指标
print("\nBest Portfolio (Stock Selection):", best_portfolio)
print("Best Fitness Score:", best_fitness)
print(f"Total Annual Return: {portfolio_return:.2%}")
print(f"Total Risk (Annual Volatility): {portfolio_volatility:.2%}")
print(f"Sharpe Ratio: {sharpe_ratio:.2f}")


[*********************100%***********************]  6 of 6 completed
  peak = cumulative_returns[0]


Generation 1, Best Fitness: 1.39267849926402
Generation 2, Best Fitness: 1.39267849926402
Generation 3, Best Fitness: 1.39267849926402
Generation 4, Best Fitness: 1.4492081420724345
Generation 5, Best Fitness: 1.5646657623349123
Generation 6, Best Fitness: 1.5646657623349123
Generation 7, Best Fitness: 1.5646657623349123
Generation 8, Best Fitness: 1.5646657623349123
Generation 9, Best Fitness: 1.5646657623349123
Generation 10, Best Fitness: 1.5646657623349123
Generation 11, Best Fitness: 1.5646657623349123
Generation 12, Best Fitness: 1.5646657623349123
Generation 13, Best Fitness: 1.5646657623349123
Generation 14, Best Fitness: 1.5646657623349123
Generation 15, Best Fitness: 1.5646657623349123
Generation 16, Best Fitness: 1.5646657623349123
Generation 17, Best Fitness: 1.5646657623349123
Generation 18, Best Fitness: 1.5646657623349123
Generation 19, Best Fitness: 1.5646657623349123
Generation 20, Best Fitness: 1.5646657623349123
Generation 21, Best Fitness: 1.5646657623349123
Generat

[**********************50%                       ]  3 of 6 completed

Generation 99, Best Fitness: 1.5646657623349123
Generation 100, Best Fitness: 1.5646657623349123


[*********************100%***********************]  6 of 6 completed
[*********************100%***********************]  6 of 6 completed


Best Portfolio (Stock Selection): [0 0 1 0 1 1]
Best Fitness Score: 1.5646657623349123
Total Annual Return: 6.87%
Total Risk (Annual Volatility): 25.36%
Sharpe Ratio: 0.23



