<a href="https://colab.research.google.com/github/PinRay22/Big-Data-Analytics-for-Finance/blob/main/week13.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
!pip install yfinance



In [11]:
# 匯入必要的套件
import yfinance as yf  # 用於抓取股票數據的 Yahoo Finance API
import numpy as np  # 用於數值計算和陣列操作的數學工具
import random  # 用於基因演算法中隨機選擇、交配和突變的工具

# 固定隨機種子
np.random.seed(42)
random.seed(42)

In [12]:
# 計算年化報酬
def calculate_annual_return(data):
    # 計算總期間的年數
    total_days = (data.index[-1] - data.index[0]).days
    years = total_days / 365.0

    # 初始值
    start_value = data.iloc[0]
    # 終值
    end_value = data.iloc[-1]
    return (end_value / start_value) ** (1 / years) - 1


# 計算年化波動率
def calculate_annual_risk(data):
    # 計算每日收益率並移除空值
    daily_returns = data.pct_change().dropna()
    # 計算年化波動率
    return daily_returns.std() * np.sqrt(len(daily_returns))


def fetch_stock_data(symbols, start_date, end_date):
    # 儲存每隻股票的年化報酬率與年化波動率
    stock_returns = []
    stock_risks = []

    # 迭代所有股票
    for symbol in symbols:
        data = yf.download(symbol + '.TW', start=start_date, end=end_date)['Adj Close']

        # 計算年化報酬率
        annual_return = calculate_annual_return(data)
        # 計算年化波動率
        annual_risk = calculate_annual_risk(data)

        # 加入至清單
        stock_returns.append(annual_return)
        stock_risks.append(annual_risk)

    # 回傳每支股票的年化報酬清單、年化波動率
    return np.array(stock_returns), np.array(stock_risks)

In [13]:
start_date = '2020-01-01'
end_date = '2023-01-01'

stock_symbols = ['2330', '1201', '2454', '2357', '4426', '8271']

# 取得 stock_symbols 所有股價資料
stock_returns, stock_risks = fetch_stock_data(stock_symbols, start_date, end_date)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [14]:
# 基因演算法參數
population_size = 20 # 族群的大小
num_generations = 200 # 世代數
mutation_rate = 0.2 # 突變率
num_stocks = len(stock_symbols) # 有幾支股票

In [15]:
# 適應函數
def fitness(chromosome):
    # 計算投資組合的總報酬
    portfolio_return = np.sum(chromosome * stock_returns)
    # 計算投資組合的總風險（假設無相關性）
    portfolio_risk = np.sqrt(np.sum((chromosome * stock_risks) ** 2))
    # 計算選中的股票數量
    stock_count = np.sum(chromosome)
    # 計算夏普比率（簡化版，未扣無風險利率）
    if portfolio_risk > 0: # 分母不得為 0
        sharpe_ratio = portfolio_return / portfolio_risk
    else:
        sharpe_ratio = 0
    # 適應值 = 最大化夏普比率 - 股票數量懲罰
    # 在投資中，選擇過多的股票會增加管理複雜度和交易成本。這裡通過「股票數量懲罰項」來限制選擇過多股票。
    return 0.8 * sharpe_ratio - 0.006 * stock_count

# 初始化族群
def initialize_population():
    return [np.random.randint(0, 2, num_stocks) for _ in range(population_size)]

# 選擇
def select(population, fitness_scores):
    # 找到最小適應值
    min_fitness = min(fitness_scores)
    #  計算權重（所有直向右位移避免負值）
    shifted_fitness = [f - min_fitness + 1e-6 for f in fitness_scores]
    # 平移後適應值的總和
    total_fitness = sum(shifted_fitness)
    # 計算選擇機率
    probs = [f / total_fitness for f in shifted_fitness]
    # 加權隨機選擇
    return population[np.random.choice(range(population_size), p=probs)]

# 交配
def crossover(parent1, parent2):
    '''
    舉例:
    隨機產生交配點: 假設為3
    parent1 = [1, 0, 1, 1, 0]
    parent2 = [0, 1, 0, 0, 1]

    - 前 3 個基因: 來自parent1。
    - 後續基因: 來自parent2。
    child1 = [1, 0, 1] + [0, 1] = [1, 0, 1, 0, 1]
    child2 = [0, 1, 0] + [1, 0] = [0, 1, 0, 1, 0]
    '''
    point = np.random.randint(1, max(2, num_stocks - 1))  # 隨機選擇交配點，至少保留 1 個基因
    # 前部分基因來自父母 1，後部分基因來自父母 2
    child1 = np.concatenate((parent1[:point], parent2[point:]))
    # 前部分基因來自父母 2，後部分基因來自父母 1
    child2 = np.concatenate((parent2[:point], parent1[point:]))
    return child1, child2

# 突變
def mutate(chromosome):
    for i in range(num_stocks):
        if random.random() < mutation_rate:  # 按突變機率檢查每個基因
            chromosome[i] = 1 - chromosome[i]  # 基因翻轉（0->1 或 1->0）
    return chromosome

In [16]:
# GA 主程式
def genetic_algorithm():
    # 初始化群體集合
    population = initialize_population()

    # 迭代所有世代數
    for generation in range(num_generations):
        # 將群體集合中的群體計算適應性分數（夏普比率）
        fitness_scores = [fitness(chromosome) for chromosome in population]
        new_population = []
        for _ in range(population_size // 2):
            # 父母
            parent1 = select(population, fitness_scores)
            parent2 = select(population, fitness_scores)
            # 父母生下兩個小孩
            child1, child2 = crossover(parent1, parent2)
            # 小孩突變
            child1 = mutate(child1)
            child2 = mutate(child2)
            # 加入新群體集合
            new_population.append(child1)
            new_population.append(child2)
        # 新的群體
        population = new_population
        best_fitness = max(fitness_scores)
        print(f"Generation {generation + 1}: Best Fitness = {best_fitness}")
    best_index = np.argmax([fitness(chromosome) for chromosome in population])
    return population[best_index]

# 執行演算法
best_portfolio = genetic_algorithm()
print("\n最佳選股組合:", best_portfolio)
print("總報酬:", np.sum(best_portfolio * stock_returns))
print("總風險:", np.sqrt(np.sum((best_portfolio * stock_risks) ** 2)))

Generation 1: Best Fitness = 0.3741645805850462
Generation 2: Best Fitness = 0.3741645805850462
Generation 3: Best Fitness = 0.3741645805850462
Generation 4: Best Fitness = 0.3741645805850462
Generation 5: Best Fitness = 0.3741645805850462
Generation 6: Best Fitness = 0.3741645805850462
Generation 7: Best Fitness = 0.3741645805850462
Generation 8: Best Fitness = 0.3741645805850462
Generation 9: Best Fitness = 0.3741645805850462
Generation 10: Best Fitness = 0.3444273217934588
Generation 11: Best Fitness = 0.3444273217934588
Generation 12: Best Fitness = 0.3444273217934588
Generation 13: Best Fitness = 0.3444273217934588
Generation 14: Best Fitness = 0.3741645805850462
Generation 15: Best Fitness = 0.3741645805850462
Generation 16: Best Fitness = 0.3741645805850462
Generation 17: Best Fitness = 0.3741645805850462
Generation 18: Best Fitness = 0.3741645805850462
Generation 19: Best Fitness = 0.3741645805850462
Generation 20: Best Fitness = 0.3741645805850462
Generation 21: Best Fitness =

In [17]:
def summarize_results(best_portfolio):
    print("\n最佳選股組合:", best_portfolio)
    print("總報酬:", np.sum(best_portfolio * stock_returns))
    print("總風險:", np.sqrt(np.sum((best_portfolio * stock_risks) ** 2)))
    print("股票對應的代碼與選擇狀態:")
    for i, symbol in enumerate(stock_symbols):
        print(f"{symbol}: {'選擇' if best_portfolio[i] == 1 else '不選擇'}")

In [18]:
summarize_results(best_portfolio)


最佳選股組合: [1 1 0 1 1 1]
總報酬: 1.4712502068694233
總風險: 3.143467629065793
股票對應的代碼與選擇狀態:
2330: 選擇
1201: 選擇
2454: 不選擇
2357: 選擇
4426: 選擇
8271: 選擇
