In [2]:
from config import *

# 0. Data

In [3]:
df = pd.read_csv('data/italian_stock_data_2004_2024.csv', index_col=0, header=[0,1])
df = df.stack(level='Ticker').reset_index()
df.fillna(method='ffill', inplace=True) 
df.Date = pd.to_datetime(df.Date)
df.rename(columns={col:col.lower() for col in df.columns}, inplace=True)
df.rename(columns={'adj close':'adj_close'}, inplace=True)
df.ticker = df.ticker.str.replace('.MI','').str.replace('1','')

  df.ticker = df.ticker.str.replace('.MI','').str.replace('1','')


Feature engineering

In [4]:
# Basic Features
df['daily_return'] = df.groupby('ticker')['adj_close'].pct_change()
df['price_range'] = df['high'] - df['low']

# Moving Averages
df['sma_20'] = df.groupby('ticker')['adj_close'].rolling(window=20).mean().reset_index(0, drop=True)
df['sma_50'] = df.groupby('ticker')['adj_close'].rolling(window=50).mean().reset_index(0, drop=True)
df['ema_20'] = df.groupby('ticker', group_keys=False)['adj_close'].apply(lambda x: x.ewm(span=20, adjust=False).mean())

# Volatility Features
df['volatility_20'] = df.groupby('ticker')['adj_close'].rolling(window=20).std().reset_index(0, drop=True)
df['bollinger_upper'] = df['sma_20'] + 2 * df['volatility_20']
df['bollinger_lower'] = df['sma_20'] - 2 * df['volatility_20']

# Momentum Indicators
delta = df.groupby('ticker')['adj_close'].diff()
gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
df['rsi'] = 100 - (100 / (1 + gain / loss))
df['macd'] = df.groupby('ticker', group_keys=False)['adj_close'].apply(
    lambda x: x.ewm(span=12, adjust=False).mean() - x.ewm(span=26, adjust=False).mean()
).reset_index(level=0, drop=True)
df['signal'] = df['macd'].ewm(span=9, adjust=False).mean()

# Volume Features
df['volume_sma_20'] = df.groupby('ticker')['volume'].rolling(window=20).mean().reset_index(0, drop=True)
df['volume_spike'] = df['volume'] > 2 * df['volume_sma_20']

# Custom Features for GP
df['open_close_ratio'] = df['open'] / df['close']
df['high_low_ratio'] = df['high'] / df['low']
df['day_of_week'] = pd.to_datetime(df['date']).dt.dayofweek
df['cumulative_return'] = (1 + df['daily_return']).groupby(df['ticker']).cumprod()


In [5]:
df

Price,date,ticker,adj_close,close,high,low,open,volume,daily_return,price_range,...,bollinger_lower,rsi,macd,signal,volume_sma_20,volume_spike,open_close_ratio,high_low_ratio,day_of_week,cumulative_return
0,2004-01-01 00:00:00+00:00,A2A,0.565195,1.474000,1.474000,1.474000,1.474000,0.0,,0.000000,...,,,0.000000,0.000000,,False,1.000000,1.000000,3,
1,2004-01-01 00:00:00+00:00,CPR,0.752704,0.962500,0.962500,0.962500,0.962500,0.0,,0.000000,...,,,0.000000,0.000000,,False,1.000000,1.000000,3,
2,2004-01-01 00:00:00+00:00,ENEL,1.228324,4.640071,4.640071,4.640071,4.640071,0.0,,0.000000,...,,,0.000000,0.000000,,False,1.000000,1.000000,3,
3,2004-01-01 00:00:00+00:00,ENI,4.177127,14.960000,14.960000,14.960000,14.960000,0.0,,0.000000,...,,,0.000000,0.000000,,False,1.000000,1.000000,3,
4,2004-01-01 00:00:00+00:00,G,9.164813,21.000000,21.000000,21.000000,21.000000,0.0,,0.000000,...,,,0.000000,0.000000,,False,1.000000,1.000000,3,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76142,2023-12-29 00:00:00+00:00,SPM,1.470000,1.470000,1.489500,1.468000,1.478500,16596498.0,-0.008766,0.021500,...,1.362210,65.645576,0.006151,0.319741,3.536630e+07,False,1.005782,1.014646,4,0.088904
76143,2023-12-29 00:00:00+00:00,TEN,15.118384,15.745000,15.975000,15.700000,15.900000,1390360.0,-0.013471,0.275001,...,14.691860,47.679687,0.098455,0.275484,2.348357e+06,False,1.009844,1.017516,4,10.868004
76144,2023-12-29 00:00:00+00:00,TIT,0.294200,0.294200,0.300100,0.291400,0.299400,152745272.0,-0.017368,0.008700,...,0.241693,46.732260,0.009898,0.222366,1.859686e+08,False,1.017675,1.029856,4,0.218102
76145,2023-12-29 00:00:00+00:00,TRN,7.219790,7.554000,7.584000,7.532000,7.548000,1841328.0,0.000795,0.052000,...,7.085075,51.407960,0.047886,0.187470,3.652318e+06,False,0.999206,1.006904,4,11.905242


In [6]:
df_train = df[df.date<'2019-01-01']
df_test = df[df.date>='2019-01-01']

In [7]:
df.columns

Index(['date', 'ticker', 'adj_close', 'close', 'high', 'low', 'open', 'volume',
       'daily_return', 'price_range', 'sma_20', 'sma_50', 'ema_20',
       'volatility_20', 'bollinger_upper', 'bollinger_lower', 'rsi', 'macd',
       'signal', 'volume_sma_20', 'volume_spike', 'open_close_ratio',
       'high_low_ratio', 'day_of_week', 'cumulative_return'],
      dtype='object', name='Price')

In [None]:
features = ['adj_close', 'close', 'high', 'low', 'open', 'volume',
       'daily_return', 'price_range', 'sma_20', 'sma_50', 'ema_20',
       'volatility_20', 'bollinger_upper', 'bollinger_lower', 'rsi', 'macd',
       'signal', 'volume_sma_20', 'volume_spike', 'open_close_ratio',
       'high_low_ratio', 'day_of_week', 'cumulative_return']

These are the features we want - no more engineering features during the GP loop

# 1. Portfolio selection using GP

## Baseline: Markowitz

In [8]:
df_mark = df_train.pivot(index='date', columns='ticker', values='adj_close')
num_stocks = len(df_mark.columns)
avg_returns = np.array([np.mean(df_mark[col]) for col in df_mark.columns]).reshape(num_stocks,1)
cov_matrix = np.matrix(df_mark.cov())

In [9]:
optimal_weights_markowitz, optimal_fitness_markowitz = markowitz_solution(num_stocks,
                                                                          avg_returns,
                                                                          cov_matrix,
                                                                          risk_aversion=0.5,
                                                                          short_selling=True)

## Genetic programming

In [10]:
# Parameters for genetic programming
POPULATION_SIZE = 10
GENERATIONS = 20
TOURNAMENT_SIZE = 3
MUTATION_RATE = 0.2

In [11]:
def generate_random_strategy(prices):
    """Generate a random strategy as a simple decision rule."""
    return {
        "threshold": random.uniform(min(prices), max(prices)),
        "action": random.choice(["BUY", "SELL"]),
    }

In [12]:
def evaluate_strategy(strategy, prices):
    """Evaluate the fitness of a strategy using profit as the metric."""
    cash = 1000  # Initial cash
    stock = 0    # Initial stock holdings
    for price in prices:
        if strategy["action"] == "BUY" and price < strategy["threshold"]:
            stock += cash / price
            cash = 0
        elif strategy["action"] == "SELL" and price > strategy["threshold"]:
            cash += stock * price
            stock = 0
    return cash + stock * prices[-1]  # Final portfolio value

In [13]:
def mutate_strategy(strategy):
    """Randomly modify the strategy to introduce variation."""
    if random.random() < MUTATION_RATE:
        strategy["threshold"] += random.uniform(-5, 5)
    if random.random() < MUTATION_RATE:
        strategy["action"] = "BUY" if strategy["action"] == "SELL" else "SELL"
    return strategy

In [14]:
def crossover_strategy(parent1, parent2):
    """Combine two parent strategies to create an offspring."""
    return {
        "threshold": random.choice([parent1["threshold"], parent2["threshold"]]),
        "action": random.choice([parent1["action"], parent2["action"]]),
    }

In [15]:
def select_parent(population, fitnesses):
    """Select a parent using tournament selection."""
    tournament = random.sample(list(zip(population, fitnesses)), TOURNAMENT_SIZE)
    return max(tournament, key=lambda x: x[1])[0]

In [16]:
# Genetic Programming Loop
tickers = df['ticker'].unique()  # Get unique tickers

for ticker in tickers:
    # Extract historical prices for the current ticker
    historical_prices = df[df['ticker'] == ticker]['adj_close'].values

    # Initialize population for the current ticker
    population = [generate_random_strategy(historical_prices) for _ in range(POPULATION_SIZE)]

    print(f"Running genetic algorithm for {ticker}...")

    for generation in range(GENERATIONS):
        # Evaluate fitness
        fitnesses = [evaluate_strategy(strategy, historical_prices) for strategy in population]

        # Print the best strategy of this generation
        best_idx = np.argmax(fitnesses)
        print(f"  Generation {generation}: Best Fitness = {fitnesses[best_idx]:.2f}, Strategy = {population[best_idx]}")

        # Create new population
        new_population = []
        for _ in range(POPULATION_SIZE):
            # Select parents
            parent1 = select_parent(population, fitnesses)
            parent2 = select_parent(population, fitnesses) # I think we are selecting the same parent for all the algorithms

            # Create offspring
            offspring = crossover_strategy(parent1, parent2)
            offspring = mutate_strategy(offspring)

            new_population.append(offspring)

        population = new_population

Running genetic algorithm for A2A...
  Generation 0: Best Fitness = 4152.35, Strategy = {'threshold': 0.43883713403578745, 'action': 'BUY'}
  Generation 1: Best Fitness = 4152.35, Strategy = {'threshold': 0.43883713403578745, 'action': 'BUY'}
  Generation 2: Best Fitness = 4152.35, Strategy = {'threshold': 0.43883713403578745, 'action': 'BUY'}
  Generation 3: Best Fitness = 4152.35, Strategy = {'threshold': 0.43883713403578745, 'action': 'BUY'}
  Generation 4: Best Fitness = 4152.35, Strategy = {'threshold': 0.43883713403578745, 'action': 'BUY'}
  Generation 5: Best Fitness = 4152.35, Strategy = {'threshold': 0.43883713403578745, 'action': 'BUY'}
  Generation 6: Best Fitness = 4152.35, Strategy = {'threshold': 0.43883713403578745, 'action': 'BUY'}
  Generation 7: Best Fitness = 4152.35, Strategy = {'threshold': 0.43883713403578745, 'action': 'BUY'}
  Generation 8: Best Fitness = 4152.35, Strategy = {'threshold': 0.43883713403578745, 'action': 'BUY'}
  Generation 9: Best Fitness = 4152.

# 2. Plots

In [17]:
def compute_portfolio_value(weights):
    # given weights, uses daily adj_close prices to compute portfolio value
    pass

In [18]:
def plot_portfolios_values(portfolios):
    pass