optimized the trading strategy based on sentiment analysis

Loading the Dataset

In [3]:
import pandas as pd
import numpy as np

# Load data
data_sentiment = pd.read_csv('sentiment_trading_data.csv')
print(data_sentiment.head())

# Convert to NumPy array (excluding the 'Date' column)
data_sentiment = data_sentiment.drop(columns=['Date']).to_numpy()


         Date  Stock_Price  Sentiment
0  2020-01-01    87.454012   0.302466
1  2020-01-02   145.071431  -0.786814
2  2020-01-03   123.199394   0.315691
3  2020-01-04   109.865848   0.998827
4  2020-01-05    65.601864  -0.903576


Intialize the fitness function

In [4]:
def fitness_function(individual, data):
    stock_prices = data[:, 0]
    sentiments = data[:, 1]

    capital = 100000  # Starting capital
    position = 0      # Initial position (0 means no stock held)

    for i in range(len(data)):
        if sentiments[i] > individual[0]:  # Buy signal based on sentiment threshold
            position += capital // stock_prices[i]  # Buy as many stocks as possible
            capital -= position * stock_prices[i]   # Deduct spent capital
        elif sentiments[i] < individual[1]:  # Sell signal based on sentiment threshold
            capital += position * stock_prices[i]   # Sell all stocks
            position = 0                            # Reset position

    return capital



Intialize Population

In [5]:
def initialize_population(pop_size):
    population = []
    for _ in range(pop_size):
        buy_threshold = np.random.uniform(-1, 1)  # Random buy threshold
        sell_threshold = np.random.uniform(-1, 1) # Random sell threshold
        individual = [buy_threshold, sell_threshold]
        population.append(individual)
    return population


Perform Selection

In [6]:
def selection(population, fitness_scores, num_parents):
    parents = [population[idx] for idx in np.argsort(fitness_scores)[-num_parents:]]
    return parents


Perform crossover

In [7]:
def crossover(parents, offspring_size):
    offspring = []
    for _ in range(offspring_size):
        parent1 = parents[np.random.randint(len(parents))]
        parent2 = parents[np.random.randint(len(parents))]
        crossover_point = np.random.randint(1, len(parent1))
        child = parent1[:crossover_point] + parent2[crossover_point:]
        offspring.append(child)
    return offspring


Perform Mutation

In [8]:
def mutation(offspring, mutation_rate):
    for individual in offspring:
        if np.random.rand() < mutation_rate:
            mutation_point = np.random.randint(len(individual))
            individual[mutation_point] = np.random.uniform(-1, 1)  # Mutate with new random threshold
    return offspring


Perform Genetic algorithm

In [9]:
def genetic_algorithm(data, num_generations, pop_size, num_parents, mutation_rate):
    population = initialize_population(pop_size)

    for generation in range(num_generations):
        fitness_scores = [fitness_function(individual, data) for individual in population]
        parents = selection(population, fitness_scores, num_parents)
        offspring_size = pop_size - len(parents)
        offspring = crossover(parents, offspring_size)
        offspring = mutation(offspring, mutation_rate)
        population = parents + offspring

        best_fitness = np.max(fitness_scores)
        print(f"Generation {generation}: Best Fitness = {best_fitness}")

    best_individual = population[np.argmax(fitness_scores)]
    return best_individual

# Run the genetic algorithm
num_generations = 50
pop_size = 100
num_parents = 20
mutation_rate = 0.01

best_params = genetic_algorithm(data_sentiment, num_generations, pop_size, num_parents, mutation_rate)
print(f"Best Trading Strategy: Buy Threshold = {best_params[0]}, Sell Threshold = {best_params[1]}")


Generation 0: Best Fitness = 129785.02657707607
Generation 1: Best Fitness = 176212273.2138567
Generation 2: Best Fitness = 3.900617142553841e+32
Generation 3: Best Fitness = 3.900617142553841e+32
Generation 4: Best Fitness = 3.900617142553841e+32
Generation 5: Best Fitness = 3.900617142553841e+32
Generation 6: Best Fitness = 3.900617142553841e+32
Generation 7: Best Fitness = 3.900617142553841e+32
Generation 8: Best Fitness = 3.900617142553841e+32
Generation 9: Best Fitness = 3.900617142553841e+32
Generation 10: Best Fitness = 3.900617142553841e+32
Generation 11: Best Fitness = 3.900617142553841e+32
Generation 12: Best Fitness = 3.900617142553841e+32
Generation 13: Best Fitness = 3.900617142553841e+32
Generation 14: Best Fitness = 3.900617142553841e+32
Generation 15: Best Fitness = 3.900617142553841e+32
Generation 16: Best Fitness = 3.900617142553841e+32
Generation 17: Best Fitness = 3.900617142553841e+32
Generation 18: Best Fitness = 3.900617142553841e+32
Generation 19: Best Fitness =