In [11]:
import pandas as pd

# Load the dataset
data = pd.read_csv('crypto_investment_data.csv')
data['Date'] = pd.to_datetime(data['Date'])

# Display the first few rows of the dataset
data.head()


Unnamed: 0,Date,Open,High,Low,Close,Volume,Market_Cap,Sentiment_Score
0,2021-01-01,33745.401188,34671.065833,32436.87277,34436.213165,6147.962905,211712600.0,-0.212729
1,2021-01-02,39507.143064,42216.647801,38272.249069,40693.868653,8248.890964,335679300.0,-0.053129
2,2021-01-03,37319.939418,41684.668597,32788.666516,36321.811013,7841.448368,284815600.0,0.709095
3,2021-01-04,35986.584842,39647.709274,34738.853843,36486.08124,2385.099142,87022920.0,-0.319991
4,2021-01-05,31560.186404,35592.992144,30200.437774,31847.170337,2343.245228,74625730.0,0.739299


In [12]:
# Create features and labels
data['Future_Close'] = data['Close'].shift(-1)  # Predicting the next day's closing price
data = data.dropna()  # Drop the last row with NaN value

features = data[['Open', 'High', 'Low', 'Close', 'Volume', 'Market_Cap', 'Sentiment_Score']]
labels = data['Future_Close']

# Split the data into training and testing sets
train_size = int(0.8 * len(data))
X_train, X_test = features[:train_size], features[train_size:]
y_train, y_test = labels[:train_size], labels[train_size:]

print(f'Training samples: {len(X_train)}, Testing samples: {len(X_test)}')


Training samples: 799, Testing samples: 200


Define Genetic Algorithm


In [13]:
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression

def evaluate_model(params, X_train, y_train, X_test, y_test):
    # Create and train the model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Make predictions
    predictions = model.predict(X_test)

    # Calculate mean squared error
    mse = mean_squared_error(y_test, predictions)

    return -mse  # We want to minimize MSE, so we return its negative value


Population initialization

In [14]:
import numpy as np

def initialize_population(pop_size, param_bounds):
    return [np.random.uniform(low, high, size=(len(param_bounds),)) for low, high in param_bounds]


Selection function

In [15]:
def selection(population, fitness_scores, num_parents):
    parents = [population[idx] for idx in np.argsort(fitness_scores)[-num_parents:]]
    return parents


Crossover

In [16]:
def crossover(parents, offspring_size):
    offspring = []
    for _ in range(offspring_size):
        parent1, parent2 = np.random.choice(len(parents), size=2, replace=False)
        crossover_point = np.random.randint(1, len(parents[0]) - 1)
        child = np.concatenate((parents[parent1][:crossover_point], parents[parent2][crossover_point:]))
        offspring.append(child)
    return offspring


Mutation

In [17]:
def mutation(offspring, mutation_rate=0.01):
    for individual in offspring:
        if np.random.rand() < mutation_rate:
            mutation_idx = np.random.randint(len(individual))
            individual[mutation_idx] += np.random.uniform(-0.1, 0.1)
    return offspring


Imeplement Genetics ALgorithm

In [18]:
def genetic_algorithm(X_train, y_train, X_test, y_test, num_generations, pop_size, num_parents, mutation_rate):
    param_bounds = [(0, 1) for _ in range(X_train.shape[1])]  # Example bounds for feature weights
    population = initialize_population(pop_size, param_bounds)

    for generation in range(num_generations):
        fitness_scores = [evaluate_model(individual, X_train, y_train, X_test, y_test) for individual in population]
        parents = selection(population, fitness_scores, num_parents)
        offspring_size = pop_size - len(parents)
        offspring = crossover(parents, offspring_size)
        offspring = mutation(offspring, mutation_rate)
        population = parents + offspring

        best_fitness = np.max(fitness_scores)
        print(f"Generation {generation}: Best Fitness = {best_fitness}")

    best_params = population[np.argmax(fitness_scores)]
    return best_params

# Run the genetic algorithm
num_generations = 50
pop_size = 100
num_parents = 20
mutation_rate = 0.01

best_params = genetic_algorithm(X_train, y_train, X_test, y_test, num_generations, pop_size, num_parents, mutation_rate)
print(f"Best Parameters: {best_params}")


Generation 0: Best Fitness = -9990491.192329021
Generation 1: Best Fitness = -9990491.192329021
Generation 2: Best Fitness = -9990491.192329021
Generation 3: Best Fitness = -9990491.192329021
Generation 4: Best Fitness = -9990491.192329021
Generation 5: Best Fitness = -9990491.192329021
Generation 6: Best Fitness = -9990491.192329021
Generation 7: Best Fitness = -9990491.192329021
Generation 8: Best Fitness = -9990491.192329021
Generation 9: Best Fitness = -9990491.192329021
Generation 10: Best Fitness = -9990491.192329021
Generation 11: Best Fitness = -9990491.192329021
Generation 12: Best Fitness = -9990491.192329021
Generation 13: Best Fitness = -9990491.192329021
Generation 14: Best Fitness = -9990491.192329021
Generation 15: Best Fitness = -9990491.192329021
Generation 16: Best Fitness = -9990491.192329021
Generation 17: Best Fitness = -9990491.192329021
Generation 18: Best Fitness = -9990491.192329021
Generation 19: Best Fitness = -9990491.192329021
Generation 20: Best Fitness = 