In [None]:
# Step 2: Data Preprocessing
# List of columns to be combined (2010 to 2017)
import pandas as pd
biomass_history = pd.read_csv("../data/Biomass_History.csv")
columns_to_combine = ['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']

# Melt the DataFrame to combine the columns into a single column while retaining the year information
biomass_data = pd.melt(biomass_history, id_vars=['Index', 'Longitude', 'Latitude'], value_vars=columns_to_combine, var_name='Year', value_name='Value')

# Convert the 'Year' column to numeric
#biomass_data['Year'] = pd.to_numeric(biomass_data['Year'])

In [10]:
# Step 4: Optimal Asset Locations
# Implement an optimization algorithm to find the optimal locations for preprocessing depots and biorefineries based on the forecasted biomass data and distance matrix. You can use libraries like DEAP or Optuna to implement the optimization algorithm. This step involves defining the optimization problem, constraints, and the cost function based on the biomass forecast and distance data.
from deap import algorithms, base, creator, tools

# Step 4: Optimal Asset Locations using Genetic Algorithm
# ... (Continue from Step 3)
from deap import algorithms, base, creator, tools
import pandas as pd
import numpy as np

# Define the optimization problem
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

# Define the number of depots and refineries
num_depots = 25
num_refineries = 5

# Define the number of generations and population size for the Genetic Algorithm
num_generations = 50
population_size = 100


# Define the optimization function (cost function) based on the forecasted biomass data and distance matrix
def optimization_function(individual):
    # individual contains the indices of selected depots and refineries
    # Calculate the overall cost using the indices and their respective forecasted biomass data and distance matrix
    cost = 0.0

    # Get the indices of depots and refineries from the individual
    depot_indices = individual[:num_depots]
    refinery_indices = individual[num_depots:]

    # Check if there are any duplicate depot indices for each location
    unique_depot_indices = set(depot_indices)
    if len(unique_depot_indices) != num_depots:
        # Penalize the cost if there are duplicates
        cost += 1000.0  # Adjust the penalty value as needed
    
    # ... (implement the cost calculation based on the description)

    return cost,

# Create the toolbox for the Genetic Algorithm
toolbox = base.Toolbox()
toolbox.register("indices", np.random.randint, 0, 2417, num_depots + num_refineries)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.indices)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutUniformInt, low=0, up=2417, indpb=0.1)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", optimization_function)

# Initialize the population for the Genetic Algorithm
population = toolbox.population(n=population_size)

# Run the Genetic Algorithm
for gen in range(num_generations):
    # Evaluate the fitness for each individual in the population
    fitnesses = [toolbox.evaluate(ind) for ind in population]
    for ind, fit in zip(population, fitnesses):
        ind.fitness.values = fit

    # Select the next generation individuals
    offspring = toolbox.select(population, len(population))

    # Clone the selected individuals
    offspring = list(map(toolbox.clone, offspring))

    # Apply crossover and mutation on the offspring
    for child1, child2 in zip(offspring[::2], offspring[1::2]):
        if np.random.rand() < 0.5:
            toolbox.mate(child1, child2)
            del child1.fitness.values
            del child2.fitness.values

    for mutant in offspring:
        if np.random.rand() < 0.2:
            toolbox.mutate(mutant)
            del mutant.fitness.values

    # Replace the old population with the offspring
    population[:] = offspring

# Get the best individual from the final population
best_individual = tools.selBest(population, k=1)[0]

# Save the selected depots and refineries to the output DataFrame
output_data = pd.DataFrame(columns=header)
for idx, val in enumerate(best_individual):
    data_type = 'depot_location' if idx < num_depots else 'refinery_location'
    output_data = output_data.append({"year": "20182019", "data_type": data_type, "source_index": val, "destination_index": '', "value": None}, ignore_index=True)

# Save the output to prediction.csv with header
output_data.to_csv('../data/optimization_results.csv', index=False)



NameError: name 'header' is not defined

In [15]:
import pandas as pd

# Read the CSV file
df = pd.read_csv('merged_file1.csv')

# Define the custom sorting order
sorting_order = ['20182019,depot_location','20182019,refinery_location', '2018,biomass_forecast', '2019,biomass_forecast']

# Map the data_type to an integer based on the custom sorting order
df['data_type_order'] = df['year'].astype(str) + ',' + df['data_type']
df['data_type_order'] = df['data_type_order'].map({x: i for i, x in enumerate(sorting_order)})

# Sort the DataFrame based on the custom sorting order
df_sorted = df.sort_values(by=['data_type_order', 'year'], ascending=[True, True])

# Drop the temporary column used for sorting
df_sorted = df_sorted.drop(columns=['data_type_order'])

# Save the sorted DataFrame to a new CSV file
df_sorted.to_csv('../data/final2.csv', index=False)
