In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# Step 1: Data Loading
# Load Biomass_History.csv
biomass_history = pd.read_csv("../data/Biomass_History.csv")

# Load Distance_Matrix.csv
distance_matrix = pd.read_csv("../data/Distance_Matrix.csv")

In [2]:
# Step 2: Data Preprocessing
# List of columns to be combined (2010 to 2017)
columns_to_combine = ['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']

# Melt the DataFrame to combine the columns into a single column while retaining the year information
biomass_data = pd.melt(biomass_history, id_vars=['Index', 'Longitude', 'Latitude'], value_vars=columns_to_combine, var_name='Year', value_name='Value')

# Convert the 'Year' column to numeric
biomass_data['Year'] = pd.to_numeric(biomass_data['Year'])

Step 3: Biomass Forecasting using ARIMA
ARIMA is a popular time-series forecasting method that combines autoregression, differencing, and moving average components. We'll use the statsmodels library to implement ARIMA.

In [3]:
# Step 3: Biomass Forecasting
# This step requires implementing time-series forecasting methods like ARIMA or Prophet to predict biomass availability for the years 2018 and 2019. You need to provide code for this step based on your preferred forecasting method.
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.arima.model import ARIMA

# Step 1: Data Loading
# Load Biomass_History.csv
biomass_history = pd.read_csv("../data/Biomass_History.csv")

# Load Distance_Matrix.csv
distance_matrix = pd.read_csv("../data/Distance_Matrix.csv")

# Step 2: Data Preprocessing
# ... (Same as before)

# Step 3: Biomass Forecasting using ARIMA
# Assuming 'biomass_data' contains columns: 'Year', 'Longitude', 'Latitude', and 'Value'
# Convert the 'Year' column to numeric
biomass_data['Year'] = pd.to_numeric(biomass_data['Year'])

# Prepare the output DataFrame and save to prediction.csv
header = ["year", "data_type", "source_index", "destination_index", "value"]
output_data = pd.DataFrame(columns=header)

# Iterate through each group, train the ARIMA model, and make predictions
for idx, (group_name, group_data) in enumerate(biomass_data.groupby(['Longitude', 'Latitude'])):
    longitude, latitude = group_name
    train_data = group_data[group_data['Year'] <= 2017]
    test_data = group_data[group_data['Year'] >= 2018]

    # Fit ARIMA model to the training data
    model = ARIMA(train_data['Value'], order=(2, 1, 0))  # Use appropriate order (p, d, q)
    model_fit = model.fit()

    # Predict biomass availability for the years 2018 and 2019
    years_to_predict = [2018, 2019]
    predictions = model_fit.forecast(steps=len(years_to_predict))

    # Save the predictions for biomass_forecast
    for year, prediction in zip(years_to_predict, predictions):
        output_data = output_data.append({"year": year, "data_type": "biomass_forecast", "source_index": idx, "destination_index": '', "value": prediction}, ignore_index=True)

# Save the output to prediction.csv with header
output_data.to_csv('../data/prediction1.csv', index=False)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  output_data = output_data.append({"year": year, "data_type": "biomass_forecast", "source_index": idx, "destination_index": '', "value": prediction}, ignore_index=True)
  output_data = output_data.append({"year": year, "data_type": "biomass_forecast", "source_index": idx, "destination_index": '', "value": prediction}, ignore_index=True)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  output_data = output_data.append({"year": year, "data_type": "biomass_forecast", "source_index": idx, "destination_index": '', "value": prediction}, ignore_index=True)
  output_data = output_data.append({"year": year, "data_type": "biomass_forecast", "source_index": idx, "destination_index": '', "value": prediction}, ignore_index=True)
  se

Step 4: Optimal Asset Locations using Genetic Algorithm
For the optimization of asset locations, we'll use the DEAP (Distributed Evolutionary Algorithms in Python) library, which includes implementations of Genetic Algorithm and other evolutionary optimization algorithms.

In [4]:
# Step 4: Optimal Asset Locations
# Implement an optimization algorithm to find the optimal locations for preprocessing depots and biorefineries based on the forecasted biomass data and distance matrix. You can use libraries like DEAP or Optuna to implement the optimization algorithm. This step involves defining the optimization problem, constraints, and the cost function based on the biomass forecast and distance data.
from deap import algorithms, base, creator, tools

# Step 4: Optimal Asset Locations using Genetic Algorithm
# ... (Continue from Step 3)
from deap import algorithms, base, creator, tools
import pandas as pd
import numpy as np

biomass_forecast_data = pd.read_csv("../data/prediction1.csv")
# Define the optimization problem
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

# Define the number of depots and refineries
num_depots = 25
num_refineries = 5

# Define the number of generations and population size for the Genetic Algorithm
num_generations = 50
population_size = 100


# Define the optimization function (cost function) based on the forecasted biomass data and distance matrix
def optimization_function(individual):
    # individual contains the indices of selected depots and refineries
    # Calculate the overall cost using the indices and their respective forecasted biomass data and distance matrix

    # Constants for the cost function
    a = 0.001
    b = 1
    c = 1

    # Get the indices of depots and refineries from the individual
    depot_indices = individual[:num_depots]
    refinery_indices = individual[num_depots:]

    # Check if there are any duplicate depot indices for each location
    unique_depot_indices = set(depot_indices)
    if len(unique_depot_indices) != num_depots:
        # Penalize the cost if there are duplicates
        return 100000,  # Adjust the penalty value as needed

    # Cost of transportation (Ct)
    ct_cost = 0.0
    for i in range(len(depot_indices)):
        for j in range(len(refinery_indices)):
            ct_cost += distance_matrix[depot_indices[i]][refinery_indices[j]] * biomass_forecast_data.iloc[depot_indices[i]]['Value']
    
    # Cost of biomass forecast mismatch (Cf)
    cf_cost = 0.0
    for i in range(len(depot_indices)):
        cf_cost += abs(biomass_forecast_data.iloc[depot_indices[i]]['Value'] - biomass_forecast_data.iloc[depot_indices[i]]['True Value'])

    # Cost of underutilization (Cu)
    cu_cost = 0.0
    for j in range(len(refinery_indices)):
        total_pellet = 0.0
        for i in range(len(depot_indices)):
            total_pellet += biomass_forecast_data.iloc[depot_indices[i]]['Value']
        cu_cost += max(0, refinery_capacity - total_pellet)

    # Overall cost
    cost = a * ct_cost + b * cf_cost + c * cu_cost

    return cost,

# Create the toolbox for the Genetic Algorithm
toolbox = base.Toolbox()
toolbox.register("indices", np.random.randint, 0, 2417, num_depots + num_refineries)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.indices)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutUniformInt, low=0, up=2417, indpb=0.1)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", optimization_function)

# Initialize the population for the Genetic Algorithm
population = toolbox.population(n=population_size)

# Run the Genetic Algorithm
for gen in range(num_generations):
    # Evaluate the fitness for each individual in the population
    fitnesses = [toolbox.evaluate(ind) for ind in population]
    for ind, fit in zip(population, fitnesses):
        ind.fitness.values = fit

    # Select the next generation individuals
    offspring = toolbox.select(population, len(population))

    # Clone the selected individuals
    offspring = list(map(toolbox.clone, offspring))

    # Apply crossover and mutation on the offspring
    for child1, child2 in zip(offspring[::2], offspring[1::2]):
        if np.random.rand() < 0.5:
            toolbox.mate(child1, child2)
            del child1.fitness.values
            del child2.fitness.values

    for mutant in offspring:
        if np.random.rand() < 0.2:
            toolbox.mutate(mutant)
            del mutant.fitness.values

    # Replace the old population with the offspring
    population[:] = offspring

# Get the best individual from the final population
best_individual = tools.selBest(population, k=1)[0]

# Save the selected depots and refineries to the output DataFrame
output_data = pd.DataFrame(columns=header)
for idx, val in enumerate(best_individual):
    data_type = 'depot_location' if idx < num_depots else 'refinery_location'
    output_data = output_data.append({"year": "20182019", "data_type": data_type, "source_index": val, "destination_index": '', "value": None}, ignore_index=True)

# Save the output to prediction.csv with header
output_data.to_csv('../data/optimization_results.csv', index=False)

KeyError: 2126

In [None]:
import pandas as pd

# Read the first CSV file
df1 = pd.read_csv('../data/prediction1.csv')

# Read the second CSV file
df2 = pd.read_csv('../data/optimization_results.csv')

# Merge the two dataframes
merged_df = pd.concat([df1, df2], ignore_index=True)

# Save the merged dataframe to a new CSV file
merged_df.to_csv('merged_file1.csv', index=False)
