In [64]:
import pandas as pd
import numpy as np
import random
from tqdm import tqdm
import logging
import os

In [None]:
class GeneticSupplyChainOptimizer:
    def __init__(self, demand_df, prices_df, max_load=8000, transport_cost=4500, population_size=30, generations=500, mutation_rate=0.1, horizon_days=6):
        self.original_demand_df = demand_df.copy()
        self.prices_df = prices_df.copy()
        self.max_load = max_load
        self.transport_cost = transport_cost
        self.population_size = population_size
        self.generations = generations
        self.mutation_rate = mutation_rate
        self.horizon_days = horizon_days

        self.demands = self.original_demand_df.groupby(['polygon', 'specie'])['demand'].sum().reset_index()
        self.species = self.demands['specie'].unique()
        self.suppliers = self.prices_df['supplier'].unique()

        # Setup logging
        self.log_file = 'GA_Supply_Chain.log'

        if os.path.exists(self.log_file):
            logging.shutdown()
            os.remove(self.log_file)

        logging.basicConfig(
            filename=self.log_file,
            filemode='a',
            format='%(asctime)s.%(msecs)01d %(name)s %(levelname)s %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S',
            level=logging.INFO,
        )

    def generate_individual(self):
        remaining = self.demands.sample(frac=1)
        schedule = []
        day = 0

        while remaining['demand'].sum() > 0:
            day += 1
            remaining_capacity = self.max_load
            day_plan = []
            fulfilled_indices = []

            for idx, row in remaining[remaining['demand'] != 0].iterrows():
                polygon, specie, demand = row['polygon'], row['specie'], row['demand']
                if demand > remaining_capacity:
                    continue

                suppliers = self.prices_df[self.prices_df['specie'] == specie]['supplier'].tolist()
                supplier = random.choice(suppliers)

                day_plan.append((day, polygon, specie, supplier, demand))
                remaining_capacity -= demand
                fulfilled_indices.append(idx)

            schedule.extend(day_plan)
            remaining.loc[fulfilled_indices, 'demand'] = 0
            # logging.info(f'Missing Demand Units: {remaining.loc[:, 'demand'].sum()}')

        return schedule

    def fitness(self, schedule):
        remaining = self.demands.copy()
        cost = 0

        for day in range(1, self.horizon_days + 1):
            day_orders = [entry for entry in schedule if entry[0] == day]
            day_suppliers = set()
            day_load = 0

            for (_, polygon, specie, supplier, amount) in day_orders:
                row = self.prices_df[(self.prices_df['specie'] == specie) & (self.prices_df['supplier'] == supplier)]
                if row.empty:
                    return float('inf')

                unit_price = row.iloc[0]['price']
                cost += unit_price * amount
                day_load += amount
                day_suppliers.add(supplier)

                match = (remaining['polygon'] == polygon) & (remaining['specie'] == specie)
                if not any(match):
                    cost += 1e6
                else:
                    current_demand = remaining.loc[match, 'demand'].values[0]
                    if amount != current_demand:
                        cost += 1e6
                    else:
                        remaining.loc[match, 'demand'] = 0

            # if day_load > self.max_load:
            #     cost += 1e6

            cost += self.transport_cost * len(day_suppliers)

        # if remaining['demand'].sum() > 0:
        #     cost += 1e6

        return cost

    def mutate(self, individual):
        new_schedule = []
        for (day, polygon, specie, supplier, amount) in individual:
            if random.random() < self.mutation_rate:
                possible_suppliers = self.prices_df[self.prices_df['specie'] == specie]['supplier'].tolist()
                supplier = random.choice(possible_suppliers)
            new_schedule.append((day, polygon, specie, supplier, amount))
        return new_schedule

    def crossover(self, p1, p2):
        # if len(p1) == 0:
        #     return []
        # split = random.randint(1, len(p1) - 1) if len(p1) > 1 else 1
        split = len(p1) // 2
        return p1[:split] + p2[split:]

    def evolve(self, patience=50):
        logging.info("Starting evolution process")
        population = [self.generate_individual() for _ in range(self.population_size)]

        best_cost = None
        best_solution = None
        best_generation = 0
        no_improve_count = 0

        for generation in tqdm(list(range(self.generations))):
            scored = [(self.fitness(ind), ind) for ind in population]
            scored.sort(key=lambda x: x[0])

            current_best_cost = scored[0][0]
            current_worst_cost = scored[-1][0]
            current_best_solution = scored[0][1]

            if best_cost is None or current_best_cost < best_cost:
                best_cost = current_best_cost
                best_solution = current_best_solution
                best_generation = generation
                no_improve_count = 0
            else:
                no_improve_count += 1

            if no_improve_count >= patience:
                logging.info(f"Stopping early at generation {generation+1} due to no improvement in {patience} generations.")
                break

            survivors = [ind for (_, ind) in scored[:self.population_size // 2]]
            new_population = survivors.copy()

            while len(new_population) < self.population_size:
                p1, p2 = random.sample(survivors, 2)
                child = self.crossover(p1, p2)
                child = self.mutate(child)
                new_population.append(child)

            population = new_population
            logging.info(f"Generation {generation+1}: Best cost = {current_best_cost:.2f}. Worst cost = {current_worst_cost:.2f}")

        logging.info(f"Evolution completed. Best cost: {best_cost:.2f} at generation {best_generation+1}")
        return best_solution, best_cost


In [66]:
demand_df = pd.read_csv('../setup/demand.csv')
demand_df = demand_df.loc[
    demand_df['polygon'].isin([1,18,26]) & demand_df['specie'].isin(['Agave lechuguilla', 'Opuntia cantabrigiensis', 'Yucca filifera'])
]

prices_df = pd.read_csv('../setup/supplier_prices.csv')

In [67]:
demand_df

Unnamed: 0,specie,polygon,demand
0,Agave lechuguilla,1,178
4,Opuntia cantabrigiensis,1,210
9,Yucca filifera,1,113
60,Agave lechuguilla,18,234
64,Opuntia cantabrigiensis,18,277
69,Yucca filifera,18,149
120,Agave lechuguilla,26,157
124,Opuntia cantabrigiensis,26,185
129,Yucca filifera,26,100


In [68]:
optimizer = GeneticSupplyChainOptimizer(demand_df, prices_df, max_load=500)
all_orders, total_cost = optimizer.evolve()

orders = pd.DataFrame(all_orders, columns = ['Day', 'Polygon', 'Specie', 'Supplier', 'Amount'])
orders = orders.groupby(['Day', 'Supplier', 'Specie']).agg({'Amount' : 'sum'}).reset_index()

 10%|█         | 50/500 [00:27<04:06,  1.83it/s]


In [69]:
all_orders

[(1, 1, 'Yucca filifera', 'Vivero', 113),
 (1, 26, 'Agave lechuguilla', 'Laguna seca', 157),
 (1, 1, 'Agave lechuguilla', 'Laguna seca', 178),
 (2, 18, 'Opuntia cantabrigiensis', 'Moctezuma', 277),
 (2, 26, 'Opuntia cantabrigiensis', 'Moctezuma', 185),
 (3, 18, 'Agave lechuguilla', 'Laguna seca', 234),
 (3, 18, 'Yucca filifera', 'Vivero', 149),
 (3, 26, 'Yucca filifera', 'Vivero', 100),
 (4, 1, 'Opuntia cantabrigiensis', 'Moctezuma', 210)]

In [70]:
orders

Unnamed: 0,Day,Supplier,Specie,Amount
0,1,Laguna seca,Agave lechuguilla,335
1,1,Vivero,Yucca filifera,113
2,2,Moctezuma,Opuntia cantabrigiensis,462
3,3,Laguna seca,Agave lechuguilla,234
4,3,Vivero,Yucca filifera,249
5,4,Moctezuma,Opuntia cantabrigiensis,210


In [71]:
pd.DataFrame(all_orders, columns = ['Day', 'Polygon', 'Specie', 'Supplier', 'Amount']).groupby('Day')['Polygon'].unique()

Day
1     [1, 26]
2    [18, 26]
3    [18, 26]
4         [1]
Name: Polygon, dtype: object