In [54]:
# Install & Imports
import numpy as np
import pandas as pd
import random

# Load dataset
df = pd.read_csv("Dummy_Lawyers.csv")

# Prepare objectives: [Price, 10 - Satisfaction]  (minimize both)
objectives = np.column_stack((
    df["Price"].values,
    10 - df["Client satisfaction (out of 10)"].values
))

In [55]:
# NSABC Class
class NSABC:
    def __init__(self, obj_vals, swarm_size=50, archive_size=50, max_iter=200, limit=20):
        self.n_solutions = obj_vals.shape[0]
        self.obj = obj_vals
        self.NP = swarm_size
        self.archive_size = archive_size
        self.MIC = max_iter
        self.limit = limit  # scout limit
        self.X = random.sample(range(self.n_solutions), self.NP)
        self.trial = {idx: 0 for idx in self.X}
        self.archive = []

    def _non_dominated_sort(self, idxs):
        objs = self.obj[idxs]
        n = len(idxs)
        S = [[] for _ in range(n)]
        front = [[]]
        domination_count = [0]*n

        for p in range(n):
            for q in range(n):
                if all(objs[p] <= objs[q]) and any(objs[p] < objs[q]):
                    S[p].append(q)
                elif all(objs[q] <= objs[p]) and any(objs[q] < objs[p]):
                    domination_count[p] += 1
            if domination_count[p] == 0:
                front[0].append(p)

        i = 0
        while front[i]:
            next_front = []
            for p in front[i]:
                for q in S[p]:
                    domination_count[q] -= 1
                    if domination_count[q] == 0:
                        next_front.append(q)
            i += 1
            front.append(next_front)

        return [[idxs[i] for i in fr] for fr in front[:-1]]

    def _crowding_distance(self, front):
        l = len(front)
        if l == 0:
            return []
        dist = {idx: 0.0 for idx in front}
        for m in range(self.obj.shape[1]):
            sorted_list = sorted(front, key=lambda idx: self.obj[idx, m])
            dist[sorted_list[0]] = np.inf
            dist[sorted_list[-1]] = np.inf
            min_val = self.obj[sorted_list[0], m]
            max_val = self.obj[sorted_list[-1], m]
            if max_val == min_val:
                continue
            for i in range(1, l-1):
                prev_val = self.obj[sorted_list[i-1], m]
                next_val = self.obj[sorted_list[i+1], m]
                dist[sorted_list[i]] += (next_val - prev_val) / (max_val - min_val)
        return [dist[idx] for idx in front]

    def _update_archive(self, swarm_idxs):
        merged = list(set(self.archive + swarm_idxs))
        fronts = self._non_dominated_sort(merged)
        new_archive = []
        for front in fronts:
            if len(new_archive) + len(front) <= self.archive_size:
                new_archive.extend(front)
            else:
                cds = self._crowding_distance(front)
                ranked = sorted(zip(front, cds), key=lambda x: -x[1])
                need = self.archive_size - len(new_archive)
                new_archive.extend([idx for idx, _ in ranked[:need]])
                break
        self.archive = new_archive

    def _mutate_solution(self, idx):
        return random.randrange(self.n_solutions)

    def _employee_phase(self):
        new_X = []
        for idx in self.X:
            v = self._mutate_solution(idx)
            u = self._mutate_solution(idx)
            cands = [idx, v, u]
            fronts = self._non_dominated_sort(cands)
            chosen = random.choice(fronts[0])
            new_X.append(chosen)
            self.trial[idx] = 0 if chosen != idx else self.trial.get(idx,0) + 1
        self.X = new_X

    def _onlooker_phase(self):
        fronts = self._non_dominated_sort(self.X)
        distances = {}
        for front in fronts:
            cds = self._crowding_distance(front)
            for idx, cd in zip(front, cds):
                distances[idx] = cd
        inf_idxs = [idx for idx, d in distances.items() if np.isinf(d)]
        idx_list = list(self.X)
        new_X = []
        for _ in range(self.NP):
            if inf_idxs:
                sel = random.choice(inf_idxs)
            else:
                dist_list = [distances.get(idx, 0.0) for idx in idx_list]
                total = sum(dist_list)
                probs = [d/total for d in dist_list] if total > 0 else [1/len(idx_list)]*len(idx_list)
                sel = random.choices(idx_list, weights=probs, k=1)[0]
            m = self._mutate_solution(sel)
            if all(self.obj[m] <= self.obj[sel]) and any(self.obj[m] < self.obj[sel]):
                new_X.append(m)
                self.trial[sel] = 0
            else:
                new_X.append(sel)
                self.trial[sel] = self.trial.get(sel,0) + 1
        self.X = new_X

    def _scout_phase(self):
        for i, idx in enumerate(self.X):
            if self.trial.get(idx, 0) > self.limit:
                new_idx = random.randrange(self.n_solutions)
                self.X[i] = new_idx
                self.trial[new_idx] = 0
                self.trial[idx] = 0

    def run(self):
        self._update_archive(self.X)
        for _ in range(self.MIC):
            self._employee_phase()
            self._onlooker_phase()
            self._scout_phase()
            self._update_archive(self.X)
        return self.archive

In [56]:
# Run NSABC
nsabc = NSABC(objectives, swarm_size=100, archive_size=50, max_iter=200)
pareto_idxs = nsabc.run()

# Extract recommended Pareto-optimal lawyers
recommended = df.iloc[pareto_idxs].copy()

# Define a trade-off score (lower is better: Price × (10 - Satisfaction))
recommended["Tradeoff score"] = recommended["Price"] * (10 - recommended["Client satisfaction (out of 10)"])

# Sort by trade-off
recommended = recommended.sort_values("Tradeoff score").reset_index(drop=True)

print("✅ Top Pareto-optimal lawyers (Price vs Satisfaction):")
recommended[["Name", "Domain", "Price", "Client satisfaction (out of 10)", "Tradeoff score"]].head(10)

✅ Top Pareto-optimal lawyers (Price vs Satisfaction):


Unnamed: 0,Name,Domain,Price,Client satisfaction (out of 10),Tradeoff score
0,Adrian Rodriguez,Labor Law,21678.76,10.0,0.0
1,Holly Webb,Family Law,22244.03,10.0,0.0
2,Mrs. Melissa Miller MD,Family Law,22872.58,10.0,0.0
3,Jorge Hudson,Criminal Law,19180.58,9.9,1918.058
4,Trevor Peck,Labor Law,21162.47,9.9,2116.247
5,Anna Turner,Family Law,17012.09,9.8,3402.418
6,Lisa Gonzalez,IP Law,18141.41,9.8,3628.282
7,Megan Johnston,IP Law,16762.1,9.7,5028.63
8,Olivia Smith,Family Law,17000.98,9.7,5100.294
9,Lauren Ryan,Family Law,14535.34,9.6,5814.136
