In [5]:
import numpy as np
import pandas as pd
import random

In [50]:
# Synthetic cohort X: 100 integer ages drawn from 30..59 (both ends inclusive).
X_df = pd.DataFrame(
    {"age": [random.randint(30, 59) for _ in range(100)]}
)
X_df

Unnamed: 0,age
0,32
1,48
2,39
3,51
4,54
...,...
95,54
96,42
97,58
98,33


In [51]:
# Synthetic cohort Y: 120 integer ages drawn from 40..69 (both ends inclusive).
Y_df = pd.DataFrame(
    {"age": [random.randint(40, 69) for _ in range(120)]}
)
Y_df

Unnamed: 0,age
0,41
1,64
2,53
3,63
4,47
...,...
115,60
116,61
117,62
118,62


In [52]:
class BipartiteMatching:
    """
    Maximum bipartite matching between the rows of X_df and the rows of Y_df.

    Rows are addressed by their position (0 .. len-1), not by their index
    label, so the frames may carry any index (non-default, string, duplicated).
    """
    def __init__(self, X_df, Y_df):
        self.X_df = X_df
        self.Y_df = Y_df

        self.xn = len(self.X_df)
        self.yn = len(self.Y_df)

    def _augment(self, start):
        """
        Search for an augmenting path that starts at the X-side vertex `start`
        and, if one exists, flip it so that `start` becomes matched.

        This is a depth-first search written with an explicit stack instead of
        recursion, so a long alternating path cannot hit Python's recursion
        limit.

        :param start: position of an X-side vertex that is currently unmatched
        :return: True if an augmenting path was found (self.matched is updated)
        """
        visited = set()  # Y-side vertices already tried during this search
        # frames[k] = (X vertex, iterator over its not-yet-tried neighbours)
        frames = [(start, iter(self.edges[start]))]
        # via[k] = Y vertex through which frames[k + 1] was entered from frames[k]
        via = []

        while frames:
            v, neighbours = frames[-1]
            descended = False
            for u in neighbours:
                if u in visited:
                    continue
                visited.add(u)
                owner = self.matched[u]
                if owner == -1:
                    # Free Y vertex reached: give it to v, then hand every Y
                    # vertex on the path to the X vertex that entered through it.
                    self.matched[u] = v
                    for k, y in enumerate(via):
                        self.matched[y] = frames[k][0]
                    return True
                # u is taken: try to re-route its current partner.
                via.append(u)
                frames.append((owner, iter(self.edges[owner])))
                descended = True
                break
            if not descended:
                # Dead end for v: backtrack to the vertex we came from.
                frames.pop()
                if via:
                    via.pop()
        return False

    def match(self, col_name, max_gap):
        """
        Build as many (x, y) pairs as possible such that the values in column
        `col_name` differ by at most `max_gap`. Every row is used at most once.

        :param col_name: name of a numeric column present in both frames
        :param max_gap: largest allowed absolute difference within a pair
        :return: list `matched` with one entry per Y_df row; matched[y_pos] is
                 the position of the X_df row paired with it, or -1 if the
                 Y_df row is left unmatched
        """
        # Pull the column out once; per-cell .loc lookups are very slow.
        x_vals = self.X_df[col_name].to_numpy()
        y_vals = self.Y_df[col_name].to_numpy()

        # edges[x_pos] = set of Y positions whose value is within max_gap.
        self.edges = [
            set(np.flatnonzero(np.abs(y_vals - xa) <= max_gap).tolist())
            for xa in x_vals
        ]

        self.matched = [-1] * len(y_vals)

        # Try to extend the matching from every X vertex in turn.
        for s in range(len(self.edges)):
            self._augment(s)

        return self.matched

In [53]:
# NOTE(review): the frames are passed as (Y_df, X_df) while the constructor
# signature is (X_df, Y_df). `matched` therefore has one slot per X_df row and
# stores Y_df row numbers. The size of a maximum matching does not depend on
# which side is which, so the count below is unaffected.
BM = BipartiteMatching(Y_df, X_df)
matched = BM.match("age", 3)
# Number of slots that received a partner (-1 marks an unmatched row).
len([m for m in matched if m >= 0])

82