In [488]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder
from itertools import combinations 

In [489]:
people = [
    {
        'name': 'Bob',
        'availability': ['2021-01-10', '2021-01-11'],
        'interests': ['rock climbing', 'tech', 'data science'],
    },
    {
        'name': 'Joe',
        'availability': ['2021-01-10', '2021-01-09'],
        'interests': ['rock climbing', 'swimming', 'data science'],
    },
    {
        'name': 'Carolyn',
        'availability': ['2021-01-11', '2021-01-12'],
        'interests': ['data science'],
    },
    {
        'name': 'Dan',
        'availability': ['2021-01-12'],
        'interests': ['rock climbing'],
    },
]

In [522]:
def pool_matching(people):
    # list of possible distinct interests
    interests = []
    # list of possible distinct dates
    dates = []
    
    shared_availability = {}
    shared_interests = {}
    
    # this creates lists of unique interests and dates
    for person in people:
        interests.extend(person['interests'])
        dates.extend(person['availability'])
    interests = list(set(interests))
    dates = list(set(dates))
    
    # loops through people and creates a dictionary with keys as distinct dates and values as combinations of people who are available those days
    shared_availability = {}
    matched_names = []
    for date in dates:
        matches = []
        for person in people:
            if date in person['availability']:
                matches.append(person['name'])
        if len(matches) > 1:
            matched_names.extend(matches)
            # we create a list of unique combinations because there may be more than 2 people available a given day
            matches = list(combinations(matches,2))
            shared_availability[date] = matches
    
    # checks for people who had zero matches for avalability -- done with python set comparison 
    all_names = set([i['name'] for i in people])
    matched_names = set(matched_names)
    unmatched = list(all_names.symmetric_difference(matched_names))
    unmatched
    
    # loops through people and creates a dictionary with keys as distinct interests and values as combinations of people who have those interests
    for interest in interests:
        matches = []
        for person in people:
            if interest in person['interests']:
                matches.append(person['name'])
        if len(matches) > 1:
            # we create a list of unique combinations because there are likely more than 2 people who share a given interest
            matches = list(combinations(matches,2))
            shared_interests[interest] = matches
    
    # finally we loop through shared interests and our matches for date availability and search for shared interests
    all_matches = []
    for date in shared_availability.keys():
        for potential_match in shared_availability[date]:
            date = date
            match = potential_match
            #list of shared interest for each date match
            si = []
            for interest in shared_interests.keys():
                if potential_match in shared_interests[interest]:
                    si.append(interest)
            all_matches.append({
            'match': match,
            'scheduled_date': date,
            'overlapping_interests': si,
            })
    
    return all_matches, unmatched


In [524]:
matches, no_matches = pool_matching(people)

In [525]:
matches

[{'match': ('Bob', 'Joe'),
  'scheduled_date': '2021-01-10',
  'overlapping_interests': ['rock climbing', 'data science']},
 {'match': ('Carolyn', 'Dan'),
  'scheduled_date': '2021-01-12',
  'overlapping_interests': []},
 {'match': ('Bob', 'Carolyn'),
  'scheduled_date': '2021-01-11',
  'overlapping_interests': ['data science']}]

In [526]:
no_matches

[]

In [490]:
interests = []
dates = []
for person in people:
    interests.extend(person['interests'])
    dates.extend(person['availability'])
interests = list(set(interests))
dates = list(set(dates))

In [512]:
shared_availability = {}
matched_names = []

for date in dates:
    matches = []
    for person in people:
        if date in person['availability']:
            matches.append(person['name'])
    if len(matches) > 1:
        matched_names.extend(matches)
        matches = list(combinations(matches,2))
        shared_availability[date] = matches

all_names = set([i['name'] for i in people])
matched_names = set(matched_names)
unmatched = list(all_names.symmetric_difference(matched_names))
unmatched

[]

In [475]:
shared_interests = {}
for interest in interests:
    matches = []
    for person in people:
        if interest in person['interests']:
            matches.append(person['name'])
    if len(matches) > 1:
        matches = list(combinations(matches,2))
        shared_interests[interest] = matches

In [476]:
matchbook = []
for date in shared_availability.keys():
    for potential_match in shared_availability[date]:
        date = date
        match = potential_match
        si = []
        for interest in shared_interests.keys():
            if potential_match in shared_interests[interest]:
                si.append(interest)
        matchbook.append({
        'match': match,
        'scheduled_date': date,
        'overlapping_interests': si,
        })
        
print(matchbook)

[{'match': ('Bob', 'Joe'), 'scheduled_date': '2021-01-10', 'overlapping_interests': ['rock climbing', 'data science']}, {'match': ('Carolyn', 'Dan'), 'scheduled_date': '2021-01-12', 'overlapping_interests': []}, {'match': ('Bob', 'Carolyn'), 'scheduled_date': '2021-01-11', 'overlapping_interests': ['data science']}]


In [None]:
[{'match': ('Bob', 'Joe'), 'scheduled_date': '2021-01-10', 'overlapping_interests': ['rock climbing', 'data science']}, 
 {'match': ('Carolyn', 'Dan'), 'scheduled_date': '2021-01-12', 'overlapping_interests': []}, 
 {'match': ('Bob', 'Carolyn'), 'scheduled_date': '2021-01-11', 'overlapping_interests': ['data science']}]

In [453]:
dates = list(shared_availability.keys())
shared_date_combos = list(shared_availability.values())

In [468]:
shared_availability['2021-01-10']

[('Bob', 'Joe')]

In [465]:
shared_date_combos[0][0] in shared_interests['data science']

True

In [461]:
shared_interests

{'rock climbing': [('Bob', 'Joe'), ('Bob', 'Dan'), ('Joe', 'Dan')],
 'data science': [('Bob', 'Joe'), ('Bob', 'Carolyn'), ('Joe', 'Carolyn')]}

In [459]:
shared_date_combos[0][0] in shared_interests['rock climbing']

True

In [434]:
comb = combinations(shared_availability['2021-01-10'], 2)

In [436]:
list(comb)

[('Bob', 'Joe')]

In [318]:
int_list = []
date_list = []
for person in people:
    int_list.extend(person['interests'])
    date_list.extend(person['availability'])
int_list = dict(enumerate(set(int_list)))
int_lookup = dict(zip(int_list.values(), int_list.keys()))
date_list = dict(enumerate(set(date_list)))
date_lookup = dict(zip(date_list.values(), date_list.keys()))
print(date_lookup)
print(int_lookup)

{'2021-01-10': 0, '2021-01-09': 1, '2021-01-12': 2, '2021-01-11': 3}
{'tech': 0, 'swimming': 1, 'rock climbing': 2, 'data science': 3}


In [320]:
people = pd.DataFrame(people)
people.head()
parr = np.array(people)

In [321]:
parr

array([['Bob', list(['2021-01-10', '2021-01-11']),
        list(['rock climbing', 'tech', 'data science'])],
       ['Joe', list(['2021-01-10', '2021-01-09']),
        list(['rock climbing', 'swimming', 'data science'])],
       ['Carolyn', list(['2021-01-11', '2021-01-12']),
        list(['data science'])],
       ['Dan', list(['2021-01-12']), list(['rock climbing'])]],
      dtype=object)

In [307]:
interests_binarizer = MultiLabelBinarizer()
availability_binarizer = MultiLabelBinarizer()

In [308]:
people['interests'] = list(interests_binarizer.fit_transform(people['interests']))
interests_lookup = dict(list(enumerate(interests_binarizer.classes_)))
interests_lookup

{0: 'data science', 1: 'rock climbing', 2: 'swimming', 3: 'tech'}

In [309]:
people['availability'] = list(availability_binarizer.fit_transform(people['availability']))
availability_lookup = dict(list(enumerate(availability_binarizer.classes_)))
availability_lookup

{0: '2021-01-09', 1: '2021-01-10', 2: '2021-01-11', 3: '2021-01-12'}

In [313]:
people_arr = np.array(people)
people_arr

array([['Bob', array([0, 1, 1, 0]), array([1, 1, 0, 1])],
       ['Joe', array([1, 1, 0, 0]), array([1, 1, 1, 0])],
       ['Carolyn', array([0, 0, 1, 1]), array([1, 0, 0, 0])],
       ['Dan', array([0, 0, 0, 1]), array([0, 1, 0, 0])]], dtype=object)

In [296]:
availability_matches = []
for i in range(people_arr.shape[0]):
    matches = []
    for j in range(i,people_arr.shape[0] ):
        if (people_arr[i][1] == people_arr[j][1]).any():
            matches.append(people_arr[j][0])
    availability_matches.append(matches)  

In [297]:
availability_matches

[['Bob', 'Joe', 'Carolyn', 'Dan'], ['Joe', 'Dan'], ['Carolyn', 'Dan'], ['Dan']]

In [298]:
people['matches'] = availability_matches
people

Unnamed: 0,name,availability,interests,matches
0,Bob,"[0, 1, 1, 0]","[1, 1, 0, 1]","[Bob, Joe, Carolyn, Dan]"
1,Joe,"[1, 1, 0, 0]","[1, 1, 1, 0]","[Joe, Dan]"
2,Carolyn,"[0, 0, 1, 1]","[1, 0, 0, 0]","[Carolyn, Dan]"
3,Dan,"[0, 0, 0, 1]","[0, 1, 0, 0]",[Dan]
