In [67]:
from collections import Counter, defaultdict

# The ten members of the network. Each id equals the member's position in
# the list, so the ids are generated with enumerate().
users = [
    {"id": user_id, "name": name}
    for user_id, name in enumerate(
        ["Hero", "Dunn", "Sue", "Chi", "Thor",
         "Clive", "Hicks", "Devin", "Kate", "Klein"]
    )
]

# Friendships as (user_id, user_id) pairs; each pair appears only once here
# and is expanded in both directions when the lookup table is built below.
friendship_pairs = [
    (0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 4),
    (4, 5), (5, 6), (5, 7), (6, 8), (7, 8), (8, 9),
]

# Lookup table: user id -> list of friend ids. Looking up one user's friends
# in a dict avoids scanning the whole pair list every time.
# Every user starts with an empty list, so users without friends still have a key.
friendships = dict((user["id"], []) for user in users)

for i, j in friendship_pairs:
    # Register the friendship on both sides.
    friendships[i].append(j)
    friendships[j].append(i)
    
def number_of_friends(user):
    """Return how many friends `user` has.

    `user` is a dict with an "id" key; the count is the length of that id's
    entry in the module-level `friendships` table. Raises KeyError if the id
    has no entry in `friendships`.
    """
    return len(friendships[user["id"]])

In [68]:
# Network-wide statistics: the sum of every user's friend count and the
# mean friend count per user.
num_users = len(users)
total_connections = sum(map(number_of_friends, users))
avg_connections = total_connections / num_users

# (user_id, friend_count) pairs ordered from most to fewest friends.
# The sort is stable, so users with equal counts keep their order from `users`.
numb_friends_by_id = sorted(
    ((user["id"], number_of_friends(user)) for user in users),
    key=lambda pair: pair[1],
    reverse=True,
)


def foaf_ids_bad(user):
    """Return the ids of every friend of each of `user`'s friends, unfiltered.

    Nothing is removed or de-duplicated: the result contains one entry per
    (friend, friend-of-friend) link, so it can include the user's own id,
    the ids of direct friends, and repeated ids.
    """
    collected = []
    for friend_id in friendships[user["id"]]:
        collected.extend(friendships[friend_id])
    return collected

# One (user_id, unfiltered friend-of-friend ids) pair per user, in `users` order.
foaf_ids_list = [
    (member["id"], foaf_ids_bad(member))
    for member in users
]

    
def friends_of_friends(user_id):
    """Count friends-of-friends of `user_id`.

    Returns a Counter mapping each candidate id to the number of friends of
    `user_id` who are also friends with that candidate. The user's own id and
    the ids of the user's direct friends are excluded.
    """
    direct_friends = friendships[user_id]
    mutual_counts = Counter()
    for friend_id in direct_friends:
        for candidate_id in friendships[friend_id]:
            # Skip the user and anyone who is already a direct friend.
            if candidate_id == user_id or candidate_id in direct_friends:
                continue
            mutual_counts[candidate_id] += 1
    return mutual_counts

In [69]:
# (user_id, interest) pairs, laid out one user per section.
interests = [
    # user 0
    (0, "Hadoop"), (0, "Big Data"), (0, "HBase"), (0, "Java"),
    (0, "Spark"), (0, "Storm"), (0, "Cassandra"),
    # user 1
    (1, "NoSQL"), (1, "MongoDB"), (1, "Cassandra"), (1, "HBase"),
    (1, "Postgres"),
    # user 2
    (2, "Python"), (2, "scikit-learn"), (2, "scipy"),
    (2, "numpy"), (2, "statsmodels"), (2, "pandas"),
    # user 3
    (3, "R"), (3, "Python"), (3, "statistics"),
    (3, "regression"), (3, "probability"),
    # user 4
    (4, "machine learning"), (4, "regression"),
    (4, "decision trees"), (4, "libsvm"),
    # user 5
    (5, "Python"), (5, "R"), (5, "Java"), (5, "C++"),
    (5, "Haskell"), (5, "programming languages"),
    # user 6
    (6, "statistics"), (6, "probability"),
    (6, "mathematics"), (6, "theory"),
    # user 7
    (7, "machine learning"), (7, "scikit-learn"),
    (7, "Mahout"), (7, "neural networks"),
    # user 8
    (8, "neural networks"), (8, "deep learning"),
    (8, "Big Data"), (8, "artificial intelligence"),
    # user 9
    (9, "Hadoop"), (9, "Java"), (9, "MapReduce"), (9, "Big Data"),
]

def data_scientist_who_like(target_interest):
    """Return the ids of users whose interests include `target_interest`.

    Scans the whole `interests` list on every call, which is fine for a
    small amount of data. Ids are returned in the order they appear in
    `interests`; the match is an exact string comparison.
    """
    matching_ids = []
    for user_id, user_interest in interests:
        if user_interest == target_interest:
            matching_ids.append(user_id)
    return matching_ids

# Better suited to larger data: build two lookup tables once instead of
# scanning the `interests` list on every query.
user_ids_by_interest = defaultdict(list)   # interest -> ids of users who have it
interest_by_user_id = defaultdict(list)    # user id  -> that user's interests

# A single pass over the pairs fills both tables.
for user_id, interest in interests:
    user_ids_by_interest[interest].append(user_id)
    interest_by_user_id[user_id].append(interest)
    
def most_common_interest_with(user):
    """Count how many interests each other user shares with `user`.

    `user` is a dict with an "id" key. Returns a Counter mapping every other
    user id to the number of `user`'s interests that user also has; `user`'s
    own id is never counted.
    """
    own_id = user["id"]
    shared_counts = Counter()
    for interest in interest_by_user_id[own_id]:
        for other_id in user_ids_by_interest[interest]:
            if other_id != own_id:
                shared_counts[other_id] += 1
    return shared_counts

In [74]:
# Outputs: print each structure computed in the cells above for inspection.

# The loop variable is deliberately not named `id`: a top-level loop variable
# stays bound after the loop ends, so that name would replace the built-in
# id() for every cell run afterwards in the same kernel.
for foaf_user_id, foaf in foaf_ids_list:
    # The f-string formats the (user id, foaf list) pair as a tuple.
    print(f"Foaf friends {foaf_user_id, foaf}")

print("\n Friends of friends \n",friends_of_friends(users[3]["id"]))
print("\n Total of connections\n",total_connections)
print("\n Frindships\n",friendships)
print("\n Average \n",avg_connections)
print("\n Counter number friends\n",numb_friends_by_id)

print("\n Interest user id \n",interest_by_user_id)
print("\n User id interest \n",user_ids_by_interest)
print("\n Counter interest by id \n",most_common_interest_with(users[3]))

Foaf friends (0, [0, 2, 3, 0, 1, 3])
Foaf friends (1, [1, 2, 0, 1, 3, 1, 2, 4])
Foaf friends (2, [1, 2, 0, 2, 3, 1, 2, 4])
Foaf friends (3, [0, 2, 3, 0, 1, 3, 3, 5])
Foaf friends (4, [1, 2, 4, 4, 6, 7])
Foaf friends (5, [3, 5, 5, 8, 5, 8])
Foaf friends (6, [4, 6, 7, 6, 7, 9])
Foaf friends (7, [4, 6, 7, 6, 7, 9])
Foaf friends (8, [5, 8, 5, 8, 8])
Foaf friends (9, [6, 7, 9])

 Friends of friends 
 Counter({0: 2, 5: 1})

 Total of connections
 24

 Frindships
 {0: [1, 2], 1: [0, 2, 3], 2: [0, 1, 3], 3: [1, 2, 4], 4: [3, 5], 5: [4, 6, 7], 6: [5, 8], 7: [5, 8], 8: [6, 7, 9], 9: [8]}

 Average 
 2.4

 Counter number friends
 [(1, 3), (2, 3), (3, 3), (5, 3), (8, 3), (0, 2), (4, 2), (6, 2), (7, 2), (9, 1)]

 Interest user id 
 defaultdict(<class 'list'>, {0: ['Hadoop', 'Big Data', 'HBase', 'Java', 'Spark', 'Storm', 'Cassandra'], 1: ['NoSQL', 'MongoDB', 'Cassandra', 'HBase', 'Postgres'], 2: ['Python', 'scikit-learn', 'scipy', 'numpy', 'statsmodels', 'pandas'], 3: ['R', 'Python', 'statistics', '