## DataSciencester - Networking App for Data Scientists

In [27]:
from collections import Counter
from collections import defaultdict

In [None]:
# Functions

def number_of_friends(user):
    """Return the number of friends recorded for ``user``.

    ``user`` is a dict with an "id" key; the count is the length of that id's
    entry in the module-level ``friendships`` mapping.
    """
    return len(friendships[user["id"]])

# Unfiltered version of friends_of_friends (keeps duplicates, the user, and direct friends)
def foaf_ids_bad(user):
    """Return the ids of every friend of each of ``user``'s friends.

    "foaf" is short for "friend of a friend". Nothing is filtered out, so the
    result can contain duplicates, the user's own id, and ids of people who
    are already direct friends of the user.
    """
    foaf_ids = []
    for friend_id in friendships[user["id"]]:
        # Append this friend's whole friend list, in order.
        foaf_ids.extend(friendships[friend_id])
    return foaf_ids

# Filtered version: counts friends-of-friends, excluding the user and their direct friends
def friends_of_friends(user):
    """Count the friends-of-friends of ``user``.

    Returns a Counter mapping each friend-of-a-friend id to the number of the
    user's friends through which it is reached. The user's own id and the ids
    of the user's direct friends are left out.
    """
    own_id = user["id"]
    direct_friends = friendships[own_id]
    mutual_counts = Counter()
    for friend_id in direct_friends:
        for foaf_id in friendships[friend_id]:
            # Skip the user themselves and anyone who is already a friend.
            if foaf_id == own_id or foaf_id in direct_friends:
                continue
            mutual_counts[foaf_id] += 1
    return mutual_counts

def data_scientist_who_like(target_interest):
    """Return the ids of users whose interest equals ``target_interest``.

    Scans the module-level ``interests`` list of (user_id, interest) pairs and
    keeps matching ids in the order they appear. The comparison is an exact
    match.
    """
    matching_ids = []
    for user_id, user_interest in interests:
        if user_interest == target_interest:
            matching_ids.append(user_id)
    return matching_ids

def most_common_interests_with(user):
    """Count how many interests every other user shares with ``user``.

    Args:
        user: dict with an "id" key.

    Returns:
        A Counter mapping each other user's id to the number of interests
        that user has in common with ``user``. The user's own id is excluded.
        An id with no recorded interests yields an empty Counter.

    Reads the module-level ``interests_by_user_id`` and
    ``users_ids_by_interest`` indexes. Both are queried with ``.get`` rather
    than ``[...]`` so that a missing key is not inserted into a
    ``defaultdict`` index as a side effect of this read-only query.
    """
    user_id = user["id"]
    return Counter(
        interested_user_id
        for interest in interests_by_user_id.get(user_id, ())
        for interested_user_id in users_ids_by_interest.get(interest, ())
        if interested_user_id != user_id
    )

In [None]:
# Data

# One record per data scientist; each id is the name's position in the list.
users = [
    {"id": user_id, "name": name}
    for user_id, name in enumerate(
        ["Hero", "Dunn", "Sue", "Chi", "Thor", "Clive", "Hicks", "Devin", "Kate", "Klein"]
    )
]

# Friendships as (user_id, user_id) pairs.
friendship_pairs = [
    (0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 4),
    (4, 5), (5, 6), (5, 7), (6, 8), (7, 8), (8, 9),
]

# (user_id, interest) pairs, grouped by user id.
interests = [
    # user 0
    (0, "Hadoop"), (0, "Big Data"), (0, "HBase"), (0, "Java"),
    (0, "Spark"), (0, "Storm"), (0, "Cassandra"),
    # user 1
    (1, "NoSQL"), (1, "MongoDB"), (1, "Cassandra"), (1, "HBase"), (1, "Postgres"),
    # user 2
    (2, "Python"), (2, "scikit-learn"), (2, "scipy"),
    (2, "numpy"), (2, "statsmodels"), (2, "pandas"),
    # user 3
    (3, "R"), (3, "Python"), (3, "statistics"), (3, "regression"), (3, "probability"),
    # user 4
    (4, "machine learning"), (4, "regression"), (4, "decision trees"), (4, "libsvm"),
    # user 5
    (5, "Python"), (5, "R"), (5, "Java"), (5, "C++"),
    (5, "Haskell"), (5, "programming languages"),
    # user 6
    (6, "statistics"), (6, "probability"), (6, "mathematics"), (6, "theory"),
    # user 7
    (7, "machine learning"), (7, "scikit-learn"), (7, "Mahout"), (7, "neural networks"),
    # user 8
    (8, "neural networks"), (8, "deep learning"), (8, "Big Data"),
    (8, "artificial intelligence"),
    # user 9
    (9, "Hadoop"), (9, "Java"), (9, "MapReduce"), (9, "Big Data"),
]

In [14]:
# Start every user off with an empty friend list, keyed by user id
friendships = dict((user["id"], []) for user in users)
print(friendships)

# Friendship is mutual, so record each pair under both ids
for first_id, second_id in friendship_pairs:
    friendships[first_id].append(second_id)
    friendships[second_id].append(first_id)

print(friendships)

{0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: []}
{0: [1, 2], 1: [0, 2, 3], 2: [0, 1, 3], 3: [1, 2, 4], 4: [3, 5], 5: [4, 6, 7], 6: [5, 8], 7: [5, 8], 8: [6, 7, 9], 9: [8]}


In [None]:
# Network summary. Each friendship appears in two friend lists, so the total
# below counts every friendship once per participant.
total_connections = sum(map(number_of_friends, users))
print(total_connections)

num_users = len(users)
print(num_users)

# Average number of friends per user
avg_connections = total_connections / num_users
print(avg_connections)

24
10
2.4


In [16]:
# Pair each user id with that user's friend count
num_friends_by_id = [
    (user["id"], number_of_friends(user))
    for user in users
]
print("ID/FRIENDS: ", num_friends_by_id)

# Re-order from most friends to fewest (ties keep their original order)
num_friends_by_id = sorted(
    num_friends_by_id,
    key=lambda pair: pair[1],
    reverse=True,
)
print("ID/FRIENDS (IN ORDER): ", num_friends_by_id)

ID/FRIENDS:  [(0, 2), (1, 3), (2, 3), (3, 3), (4, 2), (5, 3), (6, 2), (7, 2), (8, 3), (9, 1)]
ID/FRIENDS (IN ORDER):  [(1, 3), (2, 3), (3, 3), (5, 3), (8, 3), (0, 2), (4, 2), (6, 2), (7, 2), (9, 1)]


In [None]:
# Data Scientists You May Know

foaf_ids_bad(users[0])
# Contains repeated ids because it simply concatenates every friend's friend list into one list

[0, 2, 3, 0, 1, 3]

In [25]:
friends_of_friends(users[3])
# Counts each friend-of-a-friend id instead of repeating it, and excludes the user's own id and direct friends

Counter({0: 2, 5: 1})

In [None]:
# Look up, one interest at a time, which data scientists like a given topic

data_scientist_who_like("Big Data")

[0, 8, 9]

In [42]:
# Interests in Common (All in One)

# Build both lookup tables in a single pass over the (user_id, interest) pairs:
# interest -> ids of users who have it, and user id -> that user's interests.
users_ids_by_interest = defaultdict(list)
interests_by_user_id = defaultdict(list)

for user_id, interest in interests:
    users_ids_by_interest[interest].append(user_id)
    interests_by_user_id[user_id].append(interest)

most_common_interests_with(users[0])


Counter({9: 3, 1: 2, 8: 1, 5: 1})