In [1]:
import json
import numpy as np

In [2]:
friend_data = np.genfromtxt('grupee_data/friends.csv', delimiter=',', dtype=int)
friendships = [[] for _ in range(np.max(friend_data[:,0])+1)]

for id in set(friend_data[:,0]):
    friendships[id] = [id, friend_data[friend_data[:, 0] == id, 1]]

with open('grupee_data/preferences.json') as file:
    preferences = json.load(file)

with open('grupee_data/n_concerts.txt') as file:
    n_concerts = file.read().split('\n')[1:-1]
    concerts = np.array([(x.split(':')[0], x.split(':')[1]) for x in n_concerts])
    file.close()

In [3]:
friendship_matrix = np.empty((len(friendships), len(friendships)), dtype=int)

for i, id in enumerate(friendships):
    row = np.zeros(len(friendships))
    if id:
        row[id[1]] = 1
        friendship_matrix[i] = row
    else:
        friendship_matrix[i] = row
        
friendship_matrix = friendship_matrix + friendship_matrix.T - np.diag(friendship_matrix.diagonal())

<h3>Define likelihoods</h3>

In [4]:
visit_concerts = {
    'none': .002,
    'one': 0.018,
    'both': 0.393,
}

In [5]:
equalPrefs = []

for ind in range(len(preferences['0'])):
    genrePref = []
    for (id, row) in zip(preferences.keys(), preferences.values()):
        if row[ind] == '1': genrePref.append(int(id))
    equalPrefs.append(genrePref)

***
<h2>Start with Processing</h2>

<h3>Laplacian Matrix</h3>

In [6]:
degreeMat = np.diag(np.sum(friendship_matrix, axis=1))
laplacian = degreeMat - friendship_matrix

degrees = np.empty((len(friendships), 2))

for id in range(len(friendships)):
    degrees[id] = (id, laplacian[id][id])

print(f'12% of {len(friendships)}: {len(friendships) * .12}')
sortedDegrees = sorted(degrees, key=lambda tup: tup[1])
mostInfluential = np.array(sortedDegrees[len(friendships)-round(len(friendships) * .12):], dtype=int)
#np.savetxt('a_team_5.txt', np.array(list(reversed(mostInfluential[:,0]))), delimiter='\n', fmt='%i')

12% of 8311: 997.3199999999999


<h3>Some other model...<h3>

In [None]:
!pip install networkx

In [7]:
import networkx as nx

graph = nx.Graph()

graph.add_nodes_from(range(len(friendship_matrix)))

for id, row in enumerate(friendship_matrix):
    edges = [(id, x) for x in np.where(row == 1)[0]]
    myPref = [i for i, x in enumerate(preferences[f'{id}']) if x == '1']
    weights = np.zeros((len(edges), 84))
    for friend in edges:
        weight = np.zeros(84)
        weight.fill(visit_concerts['none'])
        friendPref = [i for i, x in enumerate(preferences[f'{friend[1]}']) if x == '1']
        
        common_interest = set(myPref) & set(friendPref)
        my_exclusive = set(myPref) - set(friendPref)
        friend_exclusive = set(friendPref) - set(myPref)
        
        weight[list(common_interest)] = visit_concerts['both']
        weight[list(my_exclusive)] = visit_concerts['one']
        weight[list(friend_exclusive)] = visit_concerts['one']
    
    graph.add_edges_from(edges, weight=weight)

In [8]:
print(graph.edges(1))

[(1, 0), (1, 11), (1, 14), (1, 28), (1, 29), (1, 30), (1, 31), (1, 32), (1, 33), (1, 34), (1, 35), (1, 36), (1, 37), (1, 38), (1, 39), (1, 40)]


In [9]:
import random
random.seed(42)
genres = range(84)

visitation_rates = np.zeros(graph.number_of_nodes())

for genre in genres:
    for i in range(int(concerts[:,1][genre])):
        visited = np.array([False for _ in range(graph.number_of_nodes())])
        for node in graph.nodes():
            edges = graph.edges(node)

            for edge in edges:
                if visited[edge[0]] or visited[edge[1]]: continue
                visit = random.random() < graph[edge[0]][edge[1]]['weight'][genre]
                if visit:
                    visitation_rates[[edge[0], edge[1]]] += 1
                    visited[[edge[0], edge[1]]] = True
                    break

In [10]:
test = [int(x) for x in concerts[:,1]]
print("No. of concerts:", sum(test))
print("Max visits:", max(visitation_rates))
print("Who visits max:", np.where(visitation_rates == max(visitation_rates))[0])
print("How many visit any concert:", len(np.where(visitation_rates > 0)[0]))

topTwelve = np.argsort(visitation_rates)[-997:]
print("Top twelve percent:", sorted(topTwelve))
# np.savetxt('b_team_5.txt', topTwelve, delimiter='\n', fmt='%i')

No. of concerts: 925
Max visits: 627.0
Who visits max: [108]
How many visit any concert: 8309
Top twelve percent: [0, 1, 2, 4, 5, 6, 7, 9, 11, 13, 14, 17, 18, 20, 23, 24, 28, 30, 31, 34, 35, 38, 39, 40, 41, 43, 45, 46, 47, 48, 49, 50, 51, 55, 56, 57, 59, 60, 61, 62, 64, 68, 69, 72, 73, 74, 77, 79, 80, 81, 82, 83, 86, 87, 88, 90, 93, 94, 96, 101, 102, 103, 104, 108, 109, 110, 111, 112, 114, 115, 116, 118, 119, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 138, 139, 141, 144, 145, 148, 149, 150, 156, 157, 158, 159, 160, 164, 166, 168, 171, 173, 174, 177, 179, 182, 184, 185, 186, 191, 192, 193, 196, 197, 200, 209, 210, 212, 214, 221, 223, 224, 225, 228, 232, 235, 239, 240, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 255, 256, 257, 260, 264, 265, 268, 269, 271, 272, 274, 279, 280, 281, 282, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 296, 298, 299, 300, 303, 305, 306, 308, 311, 312, 314, 316, 317, 318, 320, 322, 323, 327, 328, 331, 333, 33