In [13]:
def read_concerts(file_path):
    concerts = {}
    with open(file_path, 'r') as file:
        for line in file:
            if not line.startswith('#') and line.strip():
                genre, count = line.split(':')
                concerts[genre.strip()] = int(count.strip())
    concerts_list = [[item[0], item[1]] for item in concerts.items()]
    return concerts_list

In [21]:
import pandas as pd
import json
from pathlib import Path

# Define paths using pathlib
data_dir = Path.cwd() /'Data'/'grupee_data'
friends_path = data_dir / 'friends.csv'
concerts_path = data_dir / 'n_concerts.txt'
preferences_path = data_dir  / 'preferences.json'

# Load data using pathlib paths
edges = pd.read_csv(friends_path)
n_concerts = read_concerts(concerts_path)

both_like = 393 / 1000
one_like = 18 / 1000
neither_like = 2 / 1000

with open(preferences_path, 'r') as file:
    preferences = json.load(file)

## Calculate Risk Score for each person

In [33]:
import json
import csv
import numpy as np
from alive_progress import alive_bar

concerts = n_concerts
# Perform the division and multiplication while maintaining the tuple structure
concert_per_two_weeks_scaler = [((vc[1] / 52.1429) * 2.0) for vc in concerts]
# visualizations.visualize_concerts(concert_per_two_weeks, "two weeks")
# print("visit per two weeks", concert_per_two_weeks)


# Read the connections between grupees
friend_pairs = []
with open(friends_path, 'r') as file:
    reader = csv.reader(file)
    for row in reader:
        friend_pairs.append(row)
    friend_pairs.pop(0) # first line contains an unnecessary comment

# probability to infect a friend depending on preferences
both_like = 393/1000
one_like = 18/1000
neither_like = 2/1000
genre_count = len(preferences[friend_pairs[0][0]]) 
risk_per_id = [[0] * genre_count for _ in range(len(preferences.keys()))]
num_friends_per_id = [0] * len(preferences.keys())
with alive_bar(len(friend_pairs), title='Processing Friend IDs') as bar:
    for f_id in friend_pairs: # f_id is a list of two friend ids
        risk_per_id_row = [0] * genre_count
        pref_1 = preferences[f_id[0]]
        pref_2 = preferences[f_id[1]]
        num_friends_per_id[int(f_id[0])] += 1
        num_friends_per_id[int(f_id[1])] += 1
        for i, (p1, p2) in enumerate(zip(pref_1, pref_2)): # iterate over the prefereed genres of the two friends
            if p1 == '1' and p2 == '1':
                risk_per_id_row[i] += both_like
            elif p1 == '1' or p2 == '1':
                risk_per_id_row[i] += one_like
            else:
                risk_per_id_row[i] += neither_like
        # Element wise addition of the risk per genre
        risk_per_id[int(f_id[0])] = [x + y for x, y in zip(risk_per_id[int(f_id[0])], risk_per_id_row)]
        risk_per_id[int(f_id[1])] = [x + y for x, y in zip(risk_per_id[int(f_id[1])], risk_per_id_row)]
        bar()
# scale the risk per genre of each grupee by the number of concerts per two weeks
scaled_risk_per_id = [[x * y for x, y in zip(row, concert_per_two_weeks_scaler)] for row in risk_per_id] # theoretically could scale by concerts instead
# Sum the elements of each row in scaled_risk_per_ids_concert
sum_scaled_risk_per_id = [sum(row) for row in scaled_risk_per_id]
# Calculate the number of top elements to select (12% of the total length)
top_percentage = 0.12
num_top_elements = int(len(sum_scaled_risk_per_id) * top_percentage)

# Get the indices of the top elements
top_indices = np.argsort(sum_scaled_risk_per_id)[-num_top_elements:][::-1]

# Get the values of the top elements
top_values = [sum_scaled_risk_per_id[i] for i in top_indices]

print("Top 12% indices:", top_indices)
print("Top 12% values:", top_values)
# top 12% of the riskiest people to be affected by the virus (due to visitation of concerts with friends)
# FROM HERE ON: 
# top_indices: list of ids of the top 12% of the riskiest people to be affected by the virus (sorted by risk)
# top_values: list of the corresponding risks of the top 12% of the riskiest people to be affected by the virus

Processing Friend IDs |████████████████████████████████████████| 55482/55482 [100%] in 0.9s (60817.86/s) 
Top 12% indices: [1612  419 1210  312  506  294 1197  108 3764 1315  715 2435 1687  587
  160 1974  327 4011 2595  694 3664 2654 7233  855 4207 1805  446 2242
 1923 7296 6678  492 1947  725  600 1658  260 4024 1837  269 6005  366
 3578  904 5569 4352 2237 3322  532 8201  416  554 4726  588 2020   48
 4017  615 3660  552 2894  444  168 2408  590  300  279  177 1305 4022
 4013 7273   17  942 7060    2 1886  114 1806 2117 4935  480  384  349
 1059 5139  430 3498 1484  768  342  328  391 1526 1709  544 2788  468
 1504 2004 3417  985 4006  102 5486 5797 7392 6568 7359 3738 3495 8095
  306  443 2003  249  101  243 5254  320  555 5560  182  388 4977  284
 4702 2010 4060 5775 2244 4530 4113  621  811  118  104 1877 2898 7871
  365  909 2604  252 1304 7278 1408 4786 1316 4761 3081 5443  520  149
 7261 1232 7258 1530  363  598 4595 5049  469  299   39 4812 4796 1951
  125 1538 6714  133  607

In [30]:
genres = {
    0: "Classical",
    1: "Folk",
    2: "Jazz Hip Hop",
    3: "Electro Pop/Electro Rock",
    4: "Dancefloor",
    5: "Indie Rock/Rock pop",
    6: "Singer & Songwriter",
    7: "Comedy",
    8: "Musicals",
    9: "Chill Out/Trip-Hop/Lounge",
    10: "Soundtracks",
    11: "Disco",
    12: "Old school soul",
    13: "Rock",
    14: "Romantic",
    15: "Bluegrass",
    16: "Indie Rock",
    17: "Contemporary Soul",
    18: "Blues",
    19: "Old School",
    20: "Baroque",
    21: "Instrumental jazz",
    22: "Urban Cowboy",
    23: "Asian Music",
    24: "Tropical",
    25: "Early Music",
    26: "Classic Blues",
    27: "Indie Pop",
    28: "Bolero",
    29: "Spirituality & Religion",
    30: "Dancehall/Ragga",
    31: "Dance",
    32: "R&B",
    33: "Pop",
    34: "Film Scores",
    35: "Grime",
    36: "Electro Hip Hop",
    37: "Metal",
    38: "West Coast",
    39: "Acoustic Blues",
    40: "Indie Pop/Folk",
    41: "International Pop",
    42: "Sports",
    43: "Trance",
    44: "Ska",
    45: "Brazilian Music",
    46: "Bollywood",
    47: "Nursery Rhymes",
    48: "Alternative Country",
    49: "Indian Music",
    50: "TV shows & movies",
    51: "Dubstep",
    52: "Classical Period",
    53: "Chicago Blues",
    54: "Vocal jazz",
    55: "TV Soundtracks",
    56: "Latin Music",
    57: "Rock & Roll/Rockabilly",
    58: "Delta Blues",
    59: "African Music",
    60: "Opera",
    61: "Ranchera",
    62: "Oldschool R&B",
    63: "Kids & Family",
    64: "Modern",
    65: "Soul & Funk",
    66: "Electro",
    67: "Alternative",
    68: "Dub",
    69: "Electric Blues",
    70: "Rap/Hip Hop",
    71: "Techno/House",
    72: "Country Blues",
    73: "Traditional Country",
    74: "Country",
    75: "East Coast",
    76: "Contemporary R&B",
    77: "Jazz",
    78: "Game Scores",
    79: "Films/Games",
    80: "Reggae",
    81: "Hard Rock",
    82: "Kids",
    83: "Dirty South"
}

In [40]:
import networkx as nx

# Create a graph
G = nx.Graph()

# Add edges with weight 1
for _, row in edges.iterrows():
    person_id = row[0]
    friend_id = row[1]
    G.add_edge(person_id, friend_id, weight=1)

node_attributes = {}
for _, row in edges.iterrows():
    person_id = row[0]
    friend_id = row[1]
    node_attributes_person_id = {
        sum_scaled_risk_per_id[person_id] : "risk_score",
        num_friends_per_id[person_id]: "num_friends",
        risk_per_id[person_id][0]: "Classical",
        risk_per_id[person_id][1]: "Folk",
        risk_per_id[person_id][2]: "Jazz Hip Hop",
        risk_per_id[person_id][3]: "Electro Pop/Electro Rock",
        risk_per_id[person_id][4]: "Dancefloor",
        risk_per_id[person_id][5]: "Indie Rock/Rock pop",
        risk_per_id[person_id][6]: "Singer & Songwriter",
        risk_per_id[person_id][7]: "Comedy",
        risk_per_id[person_id][8]: "Musicals",
        risk_per_id[person_id][9]: "Chill Out/Trip-Hop/Lounge",
        risk_per_id[person_id][10]: "Soundtracks",
        risk_per_id[person_id][11]: "Disco",
        risk_per_id[person_id][12]: "Old school soul",
        risk_per_id[person_id][13]: "Rock",
        risk_per_id[person_id][14]: "Romantic",
        risk_per_id[person_id][15]: "Bluegrass",
        risk_per_id[person_id][16]: "Indie Rock",
        risk_per_id[person_id][17]: "Contemporary Soul",
        risk_per_id[person_id][18]: "Blues",
        risk_per_id[person_id][19]: "Old School",
        risk_per_id[person_id][20]: "Baroque",
        risk_per_id[person_id][21]: "Instrumental jazz",
        risk_per_id[person_id][22]: "Urban Cowboy",
        risk_per_id[person_id][23]: "Asian Music",
        risk_per_id[person_id][24]: "Tropical",
        risk_per_id[person_id][25]: "Early Music",
        risk_per_id[person_id][26]: "Classic Blues",
        risk_per_id[person_id][27]: "Indie Pop",
        risk_per_id[person_id][28]: "Bolero",
        risk_per_id[person_id][29]: "Spirituality & Religion",
        risk_per_id[person_id][30]: "Dancehall/Ragga",
        risk_per_id[person_id][31]: "Dance",
        risk_per_id[person_id][32]: "R&B",
        risk_per_id[person_id][33]: "Pop",
        risk_per_id[person_id][34]: "Film Scores",
        risk_per_id[person_id][35]: "Grime",
        risk_per_id[person_id][36]: "Electro Hip Hop",
        risk_per_id[person_id][37]: "Metal",
        risk_per_id[person_id][38]: "West Coast",
        risk_per_id[person_id][39]: "Acoustic Blues",
        risk_per_id[person_id][40]: "Indie Pop/Folk",
        risk_per_id[person_id][41]: "International Pop",
        risk_per_id[person_id][42]: "Sports",
        risk_per_id[person_id][43]: "Trance",
        risk_per_id[person_id][44]: "Ska",
        risk_per_id[person_id][45]: "Brazilian Music",
        risk_per_id[person_id][46]: "Bollywood",
        risk_per_id[person_id][47]: "Nursery Rhymes",
        risk_per_id[person_id][48]: "Alternative Country",
        risk_per_id[person_id][49]: "Indian Music",
        risk_per_id[person_id][50]: "TV shows & movies",
        risk_per_id[person_id][51]: "Dubstep",
        risk_per_id[person_id][52]: "Classical Period",
        risk_per_id[person_id][53]: "Chicago Blues",
        risk_per_id[person_id][54]: "Vocal jazz",
        risk_per_id[person_id][55]: "TV Soundtracks",
        risk_per_id[person_id][56]: "Latin Music",
        risk_per_id[person_id][57]: "Rock & Roll/Rockabilly",
        risk_per_id[person_id][58]: "Delta Blues",
        risk_per_id[person_id][59]: "African Music",
        risk_per_id[person_id][60]: "Opera",
        risk_per_id[person_id][61]: "Ranchera",
        risk_per_id[person_id][62]: "Oldschool R&B",
        risk_per_id[person_id][63]: "Kids & Family",
        risk_per_id[person_id][64]: "Modern",
        risk_per_id[person_id][65]: "Soul & Funk",
        risk_per_id[person_id][66]: "Electro",
        risk_per_id[person_id][67]: "Alternative",
        risk_per_id[person_id][68]: "Dub",
        risk_per_id[person_id][69]: "Electric Blues",
        risk_per_id[person_id][70]: "Rap/Hip Hop",
        risk_per_id[person_id][71]: "Techno/House",
        risk_per_id[person_id][72]: "Country Blues",
        risk_per_id[person_id][73]: "Traditional Country",
        risk_per_id[person_id][74]: "Country",
        risk_per_id[person_id][75]: "East Coast",
        risk_per_id[person_id][76]: "Contemporary R&B",
        risk_per_id[person_id][77]: "Jazz",
        risk_per_id[person_id][78]: "Game Scores",
        risk_per_id[person_id][79]: "Films/Games",
        risk_per_id[person_id][80]: "Reggae",
        risk_per_id[person_id][81]: "Hard Rock",
        risk_per_id[person_id][82]: "Kids",
        risk_per_id[person_id][83]: "Dirty South"
    }
    node_attributes_friend_id = {
        sum_scaled_risk_per_id[friend_id] : "risk_score",
        num_friends_per_id[friend_id]: "num_friends",
        risk_per_id[friend_id][0]: "Classical",
        risk_per_id[friend_id][1]: "Folk",
        risk_per_id[friend_id][2]: "Jazz Hip Hop",
        risk_per_id[friend_id][3]: "Electro Pop/Electro Rock",
        risk_per_id[friend_id][4]: "Dancefloor",
        risk_per_id[friend_id][5]: "Indie Rock/Rock pop",
        risk_per_id[friend_id][6]: "Singer & Songwriter",
        risk_per_id[friend_id][7]: "Comedy",
        risk_per_id[friend_id][8]: "Musicals",
        risk_per_id[friend_id][9]: "Chill Out/Trip-Hop/Lounge",
        risk_per_id[friend_id][10]: "Soundtracks",
        risk_per_id[friend_id][11]: "Disco",
        risk_per_id[friend_id][12]: "Old school soul",
        risk_per_id[friend_id][13]: "Rock",
        risk_per_id[friend_id][14]: "Romantic",
        risk_per_id[friend_id][15]: "Bluegrass",
        risk_per_id[friend_id][16]: "Indie Rock",
        risk_per_id[friend_id][17]: "Contemporary Soul",
        risk_per_id[friend_id][18]: "Blues",
        risk_per_id[friend_id][19]: "Old School",
        risk_per_id[friend_id][20]: "Baroque",
        risk_per_id[friend_id][21]: "Instrumental jazz",
        risk_per_id[friend_id][22]: "Urban Cowboy",
        risk_per_id[friend_id][23]: "Asian Music",
        risk_per_id[friend_id][24]: "Tropical",
        risk_per_id[friend_id][25]: "Early Music",
        risk_per_id[friend_id][26]: "Classic Blues",
        risk_per_id[friend_id][27]: "Indie Pop",
        risk_per_id[friend_id][28]: "Bolero",
        risk_per_id[friend_id][29]: "Spirituality & Religion",
        risk_per_id[friend_id][30]: "Dancehall/Ragga",
        risk_per_id[friend_id][31]: "Dance",
        risk_per_id[friend_id][32]: "R&B",
        risk_per_id[friend_id][33]: "Pop",
        risk_per_id[friend_id][34]: "Film Scores",
        risk_per_id[friend_id][35]: "Grime",
        risk_per_id[friend_id][36]: "Electro Hip Hop",
        risk_per_id[friend_id][37]: "Metal",
        risk_per_id[friend_id][38]: "West Coast",
        risk_per_id[friend_id][39]: "Acoustic Blues",
        risk_per_id[friend_id][40]: "Indie Pop/Folk",
        risk_per_id[friend_id][41]: "International Pop",
        risk_per_id[friend_id][42]: "Sports",
        risk_per_id[friend_id][43]: "Trance",
        risk_per_id[friend_id][44]: "Ska",
        risk_per_id[friend_id][45]: "Brazilian Music",
        risk_per_id[friend_id][46]: "Bollywood",
        risk_per_id[friend_id][47]: "Nursery Rhymes",
        risk_per_id[friend_id][48]: "Alternative Country",
        risk_per_id[friend_id][49]: "Indian Music",
        risk_per_id[friend_id][50]: "TV shows & movies",
        risk_per_id[friend_id][51]: "Dubstep",
        risk_per_id[friend_id][52]: "Classical Period",
        risk_per_id[friend_id][53]: "Chicago Blues",
        risk_per_id[friend_id][54]: "Vocal jazz",
        risk_per_id[friend_id][55]: "TV Soundtracks",
        risk_per_id[friend_id][56]: "Latin Music",
        risk_per_id[friend_id][57]: "Rock & Roll/Rockabilly",
        risk_per_id[friend_id][58]: "Delta Blues",
        risk_per_id[friend_id][59]: "African Music",
        risk_per_id[friend_id][60]: "Opera",
        risk_per_id[friend_id][61]: "Ranchera",
        risk_per_id[friend_id][62]: "Oldschool R&B",
        risk_per_id[friend_id][63]: "Kids & Family",
        risk_per_id[friend_id][64]: "Modern",
        risk_per_id[friend_id][65]: "Soul & Funk",
        risk_per_id[friend_id][66]: "Electro",
        risk_per_id[friend_id][67]: "Alternative",
        risk_per_id[friend_id][68]: "Dub",
        risk_per_id[friend_id][69]: "Electric Blues",
        risk_per_id[friend_id][70]: "Rap/Hip Hop",
        risk_per_id[friend_id][71]: "Techno/House",
        risk_per_id[friend_id][72]: "Country Blues",
        risk_per_id[friend_id][73]: "Traditional Country",
        risk_per_id[friend_id][74]: "Country",
        risk_per_id[friend_id][75]: "East Coast",
        risk_per_id[friend_id][76]: "Contemporary R&B",
        risk_per_id[friend_id][77]: "Jazz",
        risk_per_id[friend_id][78]: "Game Scores",
        risk_per_id[friend_id][79]: "Films/Games",
        risk_per_id[friend_id][80]: "Reggae",
        risk_per_id[friend_id][81]: "Hard Rock",
        risk_per_id[friend_id][82]: "Kids",
        risk_per_id[friend_id][83]: "Dirty South"
    }
    if person_id not in node_attributes:
        node_attributes[person_id] = node_attributes_person_id
    if friend_id not in node_attributes:
        node_attributes[friend_id] = node_attributes_friend_id

# New dictionary to store reversed dictionaries
reversed_node_attributes = {}

# Iterate through the outer dictionary
for outer_key, inner_dict in node_attributes.items():
    # Reverse the inner dictionary
    reversed_inner_dict = {value: key for key, value in inner_dict.items()}
    # Add the reversed inner dictionary to the new outer dictionary
    reversed_node_attributes[outer_key] = reversed_inner_dict

node_attributes = reversed_node_attributes

nx.set_node_attributes(G, node_attributes)

# Print the edges with weights to verify
print(G.edges(data=True))

  person_id = row[0]
  friend_id = row[1]
  person_id = row[0]
  friend_id = row[1]


[(np.int64(360), np.int64(4720), {'weight': 1}), (np.int64(360), np.int64(4721), {'weight': 1}), (np.int64(360), np.int64(4722), {'weight': 1}), (np.int64(360), np.int64(14), {'weight': 1}), (np.int64(360), np.int64(4723), {'weight': 1}), (np.int64(360), np.int64(1020), {'weight': 1}), (np.int64(360), np.int64(4724), {'weight': 1}), (np.int64(360), np.int64(4725), {'weight': 1}), (np.int64(360), np.int64(4726), {'weight': 1}), (np.int64(360), np.int64(4727), {'weight': 1}), (np.int64(360), np.int64(4728), {'weight': 1}), (np.int64(360), np.int64(4729), {'weight': 1}), (np.int64(360), np.int64(1286), {'weight': 1}), (np.int64(360), np.int64(4730), {'weight': 1}), (np.int64(360), np.int64(4731), {'weight': 1}), (np.int64(360), np.int64(4732), {'weight': 1}), (np.int64(360), np.int64(4733), {'weight': 1}), (np.int64(360), np.int64(4734), {'weight': 1}), (np.int64(360), np.int64(4735), {'weight': 1}), (np.int64(360), np.int64(4736), {'weight': 1}), (np.int64(360), np.int64(4737), {'weight'

## Graph experiments to get the 12% vaccinated