In [None]:
# Step 1: load fb_friends and bt_symmetric csv files
# Step 2: Preprocess bt_symmetric by dropping the following rows:
#    * Empty scans, which are marked with user B = -1 and RSSI = 0
#    * Scans of devices outside of the experiment, which are marked with user B = -2. All non-experiment devices are given the same ID.
# Step 3: Use the algorithm to get a confidence score for the association fb_friends -> friends irl
#    friends_irl_score(a, b) = total_ping_count(a, b)/rssi_total(a, b)
#    It's up to us to define what we consider "friends". Are persons a and b friends if they have met once over the study (30 days)? Are they friends only if they have met twice?
#    We have to pick a definition and then assume it consistently throughout our own data mining study.
# Step 4: Plot graphs
# Step 5: Interpret the results

In [21]:
import matplotlib.pyplot as plt
import pandas as pd

# Step 1: read the raw study tables from disk
fb_data = pd.read_csv('copenhagen-data/fb_friends.csv')
bt_data = pd.read_csv('copenhagen-data/bt_symmetric.csv')
gd_data = pd.read_csv('copenhagen-data/genders.csv')

# Step 2: build one boolean mask per kind of unusable scan, then keep the rest
# empty scans (user_b == -1) and rows without a signal reading (rssi == 0)
is_empty_scan = (bt_data.user_b == -1) | (bt_data.rssi == 0)
# non-experiment devices all share the id -2
is_outside_device = bt_data.user_b == -2
bt_data = bt_data[~(is_empty_scan | is_outside_device)]

# Step 3
# Friendship score for one ordered pair of users.
# Pairs that never appear together in the scan table score 0 (not None).
def friends_irl_score(a, b, data=None):
    """Return the proximity-based friendship score for the ordered pair (a, b).

    The score is ``number_of_scans / sum_of_rssi`` computed over the rows
    where ``user_a == a`` and ``user_b == b``.

    Parameters
    ----------
    a, b
        User ids compared against the ``user_a`` and ``user_b`` columns.
    data : pandas.DataFrame, optional
        Scan table with ``user_a``, ``user_b`` and ``rssi`` columns.
        Defaults to the notebook-level ``bt_data`` so existing two-argument
        calls keep working.

    Returns
    -------
    int or float
        ``0`` when no row matches the pair or when the RSSI sum is exactly 0
        (which would otherwise divide by zero); otherwise the ratio above.

    Notes
    -----
    Only the (a, b) direction is matched; rows stored as (b, a) are ignored.
    NOTE(review): presumably the symmetric table stores each pair once --
    confirm against the dataset description.
    The sign of the score follows the sign of the RSSI sum; the heatmap cell
    of this notebook applies ``.abs()`` to the stored scores before plotting.
    """
    scans = bt_data if data is None else data

    # RSSI readings of every scan recorded for this ordered pair
    pair_rssi = scans.loc[(scans.user_a == a) & (scans.user_b == b), "rssi"]

    # No scan at all between the two users
    if pair_rssi.empty:
        return 0

    rssi_total = pair_rssi.sum()

    # Guard against division by zero when the readings cancel out
    if rssi_total == 0:
        return 0

    return len(pair_rssi) / rssi_total

persons_a = set(bt_data["user_a"])
persons_b = set(bt_data["user_b"])

# Aggregate every observed (user_a, user_b) pair in a single pass instead of
# re-filtering the whole scan table once per candidate pair.
rssi_by_pair = bt_data.groupby(["user_a", "user_b"])["rssi"]
ping_count = rssi_by_pair.size()   # number of scans per pair
rssi_total = rssi_by_pair.sum()    # summed signal strength per pair

# score = ping count / RSSI sum; a zero RSSI sum yields a score of 0
pair_scores = (ping_count / rssi_total).where(rssi_total != 0, 0)

# Expand to every combination of a in persons_a and b in persons_b, in the
# same a-outer / b-inner order as a nested loop; unobserved pairs score 0.
all_pairs = pd.MultiIndex.from_product(
    [list(persons_a), list(persons_b)], names=["a", "b"]
)
data = pair_scores.reindex(all_pairs, fill_value=0).rename("score").reset_index()

# Save the DataFrame to a CSV file
data.to_csv("friendscore.csv", index=False)

#plt.scatter(bt_data["sw"], bt_data["sl"], c=kmeans.labels_, cmap='viridis')
#plt.show()


In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Step 1: load the pairwise friendship scores written by the scoring cell
fsc_data = pd.read_csv('friendscore.csv')

# Step 2: make every score non-negative, then scale it up so that small
# differences remain visible in the heatmap annotations
fsc_data['score'] = fsc_data['score'].abs() * 10000000

# Step 3: reshape the long (a, b, score) table into an a-by-b matrix
pivot_table = fsc_data.pivot(index='a', columns='b', values='score')

# Step 4: draw the heatmap on an explicitly created axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    pivot_table,
    annot=True,
    cmap="YlGnBu",
    cbar_kws={'label': 'Friendship Score (Scaled)'},
    ax=ax,
)
ax.set_title('Friendship Score Heatmap (Scaled)')
ax.set_xlabel('User B')
ax.set_ylabel('User A')
plt.show()



TypeError: DataFrame.pivot() takes 1 positional argument but 4 were given