In [None]:
%pip install requests networkx matplotlib

In [14]:
import requests
import gzip
import shutil
import networkx as nx
import matplotlib.pyplot as plt

In [15]:
# URL da base de dados
url = "https://snap.stanford.edu/data/facebook_combined.txt.gz"
# Nome do arquivo compactado
compressed_file = "facebook_combined.txt.gz"
# Nome do arquivo descompactado
decompressed_file = "facebook_combined.txt"

# Baixar o arquivo
response = requests.get(url, stream=True)
with open(compressed_file, 'wb') as out_file:
    shutil.copyfileobj(response.raw, out_file)

# Descompactar o arquivo
with gzip.open(compressed_file, 'rb') as f_in:
    with open(decompressed_file, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)

# Carregar a base de dados no NetworkX
G = nx.read_edgelist(decompressed_file)

In [16]:
# Define layout algorithms
layout_algorithms = [
    nx.circular_layout,
    nx.random_layout,
    nx.shell_layout,
    nx.spring_layout,
    nx.spectral_layout,
]

In [17]:
# Plot the graph with each layout algorithm
plt.figure(figsize=(300, 300))
for i, layout_algorithm in enumerate(layout_algorithms, start=1):
    plt.subplot(2, 3, i)
    pos = layout_algorithm(G)
    nx.draw(G, pos=pos, with_labels=False, node_size=5)
    plt.title(layout_algorithm.__name__)

plt.suptitle("Facebook Connections with Different Layouts")
plt.tight_layout()
plt.show()

KeyboardInterrupt: 

# Calculate additional metrics

In [None]:
# Graph summarization
import statistics


num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()
print(f"Graph has {num_nodes} nodes and {num_edges} edges.")

# Degree distribution
degrees = [degree for node, degree in G.degree()]
mean_degree = statistics.mean(degrees)
median_degree = statistics.median(degrees)
print(f"Average degree: {mean_degree}")
print(f"Median degree: {median_degree}")

Graph has 4039 nodes and 88234 edges.
Average degree: 43.69101262688784
Median degree: 25


In [None]:
# Largest component size
largest_cc = max(nx.connected_components(G), key=len)
largest_cc_size = len(largest_cc)
print(f"Largest component size: {largest_cc_size}")

# Distance distribution
path_lengths = dict(nx.all_pairs_shortest_path_length(G))
all_lengths = []
for lengths in path_lengths.values():
    all_lengths.extend(lengths.values())
mean_distance = statistics.mean(all_lengths)
median_distance = statistics.median(all_lengths)
print(f"Average distance: {mean_distance}")
print(f"Median distance: {median_distance}")

Largest component size: 4039
Average distance: 3.691592636562027
Median distance: 4


In [None]:
# Clustering coefficient
average_clustering = nx.average_clustering(G)
print(f"Average clustering coefficient: {average_clustering}")

# Betweenness centrality
betweenness_centrality = nx.betweenness_centrality(G)
mean_betweenness = statistics.mean(betweenness_centrality.values())
print(f"Average betweenness centrality: {mean_betweenness}")

Average clustering coefficient: 0.6055467186200862
Average betweenness centrality: 0.0006669573568730229


In [None]:
# Discussion of key features
print("\nDiscussion of key features:")
print(f"The Facebook network graph has {num_nodes} nodes and {num_edges} edges, "
      f"indicating a dense and well-connected network. The average degree is {mean_degree}, "
      f"with a median degree of {median_degree}, suggesting that most users have a moderate "
      f"number of connections, but some have significantly more. The largest connected component "
      f"contains {largest_cc_size} nodes, showing that a significant portion of the network is interconnected. "
      f"The average distance between nodes is {mean_distance}, with a median of {median_distance}, "
      f"indicating relatively short paths between users. The average clustering coefficient of {average_clustering} "
      f"demonstrates that friends of a user are likely to be friends with each other. Finally, the average betweenness "
      f"centrality is {mean_betweenness}, highlighting the presence of key nodes that act as bridges within the network.")


Discussion of key features:
The Facebook network graph has 4039 nodes and 88234 edges, indicating a dense and well-connected network. The average degree is 43.69101262688784, with a median degree of 25, suggesting that most users have a moderate number of connections, but some have significantly more. The largest connected component contains 4039 nodes, showing that a significant portion of the network is interconnected. The average distance between nodes is 3.691592636562027, with a median of 4, indicating relatively short paths between users. The average clustering coefficient of 0.6055467186200862 demonstrates that friends of a user are likely to be friends with each other. Finally, the average betweenness centrality is 0.0006669573568730229, highlighting the presence of key nodes that act as bridges within the network.
