In [2]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Load the dataset
data = pd.read_csv("cars_2024.csv")

# Create a graph where edges represent shared states and weights are the registration frequencies
G = nx.Graph()

for state, group in data.groupby('state'):
    models = group['model']
    model_pairs = [(model1, model2) for i, model1 in enumerate(models) for model2 in models[i + 1:]]
    for model1, model2 in model_pairs:
        if G.has_edge(model1, model2):
            G[model1][model2]['weight'] += len(group)
        else:
            G.add_edge(model1, model2, weight=len(group), state=state)

# Extract edge weights
edge_weights = [d['weight'] for _, _, d in G.edges(data=True)]

# Visualize the graph
plt.figure(figsize=(14, 12))
pos = nx.spring_layout(G, seed=42)
nx.draw_networkx_nodes(G, pos, node_size=500, alpha=0.8)
nx.draw_networkx_edges(G, pos, width=[w / 10 for w in edge_weights], alpha=0.5)
nx.draw_networkx_labels(G, pos, font_size=9, font_family="sans-serif")
plt.title("Vehicle Model Network (Shared States with Registration Frequency as Weight)")
plt.show()

MemoryError: 