In [24]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns


In [25]:
# Input files for the graph: an edge list (Source, Target, Weight) and a
# vertex table (id, Name, X, Y, Z, label).
EDGES_CSV = 'edges.csv'
VERTICES_CSV = 'vertices.csv'

edges = pd.read_csv(EDGES_CSV)
vertices = pd.read_csv(VERTICES_CSV)

In [26]:
# Preview the first rows of the edge list.
edges.head()

Unnamed: 0,Source,Target,Weight
0,118,201,0.1804
1,118,258,0.1474
2,118,182,0.0332
3,118,152,0.0416
4,118,162,0.0676


In [27]:
# Preview the first rows of the vertex table.
vertices.head()

Unnamed: 0,id,Name,X,Y,Z,label
0,1,Wiley Post-Will Rogers Mem,0.4407,0.091,0.5,Alaska
1,2,Deadhorse,0.483,0.1014,0.5,Alaska
2,3,Ralph Wien Memorial,0.411,0.133,0.5,Alaska
3,4,Fairbanks Intl,0.4861,0.1528,0.5,Alaska
4,5,Nome,0.3965,0.1557,0.5,Alaska


In [28]:
# Per-vertex degree columns derived from the edge list:
#   inDegree  - number of edges whose Target equals the vertex id
#   outDegree - number of edges whose Source equals the vertex id
incoming_counts = edges['Target'].value_counts()
outgoing_counts = edges['Source'].value_counts()

# Ids that never occur in the corresponding edge column map to NaN, which is
# replaced by 0. The intermediate NaN is why both columns are float-typed.
vertices['inDegree'] = vertices['id'].map(incoming_counts).fillna(0)
vertices['outDegree'] = vertices['id'].map(outgoing_counts).fillna(0)

In [29]:
# Check the inDegree / outDegree columns on the first rows.
vertices.head()

Unnamed: 0,id,Name,X,Y,Z,label,inDegree,outDegree
0,1,Wiley Post-Will Rogers Mem,0.4407,0.091,0.5,Alaska,0.0,3.0
1,2,Deadhorse,0.483,0.1014,0.5,Alaska,1.0,2.0
2,3,Ralph Wien Memorial,0.411,0.133,0.5,Alaska,0.0,2.0
3,4,Fairbanks Intl,0.4861,0.1528,0.5,Alaska,2.0,3.0
4,5,Nome,0.3965,0.1557,0.5,Alaska,1.0,1.0


In [30]:
# Adjacency list per vertex: each outgoing edge becomes a (Target, Weight) tuple.
# include_groups=False keeps the 'Source' grouping column out of the frame
# that is handed to the lambda.
outgoing_edges = edges.groupby('Source', group_keys=False).apply(
    lambda group: list(zip(group['Target'], group['Weight'])),
    include_groups=False,
)

adjacency = vertices['id'].map(outgoing_edges)
# Ids with no outgoing edges come back as NaN; '' is a placeholder that
# sorted() below turns into an empty list.
adjacency = adjacency.fillna('')
vertices['connectedTo'] = adjacency.apply(lambda pairs: sorted(pairs))


In [31]:
# Display the vertex table, now including the connectedTo column.
vertices

Unnamed: 0,id,Name,X,Y,Z,label,inDegree,outDegree,connectedTo
0,1,Wiley Post-Will Rogers Mem,0.4407,0.0910,0.5,Alaska,0.0,3.0,"[(2, 0.0436), (4, 0.0767), (8, 0.1026)]"
1,2,Deadhorse,0.4830,0.1014,0.5,Alaska,1.0,2.0,"[(4, 0.0515), (8, 0.0866)]"
2,3,Ralph Wien Memorial,0.4110,0.1330,0.5,Alaska,0.0,2.0,"[(5, 0.0269), (8, 0.0843)]"
3,4,Fairbanks Intl,0.4861,0.1528,0.5,Alaska,2.0,3.0,"[(8, 0.0365), (26, 0.0915), (47, 0.2109)]"
4,5,Nome,0.3965,0.1557,0.5,Alaska,1.0,1.0,"[(8, 0.0849)]"
...,...,...,...,...,...,...,...,...,...
327,328,Rota Intl,0.1454,0.6366,0.5,Northern Mariana Islands,1.0,1.0,"[(329, 0.007)]"
328,329,Guam Intll,0.1431,0.6432,0.5,Guam,3.0,1.0,"[(330, 0.0784)]"
329,330,Babelthuap/Koror,0.0909,0.7017,0.5,Palau,2.0,0.0,[]
330,331,Pago Pago Intl,0.3697,0.9090,0.5,American Samoa,2.0,0.0,[]


In [32]:
# Write the vertex table (including the degree and connectedTo columns) to CSV.
# index=False leaves the DataFrame's row index out of the file; `to_csv` has no
# `id` keyword, so the previous `id=False` raised a TypeError instead of writing.
vertices.to_csv('adjacencyList.csv', index=False)

In [33]:
# Distinct values of the label column, in order of first appearance.
vertices['label'].unique()

array(['Alaska', 'Washington', 'Montana', 'North Dakota', 'Minnesota',
       'Michigan', 'Idaho', 'Tennessee', 'Oregon', 'Maine', 'Wisconsin',
       'Vermont', 'South Dakota', 'Wyoming', 'New York', 'New Hampshire',
       'Pennsylvania', 'Iowa', 'Massachusetts', 'Illinois', 'Connecticut',
       'California', 'Rhode Island', 'Indiana', 'Ohio', 'Nebraska',
       'Nevada', 'Utah', 'New Jersey', 'Colorado', 'Missouri', 'Maryland',
       'Kentucky', 'Virginia', 'West Virginia', 'Kansas', 'Oklahoma',
       'North Carolina', 'Arkansas', 'Texas', 'Arizona', 'New Mexico',
       'South Carolina', 'Alabama', 'Georgia', 'Louisiana', 'Mississippi',
       'Florida', 'Hawaii', 'Puerto Rico', 'U.S. Virgin Islands',
       'Unincorporated U.S. Territory', 'Northern Mariana Islands',
       'Guam', 'Palau', 'American Samoa'], dtype=object)

In [34]:
# Number of distinct labels; dropna=False counts a missing label as its own
# value, matching the length of .unique().
vertices['label'].nunique(dropna=False)

56

In [36]:
# Seed for the placeholder traffic values so the figure is identical on every
# Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)

# Region name -> states grouped under that region in the treemap.
regions = {
    "Midwest": ["Minnesota", "Iowa", "Missouri", "North Dakota", "South Dakota", "Nebraska",
                "Wisconsin", "Illinois", "Michigan", "Indiana", "Ohio", "Kansas"],
    "New England": ["Maine", "Vermont", "New Hampshire", "Massachusetts", "Rhode Island", "Connecticut"],
    "Southwest": ["Texas", "Arizona", "New Mexico", "Oklahoma"],
    "Northeast": ["New York", "Pennsylvania", "New Jersey", "Delaware", "Maryland"],
    "Northwest": ["Washington", "Oregon", "Idaho", "Montana", "Wyoming", "Alaska"]
}

# One record per (region, state) pair; converted to a DataFrame below.
data = []

for region, states in regions.items():
    # Placeholder traffic: random integers in [100, 1000), standing in for real data.
    traffic_values = rng.integers(100, 1000, size=len(states))
    # Scale by the regional maximum so the largest state in each region is 1.0.
    normalized_traffic = traffic_values / traffic_values.max()

    data.extend(
        {"Region": region, "State": state, "Traffic": traffic}
        for state, traffic in zip(states, normalized_traffic)
    )

df = pd.DataFrame(data)

# Treemap nested Region -> State; tile size and the blue colour gradient both
# follow the normalized traffic value.
fig = px.treemap(
    df,
    path=["Region", "State"],
    values="Traffic",
    color="Traffic",
    color_continuous_scale="Blues",
    title="Combined Treemap of North American Regions (Normalized Traffic)",
)

fig.show()
