# Location Dataframe

Part 1 - An inital dataframe with city names, longitude, latitudes and distances between each city.

Part 2 - A dummy model that randomly picks cities and displayed on a map. A second optimized model that chooses the best route possible.

# Part 1

## 1.1 Import Packages

In [None]:
# import standard packages
import numpy as np
import pandas as pd
import random

# import visualization packages
import plotly.express as px
import plotly.graph_objects as go

# import distance calculation packages
from geopy.distance import geodesic

# import model packages
from python_tsp.exact import solve_tsp_dynamic_programming

# to read csv files
from google.colab import files

## 1.2 Create Dataframe

In [None]:
# upload csv file to google collab
# uploaded = files.upload()

In [None]:
# download csv of cities
df = pd.read_csv('uscities.csv')

In [None]:
# create column names to keep
columns = ['city', 'state_id', 'lng', 'lat']

# select columns
df = df[columns]

# re-order columns
df = df[['city', 'state_id', 'lng', 'lat']]

In [None]:
# save list for cities
cities_states = {'Denver':'CO', 'Tampa':'FL', 'Atlanta':'GA', 'Seattle':'WA', 'New York':'NY', 'Los Angeles':'CA', 'Chicago':'IL', 'Dallas':'TX'}

# create a list of indexes to keep
keep = []

# loop through cities and states, keeping those that match
for city, state in cities_states.items():
    try:
        keep.append(df[(df['city'] == city) & (df['state_id'] == state)].index[0])
    except:
        print(f'{city} not found in the dataset')

# create a new dataframe with only the cities we want
df = df.loc[keep]

# Reset the index
df.reset_index(drop=True, inplace=True)

In [None]:
# Show df
df

Unnamed: 0,city,state_id,lng,lat
0,Denver,CO,-104.8758,39.762
1,Tampa,FL,-82.4447,27.9945
2,Atlanta,GA,-84.422,33.7628
3,Seattle,WA,-122.3244,47.6211
4,New York,NY,-73.9249,40.6943
5,Los Angeles,CA,-118.4068,34.1141
6,Chicago,IL,-87.6866,41.8375
7,Dallas,TX,-96.7667,32.7935


In [None]:
## 1.3 Find distances between cities

In [None]:
# Create new columns to store the distances
for city in cities_states.keys():
    df[city] = np.nan

# Loop through each city
for city in cities_states.keys():
    # Get the coordinates of the current city
    city_coords = (df[df['city'] == city]['lat'].values[0], df[df['city'] == city]['lng'].values[0])

    # Loop through each row in the dataframe
    for i in range(len(df)):
        # Calculate the distance between the city and the row
        row_coords = (df['lat'][i], df['lng'][i])
        distance = geodesic(row_coords, city_coords).miles

        # Add the distance to the dataframe
        df.at[i, city] = distance

# Show df
df

Unnamed: 0,city,state_id,lng,lat,Denver,Tampa,Atlanta,Seattle,New York,Los Angeles,Chicago,Dallas
0,Denver,CO,-104.8758,39.762,0.0,1516.621897,1204.493538,1025.238506,1629.597074,842.916762,911.068512,659.567308
1,Tampa,FL,-82.4447,27.9945,1516.621897,0.0,414.353429,2527.476913,1000.540253,2162.677572,999.072454,915.903241
2,Atlanta,GA,-84.422,33.7628,1204.493538,414.353429,0.0,2179.963411,749.724729,1943.325938,584.720305,717.268693
3,Seattle,WA,-122.3244,47.6211,1025.238506,2527.476913,2179.963411,0.0,2412.074636,954.033188,1735.611426,1682.13975
4,New York,NY,-73.9249,40.6943,1629.597074,1000.540253,749.724729,2412.074636,0.0,2462.095847,720.094925,1374.440075
5,Los Angeles,CA,-118.4068,34.1141,842.916762,2162.677572,1943.325938,954.033188,2462.095847,0.0,1748.891824,1251.120559
6,Chicago,IL,-87.6866,41.8375,911.068512,999.072454,584.720305,1735.611426,720.094925,1748.891824,0.0,798.440088
7,Dallas,TX,-96.7667,32.7935,659.567308,915.903241,717.268693,1682.13975,1374.440075,1251.120559,798.440088,0.0


In [None]:
# Part 2

## 2.1 Create a route that randomly picks cities and display on a map.

In [None]:
# list of all cities
cities = list(cities_states.keys())

# save route as list
random_route = []

# loop cities and randomly
while cities:
    current_city = random.choice(cities)
    random_route.append(current_city)
    cities.remove(current_city)

# show route
random_route


['Seattle',
 'Denver',
 'Atlanta',
 'Los Angeles',
 'Tampa',
 'Dallas',
 'Chicago',
 'New York']

In [None]:
# 2.2 Find total distance of random route

# calculate distance_random of route
distance_random = 0

# loop through each city in the route
for i in range(len(random_route) - 1):
    # calculate the distance_random between the two cities
    distance_random += df[random_route[i]][df['city'] == random_route[i + 1]].values[0]
    distance_random = round(distance_random, 2)

distance_random

8770.17

In [None]:
# Set colors
ocean = '#CBF3F0'
lake = '#CBF3F0'
river = '#CBF3F0'
land = '#FFBF69'
text_cities = 'Black'
text_distance = '#FFFFFF'
lines = 'black'
marker = '#FF9F1C'
mode = 'plotly_dark' # 'plotly', 'plotly_white', 'plotly_dark', 'ggplot2', 'seaborn', 'simple_white', 'none'

# Set borders
width = 1000
height = 800

# Text positions for labels
text_positions = {
    'Chicago': 'bottom right',
    'Los Angeles': 'bottom left',
    'Dallas': 'top right',
    'Atlanta': 'bottom center',
    'Denver': 'bottom left',
    'Tampa': 'bottom right',
    'Seattle': 'top left',
    'New York': 'top left'
}

In [None]:
# Create the map for the random route
fig = go.Figure()

# Add the cities to the map
for city in random_route:
    city_data = df[df['city'] == city]
    if not city_data.empty:
        text_position = text_positions.get(city, 'top right')
        fig.add_trace(go.Scattergeo(
            lon=city_data['lng'].values,
            lat=city_data['lat'].values,
            mode='markers+text',
            marker=dict(size=10, color=marker),
            text=city,
            textposition=text_position,
            textfont=dict(color=text_cities),
            name=city,
            showlegend=False  # Hide city names from legend
        ))

# Change line to a different color for each leg of the route
colors = px.colors.qualitative.Plotly

# Add the lines between the cities
for i in range(len(random_route) - 1):
    city_data_1 = df[df['city'] == random_route[i]]
    city_data_2 = df[df['city'] == random_route[i + 1]]
    if not city_data_1.empty and not city_data_2.empty:
        fig.add_trace(go.Scattergeo(
            lon=[city_data_1['lng'].values[0], city_data_2['lng'].values[0]],
            lat=[city_data_1['lat'].values[0], city_data_2['lat'].values[0]],
            mode='lines',
            line=dict(width=2, color=colors[i % len(colors)]),
            name=f'Leg {i + 1}',
            showlegend=True  # Show only leg names in legend
        ))

# Update the layout
fig.update_layout(
    title={
        'text': 'Random Route',
        'y': 0.85,  # Move the title to just above the map
        'x': 0.5,  # Center the title
        'xanchor': 'center',
        'yanchor': 'top'
    },
    showlegend=True,
    legend=dict(
        y=0.5,  # Position the legend midway down the plot
        yanchor="middle"
    ),
    geo=dict(
        scope='north america',  # restrict the map to the USA
        showland=True,
        showcountries=True,
        showocean=True,
        oceancolor=ocean,
        landcolor=land,
        countrywidth=0.5,
        subunitwidth=0.5,
        showlakes=True,
        lakecolor=lake,
        showsubunits=True,
        showrivers=True,
        rivercolor=river,
    ),
    width=width,  # make the map larger
    height=height,  # make the map larger
    margin=dict(l=10, r=10, t=40, b=10)  # increase top margin for title
)

# Restrict map to the US
fig.update_geos(lataxis_range=[25, 50], lonaxis_range=[-125, -65])

# Add label for distance of route
fig.add_annotation(
    x=0.5,
    y=0.1,
    showarrow=False,
    text=f'Total Distance: {distance_random:.2f} miles',
    font=dict(size=20, color=text_distance),
    xref='paper',
    yref='paper'
)

# Set background color
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    font=dict(color='white')
)


# Limit ability to scroll across the map
fig.update_layout(dragmode=False)

# Show the map
fig.show()

In [None]:
# Set colors
ocean = '#CBF3F0'
lake = '#CBF3F0'
river = '#CBF3F0'
land = '#FFBF69'
text_cities = 'Black'
text_distance = '#FFFFFF'
lines = 'black'
marker = '#FF9F1C'
mode = 'plotly_dark' # 'plotly', 'plotly_white', 'plotly_dark', 'ggplot2', 'seaborn', 'simple_white', 'none'

# Set borders
width = 1000
height = 800

# Text positions for labels
text_positions = {
    'Chicago': 'bottom right',
    'Los Angeles': 'bottom left',
    'Dallas': 'top right',
    'Atlanta': 'bottom center',
    'Denver': 'bottom left',
    'Tampa': 'bottom left',
    'Seattle': 'bottom left',
    'New York': 'top left'
}

In [None]:
# Initiate a variable to change the start location
start_loc = 7 # Change this value to desired start point

# Extract the distance matrix generated from the dataframe
distance_matrix = df.drop(columns, axis=1).values
distance_matrix

array([[   0.        , 1516.62189655, 1204.49353827, 1025.23850564,
        1629.59707375,  842.91676196,  911.06851188,  659.56730841],
       [1516.62189655,    0.        ,  414.3534289 , 2527.4769134 ,
        1000.54025327, 2162.6775718 ,  999.07245395,  915.9032406 ],
       [1204.49353827,  414.3534289 ,    0.        , 2179.96341111,
         749.72472851, 1943.32593843,  584.72030505,  717.26869326],
       [1025.23850564, 2527.4769134 , 2179.96341111,    0.        ,
        2412.07463625,  954.03318845, 1735.61142598, 1682.1397503 ],
       [1629.59707375, 1000.54025327,  749.72472851, 2412.07463625,
           0.        , 2462.09584701,  720.09492511, 1374.4400749 ],
       [ 842.91676196, 2162.6775718 , 1943.32593843,  954.03318845,
        2462.09584701,    0.        , 1748.89182428, 1251.12055904],
       [ 911.06851188,  999.07245395,  584.72030505, 1735.61142598,
         720.09492511, 1748.89182428,    0.        ,  798.440088  ],
       [ 659.56730841,  915.9032406 ,  71

In [None]:
# Adjust the distance matrix to start with the chosen starting point
distance_matrix = np.roll(distance_matrix, -start_loc, axis=0)
distance_matrix = np.roll(distance_matrix, -start_loc, axis=1)
distance_matrix

array([[   0.        ,  659.56730841,  915.9032406 ,  717.26869326,
        1682.1397503 , 1374.4400749 , 1251.12055904,  798.440088  ],
       [ 659.56730841,    0.        , 1516.62189655, 1204.49353827,
        1025.23850564, 1629.59707375,  842.91676196,  911.06851188],
       [ 915.9032406 , 1516.62189655,    0.        ,  414.3534289 ,
        2527.4769134 , 1000.54025327, 2162.6775718 ,  999.07245395],
       [ 717.26869326, 1204.49353827,  414.3534289 ,    0.        ,
        2179.96341111,  749.72472851, 1943.32593843,  584.72030505],
       [1682.1397503 , 1025.23850564, 2527.4769134 , 2179.96341111,
           0.        , 2412.07463625,  954.03318845, 1735.61142598],
       [1374.4400749 , 1629.59707375, 1000.54025327,  749.72472851,
        2412.07463625,    0.        , 2462.09584701,  720.09492511],
       [1251.12055904,  842.91676196, 2162.6775718 , 1943.32593843,
         954.03318845, 2462.09584701,    0.        , 1748.89182428],
       [ 798.440088  ,  911.06851188,  99

In [None]:
# Initiate a new df that matches the custom start point as index 0
rolled_df = df.reindex(np.roll(df.index, -start_loc, axis=0)).reset_index(drop=True)
rolled_df['city']

0         Dallas
1         Denver
2          Tampa
3        Atlanta
4        Seattle
5       New York
6    Los Angeles
7        Chicago
Name: city, dtype: object

In [None]:
# Run the model to get the optimal route from the start point
permutation, distance_optimal = solve_tsp_dynamic_programming(distance_matrix)

# Initiate list of cities based on optimal route at the custom start point
optimal_route = rolled_df.loc[permutation, 'city'].values
optimal_route

array(['Dallas', 'Los Angeles', 'Seattle', 'Denver', 'Chicago',
       'New York', 'Atlanta', 'Tampa'], dtype=object)

In [None]:
# Create the map for the optimal route
fig = go.Figure()

# Add the cities to the map
for city in optimal_route:
    city_data = df[df['city'] == city]
    if not city_data.empty:
        text_position = text_positions.get(city, 'top right')
        fig.add_trace(go.Scattergeo(
            lon=city_data['lng'].values,
            lat=city_data['lat'].values,
            mode='markers+text',
            marker=dict(size=10, color=marker),
            text=city,
            textposition=text_position,
            textfont=dict(color=text_cities),
            name=city,
            showlegend=False  # Hide city names from legend
        ))

# Change line to a different color for each leg of the route
colors = px.colors.qualitative.Plotly

# Add the lines between the cities
for i in range(len(optimal_route) - 1):
    city_data_1 = df[df['city'] == optimal_route[i]]
    city_data_2 = df[df['city'] == optimal_route[i + 1]]
    if not city_data_1.empty and not city_data_2.empty:
        fig.add_trace(go.Scattergeo(
            lon=[city_data_1['lng'].values[0], city_data_2['lng'].values[0]],
            lat=[city_data_1['lat'].values[0], city_data_2['lat'].values[0]],
            mode='lines',
            line=dict(width=2, color=colors[i % len(colors)]),
            name=f'Leg {i + 1}',
            showlegend=True  # Show only leg names in legend
        ))

# Update the layout
fig.update_layout(
    title={
        'text': 'Optimal Route',
        'y': 0.85,  # Move the title to just above the map
        'x': 0.5,  # Center the title
        'xanchor': 'center',
        'yanchor': 'top'
    },
    showlegend=True,
    legend=dict(
        y=0.5,  # Position the legend midway down the plot
        yanchor="middle"
    ),
    geo=dict(
        scope='north america',  # restrict the map to the USA
        showland=True,
        showcountries=True,
        showocean=True,
        oceancolor=ocean,
        landcolor=land,
        countrywidth=0.5,
        subunitwidth=0.5,
        showlakes=True,
        lakecolor=lake,
        showsubunits=True,
        showrivers=True,
        rivercolor=river,
    ),
    width=width,  # make the map larger
    height=height,  # make the map larger
    margin=dict(l=10, r=10, t=40, b=10)  # increase top margin for title
)

# Restrict map to the US
fig.update_geos(lataxis_range=[25, 50], lonaxis_range=[-125, -65])

# Add label for distance of route
fig.add_annotation(
    x=0.5,
    y=0.1,
    showarrow=False,
    text=f'Total Distance: {distance_optimal:.2f} miles',
    font=dict(size=20, color=text_distance),
    xref='paper',
    yref='paper'
)

# Set background color
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    font=dict(color='white')
)


# Limit ability to scroll across the map
fig.update_layout(dragmode=False)

# Show the map
fig.show()

Package                          Version
-------------------------------- ---------------------
absl-py                          1.4.0
aiohttp                          3.9.5
aiosignal                        1.3.1
alabaster                        0.7.16
albumentations                   1.3.1
altair                           4.2.2
annotated-types                  0.7.0
anyio                            3.7.1
argon2-cffi                      23.1.0
argon2-cffi-bindings             21.2.0
array_record                     0.5.1
arviz                            0.15.1
astropy                          5.3.4
astunparse                       1.6.3
async-timeout                    4.0.3
atpublic                         4.1.0
attrs                            23.2.0
audioread                        3.0.1
autograd                         1.6.2
Babel                            2.15.0
backcall                         0.2.0
beautifulsoup4                   4.12.3
bidict                           0.23.1