# Graph Generator

> This notebook generates graph files for the Travelling Salesman Problem (TSP) from the pubs dataset.

In [None]:
import os

import pandas as pd

Open pubs dataset.

In [None]:
# Read the crawled pub records (the path is relative to this notebook)
# and preview the first five rows.
df_full = pd.read_csv('../1_pubs_crawler/pubs.csv')
df_full.head(n=5)

Select a random subset of pubs of a predetermined size.

In [None]:
SAMPLE_SIZE = 1000  # number of pubs in each random subset

# Preview one random draw of that size; the result is only displayed, not stored.
df_full.sample(n=SAMPLE_SIZE)

Draw several such subsets without replacement, so that no pub appears in more than one subset.

In [None]:
n_subsets = 10 # number of subsets
SUBSET_SEED = 42 # fixed seed so every run draws the same subsets; set to None for a fresh random draw

# Make sure there is enough data to take samples without replacement.
# `<=` rather than `<`: using every row exactly once is still a valid draw.
assert n_subsets * SAMPLE_SIZE <= len(df_full), \
    "df_full has too few rows for n_subsets disjoint samples of SAMPLE_SIZE rows"

# Draw every row that is needed in one sample (DataFrame.sample does not
# replace by default) and cut it into equal chunks. The chunks are disjoint
# random subsets, and df_full itself is left untouched, so this cell gives the
# same result however many times it is re-run.
pool = df_full.sample(n_subsets * SAMPLE_SIZE, random_state=SUBSET_SEED)

subsets = []

for i in range(n_subsets):
    start = i * SAMPLE_SIZE
    # .copy() makes each subset an independent frame that is safe to modify in place.
    subsets.append(pool.iloc[start:start + SAMPLE_SIZE].copy())

print("Number of subsets:" + str(len(subsets)))

Reset the index numbers.

In [None]:
for sub in subsets:
    # Keep each pub's row label from the full dataset in an 'index' column.
    # The guard stops a re-run of this cell from adding the column again.
    if 'index' not in sub.columns:
        sub.insert(0, 'index', sub.index)
    # Number the rows 1..n to match the 1-based node numbering used in .tsp files.
    sub.index = pd.RangeIndex(start=1, stop=len(sub) + 1)

subsets[0].head()

Define a function to create .tsp files that can be run with the Concorde TSP solver.

In [None]:
def tsp_file(data, name='unnamed', DIR='.', decimal_degrees=True):
    """
    Write `data` to <DIR>/<name>.tsp in the TSPLIB .tsp format.

    Arguments:
    data -- dataframe with 'latitude' and 'longitude' columns, one row per node
    name -- name of the TSP instance, also used as the file name (without extension)
    DIR -- directory the file is written to; created if it does not exist
    decimal_degrees -- True (default) if the coordinates are decimal degrees;
                       they are then converted to the DDD.MM (degrees.minutes)
                       notation that EDGE_WEIGHT_TYPE GEO requires. Pass False
                       to write the values unchanged (already in DDD.MM).

    Nodes are numbered 1..len(data) in row order, whatever the dataframe's
    own index is.

    Raises ValueError if a coordinate is NaN while decimal_degrees is True.
    """

    def to_ddmm(value):
        # 51.5 degrees -> 51 degrees 30 minutes -> 51.30
        degrees = int(value)  # int() truncates toward zero, so the sign is kept
        return degrees + (value - degrees) * 0.6

    if DIR:
        os.makedirs(DIR, exist_ok=True)

    # `with` closes the file even if one of the writes raises.
    with open(DIR + "/" + name + ".tsp", "w") as file:
        file.write("NAME: " + name + "\n")
        file.write("TYPE: TSP\n")
        file.write("COMMENT: " + str(len(data)) + " pub locations in the UK\n")
        file.write("DIMENSION: " + str(len(data)) + "\n")
        # GEO makes weights in kilometers by approximating the earth as a sphere;
        # it reads coordinates as DDD.MM, not as decimal degrees.
        file.write("EDGE_WEIGHT_TYPE: GEO\n")
        file.write("NODE_COORD_SECTION\n")

        coordinates = zip(data['latitude'], data['longitude'])
        for node_id, (latitude, longitude) in enumerate(coordinates, start=1):
            if decimal_degrees:
                latitude, longitude = to_ddmm(latitude), to_ddmm(longitude)
            file.write("{} {:.6f} {:.6f}\n".format(node_id, latitude, longitude))

Convert the subsets to .tsp files.

In [None]:
# One file per subset: Graph0.tsp, Graph1.tsp, ... inside ./Graphs.
for number, subset in enumerate(subsets):
    tsp_file(subset, name='Graph' + str(number), DIR='./Graphs')