In [6]:
import networkx
import json
import requests
import pandas as pd
import numpy as np

In [11]:
# Raise the column display limit to 999 so wide frames are not elided when shown.
pd.set_option('display.max_columns', 999)

### Elite: Dangerous Trade Route Analysis

For the project, I'm going to be looking at optimizing trade routes in Elite: Dangerous, using the datasets from EDDB for galactic commodity prices, general commodity data, stations, and populated systems. This data will be used to calculate optimal trade routes from a given system, which will be displayed in an undirected graph. This graph will highlight trade routes when systems are hovered over (or at least that's my goal).

In [13]:
# Download the populated-systems dump from EDDB and load it into a DataFrame.
system_req = requests.get('https://eddb.io/archive/v5/systems_populated.json', timeout=60)
# Stop here with a clear HTTP error instead of a confusing JSON parse failure below.
system_req.raise_for_status()
# Use the names actually bound above (the previous `sys_req` / `sys_resp` were never defined).
system_resp = json.loads(system_req.text)
system_data = pd.DataFrame(system_resp)

In [19]:
# Download the stations dump from EDDB and load it into a DataFrame.
station_req = requests.get('https://eddb.io/archive/v5/stations.json', timeout=60)
# Stop here with a clear HTTP error instead of a confusing JSON parse failure below.
station_req.raise_for_status()
station_resp = json.loads(station_req.text)
station_data = pd.DataFrame(station_resp)

In [28]:
# Download the commodities dump from EDDB and load it into a DataFrame.
commodity_req = requests.get('https://eddb.io/archive/v5/commodities.json', timeout=60)
# Stop here with a clear HTTP error instead of a confusing JSON parse failure below.
commodity_req.raise_for_status()
commodity_resp = json.loads(commodity_req.text)
commodity_data = pd.DataFrame(commodity_resp)

In [31]:
def extract_category_name(category_dict):
    """Return the value stored under the 'name' key of a category mapping.

    Raises KeyError if the mapping has no 'name' key.
    """
    return category_dict['name']

def extract_category_wrapper(a_row):
    """Row-wise adapter: return the category name held in ``a_row.category``.

    Intended for use with ``DataFrame.apply(..., axis=1)``, where each row is
    passed in and its ``category`` attribute is handed to
    ``extract_category_name``.
    """
    row_category = a_row.category
    return extract_category_name(row_category)

# Replace each commodity's nested category dict with just its name, and rename
# the generic 'id' column to 'commodity_id'.
#
# A new frame is built instead of mutating the loaded one in place, which makes
# the cell safe to re-run: values that are already plain names are passed
# through untouched, and rename() ignores the mapping once 'id' is gone.
commodity_data = commodity_data.assign(
    category=commodity_data['category'].map(
        lambda category: extract_category_name(category)
        if isinstance(category, dict)
        else category
    )
).rename(columns={'id': 'commodity_id'})

# Kept only as an alias: the earlier version of this cell left this name bound
# to the same frame as commodity_data.
temp_com_data = commodity_data

In [24]:
# Download the price listings CSV and parse it by hand into a DataFrame of ints.
price_req = requests.get('https://eddb.io/archive/v5/listings.csv', timeout=60)
# Stop here with a clear HTTP error instead of failing later on int() parsing.
price_req.raise_for_status()

raw_lines = price_req.text.split('\n')
price_headings = raw_lines[0].split(",")
x = len(price_headings)  # number of fields every well-formed row must have

split_rows = [line.split(",") for line in raw_lines[1:]]

# Positions (within the data rows) of rows with the wrong number of fields,
# e.g. the empty string left behind by a trailing newline. enumerate() gives
# the true position even when several malformed rows are identical.
problem_indices = [
    position for position, entry in enumerate(split_rows) if len(entry) != x
]

# Keep the well-formed rows by filtering rather than deleting by index:
# deleting in ascending order shifts later indices and removes the wrong rows.
price_row_list = [entry for entry in split_rows if len(entry) == x]

# Built unconditionally, so it exists even when there were no malformed rows.
# Every field of every retained row is converted with int().
prices = {
    key: [int(entry[column]) for entry in price_row_list]
    for column, key in enumerate(price_headings)
}

price_data = pd.DataFrame(prices)

This next segment is just some quick testing of the data itself, seeing what's included, how I can cross-reference between datasets, etc.

In [21]:
# Show the first station row to see which columns the stations dataset provides.
station_data.head(1)

Unnamed: 0,allegiance,allegiance_id,body_id,controlling_minor_faction_id,distance_to_star,economies,export_commodities,government,government_id,has_blackmarket,has_commodities,has_docking,has_market,has_outfitting,has_rearm,has_refuel,has_repair,has_shipyard,id,import_commodities,is_planetary,market_updated_at,max_landing_pad_size,name,outfitting_updated_at,prohibited_commodities,selling_modules,selling_ships,settlement_security,settlement_security_id,settlement_size,settlement_size_id,shipyard_updated_at,state,state_id,system_id,type,type_id,updated_at
0,Federation,3.0,,13925.0,171.0,[Agriculture],"[Mineral Oil, Fruit and Vegetables, Grain]",Corporate,64.0,False,True,True,True,True,True,True,True,True,5,"[Pesticides, Aquaponic Systems, Biowaste]",False,1480872000.0,L,Reilly Hub,1480872000.0,"[Narcotics, Tobacco, Combat Stabilisers, Imper...","[738, 739, 740, 743, 744, 745, 748, 749, 750, ...","[Adder, Eagle Mk. II, Hauler, Sidewinder Mk. I...",,,,,1480872000.0,,80.0,396,Orbis Starport,8.0,1479385274


In [22]:
# Look up the system with id 396, the system_id of the station row displayed above.
system_data.query("id == 396")

Unnamed: 0,allegiance,allegiance_id,controlling_minor_faction,controlling_minor_faction_id,edsm_id,government,government_id,id,is_populated,minor_faction_presences,name,needs_permit,population,power,power_state,power_state_id,primary_economy,primary_economy_id,reserve_type,reserve_type_id,security,security_id,simbad_ref,state,state_id,updated_at,x,y,z
345,Federation,3.0,Abukunin Silver Fortune Ind,13925.0,15400.0,Corporate,64.0,396,True,"[{'state': None, 'minor_faction_id': 10533, 'i...",Abukunin,False,944577389.0,Zachary Hudson,Exploited,,Agriculture,1.0,,,Medium,32.0,,,80.0,1479385273,-69.3125,-7.4375,61.9375


In [29]:
# Show the first price listing to see which columns the listings dataset provides.
price_data.head(1)

Unnamed: 0,buy_price,collected_at,commodity_id,demand,id,sell_price,station_id,supply
0,0,1480947768,5,3369,1,526,1,0


In [32]:
# Show the first commodity row to see which columns the commodities dataset provides.
commodity_data.head(1)

Unnamed: 0,average_price,category,category_id,commodity_id,is_rare,name
0,261,Chemicals,1,1,0,Explosives


In [26]:
# Print the row count of each dataset in turn: stations, systems, price listings.
for frame in (station_data, system_data, price_data):
    print(len(frame))

63440
19989
2692497


In [27]:
# Look up the station with id 1, the station_id of the first price listing row.
station_data.query("id == 1")

Unnamed: 0,allegiance,allegiance_id,body_id,controlling_minor_faction_id,distance_to_star,economies,export_commodities,government,government_id,has_blackmarket,has_commodities,has_docking,has_market,has_outfitting,has_rearm,has_refuel,has_repair,has_shipyard,id,import_commodities,is_planetary,market_updated_at,max_landing_pad_size,name,outfitting_updated_at,prohibited_commodities,selling_modules,selling_ships,settlement_security,settlement_security_id,settlement_size,settlement_size_id,shipyard_updated_at,state,state_id,system_id,type,type_id,updated_at
50000,Federation,3.0,,13066.0,16000.0,[Tourism],"[Hydrogen Fuel, Biowaste]",Corporate,64.0,False,True,True,True,True,True,True,True,True,1,"[Beer, Silver]",False,1480948000.0,L,Bain Colony,1480948000.0,"[Narcotics, Tobacco]","[738, 739, 740, 741, 742, 1085, 1191, 1207, 12...","[Sidewinder Mk. I, Asp Scout]",,,,,1480948000.0,Boom,16.0,18370,Ocellus Starport,7.0,1481139396


#### Analysis

Now that we have some functional datasets to play with, it's time to actually begin work on analysing it. This is where the filtering of systems comes in--without that, doing any analysis of the network of systems would be difficult. Given that I'm insufficiently familiar with the machine learning techniques that could maybe make this easier, I'm going to have to brute force it, and that means I very definitely need to pare down my datasets.

To filter the data, I'm going to start with the systems, and filter on the basis of their distance from a designated origin system. I could select this arbitrarily, but for the sake of convenience, I'm going to select the Eravate system, which is where my friends and I all started the game. With this, I'll then select all systems within a 40-unit radius of that system (approximated below by a cube extending 40 units from it along each axis), in order to cut the total number of systems I'll be working with to around 500.

Note: Eravate's coordinates in the Cartesian system used by the game are (X = -42.4375, Y = -3.15625, Z = 59.65625).

In [43]:
# Eravate's coordinates, used as the centre of the search region.
ORIGIN_X, ORIGIN_Y, ORIGIN_Z = -42.4375, -3.15625, 59.65625
# Half the edge length of the axis-aligned box kept around the origin.
SEARCH_HALF_WIDTH = 40

# Bounds are derived from the origin rather than typed in by hand; the
# hand-written y upper bound (37.15625) did not equal ORIGIN_Y + 40 (36.84375).
# Series.between is inclusive at both ends, matching the original <= / >= tests.
within_box = (
    system_data['x'].between(ORIGIN_X - SEARCH_HALF_WIDTH, ORIGIN_X + SEARCH_HALF_WIDTH)
    & system_data['y'].between(ORIGIN_Y - SEARCH_HALF_WIDTH, ORIGIN_Y + SEARCH_HALF_WIDTH)
    & system_data['z'].between(ORIGIN_Z - SEARCH_HALF_WIDTH, ORIGIN_Z + SEARCH_HALF_WIDTH)
)
system_data_filtered = system_data[within_box]

In [44]:
# Check how many systems survived the coordinate filter.
len(system_data_filtered)

510

In [46]:
# Collect the ids of the filtered systems into a plain Python list.
filt_system_id_list = [system_id for system_id in system_data_filtered['id']]