In [17]:
import requests
from bs4 import BeautifulSoup
import json
import math
import re
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sbn
import sqlalchemy
import copy
sbn.set()

## Project details

For the project I'm intending to pull trade data for Elite: Dangerous from EDDB (Elite Dangerous DataBase). EDDB is a third party tool, relying on user submitted data, as well as scraping the official netlogs, and the API for a defunct iOS app. It also shares data with multiple other tools and databases intended for Elite: Dangerous players. Based on my current searches, it seems like the developers of Elite: Dangerous (sometimes also called Adjective: Adjective), Frontier Developments plc, do not have a convenient public API.

I'm largely analysing this data to satisfy my own idle curiosity. While I do play the game, I mostly play as a pirate, rather than a trader. It could still be interesting and useful in determining what salvage is worthwhile, or in picking trade lanes to target, but mostly I'm just curious.

###### Project Questions:
1. What is the ranking in terms of average price across commodity categories?
2. What ten non-rare commodities have the highest profit margin?
3. What ten rare commodities have the lowest profit margin?
4. Do rare commodities always have a higher or lower profit margin than non-rare ones?

### Datasets:

#### Commodity Reference dataset from EDDB

This dataset represents aggregated commodity data across individual commodities and in-game locations. It does not include information about commodity prices at different locations within the game, but rather represents average prices across the game's galaxy.

In [18]:
# Download the EDDB commodity reference file (JSON).
# The timeout keeps the notebook from hanging on an unresponsive server, and
# raise_for_status() stops here on an HTTP error instead of letting a later
# cell fail while trying to decode an error page as JSON.
comreq = requests.get("https://eddb.io/archive/v4/commodities.json", timeout=60)
comreq.raise_for_status()

In [19]:
# Decode the response body as JSON, then hand the decoded records to pandas.
commodities = json.loads(comreq.text)
com_data = pd.DataFrame(data=commodities)

In [20]:
def extract_category_name(category_dict):
    """Return the value stored under the ``'name'`` key of ``category_dict``.

    Raises ``KeyError`` when ``category_dict`` is a dict without a ``'name'`` entry.
    """
    return category_dict['name']

def extract_category_wrapper(a_row):
    """Row-wise adapter: return the name of the category held by ``a_row``.

    Reads the row's ``category`` attribute and delegates the lookup to
    ``extract_category_name``.
    """
    row_category = a_row.category
    return extract_category_name(row_category)

In [21]:
# Replace each nested category record with just its name.
# Building a new frame with .assign (rather than writing through an alias of
# com_data) and passing through values that are no longer dicts makes this cell
# safe to re-run: previously a second run raised because 'category' already
# held plain strings.
com_data = com_data.assign(
    category=com_data['category'].map(
        lambda cat: extract_category_name(cat) if isinstance(cat, dict) else cat
    )
)
# Kept only so any cell that still refers to temp_com_data keeps working;
# it is the same object as com_data, not a copy.
temp_com_data = com_data

In [22]:
# Preview the first five commodity rows.
com_data.head(n=5)

Unnamed: 0,average_price,category,category_id,id,is_rare,name
0,261,Chemicals,1,1,0,Explosives
1,110,Chemicals,1,2,0,Hydrogen Fuel
2,181,Chemicals,1,3,0,Mineral Oil
3,241,Chemicals,1,4,0,Pesticides
4,285,Consumer Items,2,5,0,Clothing


#### Price Listings dataset from EDDB

In [23]:
# Download the EDDB price listings CSV.
# The timeout avoids hanging forever on an unresponsive server, and
# raise_for_status() fails fast on an HTTP error rather than parsing an error page.
pricereq = requests.get("https://eddb.io/archive/v4/listings.csv", timeout=120)
pricereq.raise_for_status()
# splitlines() copes with both "\n" and "\r\n" line endings and does not yield
# a spurious empty final row when the file ends with a newline.
price_row_list = pricereq.text.splitlines()

In [24]:
# Peel the header line off the front of the list; what remains are the data rows.
price_headings = price_row_list.pop(0)

In [25]:
# Turn the header line into a list of column names.
# strip() removes stray whitespace (for example a trailing "\r" from CRLF data)
# so the column names come out clean. The isinstance guard makes the cell a
# no-op when re-run after price_headings has already been split into a list.
if isinstance(price_headings, str):
    price_headings = [heading.strip() for heading in price_headings.split(",")]

In [26]:
# Break every raw CSV line into its list of comma-separated fields,
# overwriting the contents of the existing list in place.
price_row_list[:] = [raw_line.split(",") for raw_line in price_row_list]

In [27]:
# Drop malformed rows, i.e. rows whose field count differs from the header's.
#
# The earlier approach looked rows up with list.index() (which returns the
# first equal row, so duplicate bad rows all mapped to one index) and then
# deleted by index in ascending order (each deletion shifts the remaining
# indices), so with more than one bad row it removed the wrong rows.
x = len(price_headings)
# Positions of the rejected rows in the unfiltered list, kept for inspection.
problem_indices = [
    position for position, entry in enumerate(price_row_list) if len(entry) != x
]
# Filter in a single pass; slice assignment updates the existing list object.
price_row_list[:] = [entry for entry in price_row_list if len(entry) == x]

In [28]:
# Pivot the row-oriented listings into one list of integers per heading,
# then build the DataFrame from those columns.
prices = {
    heading: [int(fields[column]) for fields in price_row_list]
    for column, heading in enumerate(price_headings)
}

price_data = pd.DataFrame(prices)

In [29]:
# Preview the first five price listings.
price_data.head(n=5)

Unnamed: 0,buy_price,collected_at,commodity_id,demand,id,sell_price,station_id,supply
0,0,1475765153,5,103,1,315,1,0
1,0,1475765153,6,8,2,6795,1,0
2,0,1475765153,7,63,3,527,1,0
3,0,1475765153,14,14,4,1286,1,0
4,0,1475765153,15,33,5,1286,1,0


### Data Analysis:

#### What is the average commodity price within every category? e.g. what is the average price of Chemicals or Consumer Items? Additionally, which categories have the highest average price?

In [39]:
# Mean of average_price per category, highest-priced category first.
(
    com_data
    .groupby('category')[['average_price']]
    .mean()
    .sort_values(by='average_price', ascending=False)
)

Unnamed: 0_level_0,average_price
category,Unnamed: 1_level_1
Salvage,34375.025
Slavery,15719.333333
Industrial Materials,11845.9
Consumer Items,9443.642857
Minerals,9363.045455
Metals,9222.086957
Weapons,8286.7
Legal Drugs,8146.333333
Medicines,7661.588235
Foods,7412.122449
