In [2]:
# The objective of this notebook is to filter the data set to only the Consumables we are interested in
# right now the data set is too large and we are only interested in a few consumables
# we will filter the data set to only the consumables we are interested in and then save the data set to a new file

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import os
import sys

In [3]:
# Load the data set
marketable_conusmables = pd.read_csv('..\\data\\marketable_consumables.csv')
marketable_conusmables.head()

Unnamed: 0,ID,Icon,Name,Url,UrlType,_,_Score
0,4551,/i/020000/020601.png,Potion,/Item/4551,Item,item,0
1,4552,/i/020000/020602.png,Hi-Potion,/Item/4552,Item,item,0
2,4553,/i/020000/020603.png,Mega-Potion,/Item/4553,Item,item,0
3,4554,/i/020000/020603.png,X-Potion,/Item/4554,Item,item,0
4,4555,/i/020000/020621.png,Ether,/Item/4555,Item,item,0


In [4]:
# Lets reduce this list to the 5 items we are interested in
# "Grade 8 Tincture" will be the filter we use to reduce the data set
# we will only keep the rows that have "Grade 8 Tincture" in the name
tinctures = marketable_conusmables[marketable_conusmables['Name'].str.contains('Grade 8 Tincture')]
tinctures

Unnamed: 0,ID,Icon,Name,Url,UrlType,_,_Score
2206,39727,/i/020000/020710.png,Grade 8 Tincture of Strength,/Item/39727,Item,item,0
2207,39728,/i/020000/020709.png,Grade 8 Tincture of Dexterity,/Item/39728,Item,item,0
2208,39729,/i/020000/020707.png,Grade 8 Tincture of Vitality,/Item/39729,Item,item,0
2209,39730,/i/020000/020706.png,Grade 8 Tincture of Intelligence,/Item/39730,Item,item,0
2210,39731,/i/020000/020708.png,Grade 8 Tincture of Mind,/Item/39731,Item,item,0


In [5]:
# Save the data set to a new file
tinctures.to_csv('..\\data\\tinctures.csv', index=False)

In [6]:
# Now we have a new data set that only contains the consumables we are interested in
# we can import all the market data from the game and then filter it to only the consumables we are interested in
# this will make the data set smaller and easier to work with

all_market_data = pd.read_csv('..\\data\\all_results_1_year.csv')
all_market_data.head()

Unnamed: 0,itemID,lastUploadTime,entries,dcName,regularSaleVelocity,nqSaleVelocity,hqSaleVelocity,stackSizeHistogram.1,stackSizeHistogram.2,stackSizeHistogram.3,...,stackSizeHistogramHQ.99,stackSizeHistogram.97,stackSizeHistogramNQ.76,stackSizeHistogramNQ.79,stackSizeHistogramNQ.85,stackSizeHistogramNQ.88,stackSizeHistogramNQ.97,stackSizeHistogramHQ.89,stackSizeHistogramHQ.97,stackSizeHistogramHQ.92
0,4551,1710803938756,"[{'hq': False, 'pricePerUnit': 2, 'quantity': ...",Aether,420.42856,185.28572,235.14285,443.0,404.0,629.0,...,316.0,,,,,,,,,
1,4552,1710807769958,"[{'hq': False, 'pricePerUnit': 25, 'quantity':...",Aether,510.57144,268.14285,242.42857,597.0,393.0,1284.0,...,460.0,7.0,2.0,4.0,1.0,3.0,3.0,9.0,4.0,
2,4553,1710864064227,"[{'hq': False, 'pricePerUnit': 999, 'quantity'...",Aether,346.14285,64.0,282.14285,583.0,580.0,952.0,...,343.0,4.0,,1.0,1.0,3.0,,,4.0,3.0
3,4554,1710864071158,"[{'hq': True, 'pricePerUnit': 299, 'quantity':...",Aether,226.0,35.0,191.0,634.0,697.0,977.0,...,215.0,4.0,,1.0,3.0,1.0,3.0,2.0,1.0,1.0
4,4555,1710806562911,"[{'hq': False, 'pricePerUnit': 10, 'quantity':...",Aether,153.57143,150.14285,3.428571,365.0,186.0,524.0,...,32.0,,3.0,,1.0,1.0,,,,


In [11]:
# normalize the entries column at 0 index for all_market_data
normal = all_market_data.head(1)
normal

Unnamed: 0,itemID,lastUploadTime,entries,dcName,regularSaleVelocity,nqSaleVelocity,hqSaleVelocity,stackSizeHistogram.1,stackSizeHistogram.2,stackSizeHistogram.3,...,stackSizeHistogramHQ.99,stackSizeHistogram.97,stackSizeHistogramNQ.76,stackSizeHistogramNQ.79,stackSizeHistogramNQ.85,stackSizeHistogramNQ.88,stackSizeHistogramNQ.97,stackSizeHistogramHQ.89,stackSizeHistogramHQ.97,stackSizeHistogramHQ.92
0,4551,1710803938756,"[{'hq': False, 'pricePerUnit': 2, 'quantity': ...",Aether,420.42856,185.28572,235.14285,443.0,404.0,629.0,...,316.0,,,,,,,,,


In [17]:
results = pd.DataFrame.from_records(json.loads(normal['entries'][0]))
results

JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 3 (char 2)

In [6]:
# Now we will filter the data set to only the consumables we are interested in
# we will use the "tinctures" data set to filter the "all_market_data" data set
# all_market_data does not contain the "Name" column so we will need to merge the two data sets on the "ItemID" column and "ID" column

# Merge the two data sets
tincture_market_data = pd.merge(tinctures, all_market_data, left_on='ID', right_on='itemID', how='inner')
tincture_market_data

Unnamed: 0,ID,Icon,Name,Url,UrlType,_,_Score,itemID,lastUploadTime,entries,...,stackSizeHistogramHQ.99,stackSizeHistogram.97,stackSizeHistogramNQ.76,stackSizeHistogramNQ.79,stackSizeHistogramNQ.85,stackSizeHistogramNQ.88,stackSizeHistogramNQ.97,stackSizeHistogramHQ.89,stackSizeHistogramHQ.97,stackSizeHistogramHQ.92
0,39727,/i/020000/020710.png,Grade 8 Tincture of Strength,/Item/39727,Item,item,0,39727,1710880895885,"[{'hq': True, 'pricePerUnit': 2790, 'quantity'...",...,89546.0,23.0,1.0,1.0,1.0,,1.0,19.0,22.0,17.0
1,39728,/i/020000/020709.png,Grade 8 Tincture of Dexterity,/Item/39728,Item,item,0,39728,1710881460403,"[{'hq': True, 'pricePerUnit': 2853, 'quantity'...",...,37165.0,23.0,,,,,1.0,20.0,22.0,6.0
2,39729,/i/020000/020707.png,Grade 8 Tincture of Vitality,/Item/39729,Item,item,0,39729,1710807960305,"[{'hq': True, 'pricePerUnit': 2974, 'quantity'...",...,64.0,,,,,,,1.0,,
3,39730,/i/020000/020706.png,Grade 8 Tincture of Intelligence,/Item/39730,Item,item,0,39730,1710880803136,"[{'hq': True, 'pricePerUnit': 2867, 'quantity'...",...,32106.0,12.0,,1.0,1.0,,1.0,11.0,11.0,10.0
4,39731,/i/020000/020708.png,Grade 8 Tincture of Mind,/Item/39731,Item,item,0,39731,1710880809452,"[{'hq': True, 'pricePerUnit': 2995, 'quantity'...",...,34881.0,9.0,,,,,,8.0,9.0,7.0


In [7]:
# Save the data set to a new file
tincture_market_data.to_csv('..\\data\\tincture_market_data.csv', index=False)

In [8]:
# For every single index in the "tincture_market_data" df, we will create a new df that contains the market data for that item

strength_tincture = tincture_market_data[tincture_market_data['Name'].str.contains('Strength')]
dexterity_tincture = tincture_market_data[tincture_market_data['Name'].str.contains('Dexterity')]
intelligence_tincture = tincture_market_data[tincture_market_data['Name'].str.contains('Intelligence')]
mind_tincture = tincture_market_data[tincture_market_data['Name'].str.contains('Mind')]
vitality_tincture = tincture_market_data[tincture_market_data['Name'].str.contains('Vitality')]

In [14]:
strength_tincture

Unnamed: 0,ID,Icon,Name,Url,UrlType,_,_Score,itemID,lastUploadTime,entries,...,stackSizeHistogramHQ.99,stackSizeHistogram.97,stackSizeHistogramNQ.76,stackSizeHistogramNQ.79,stackSizeHistogramNQ.85,stackSizeHistogramNQ.88,stackSizeHistogramNQ.97,stackSizeHistogramHQ.89,stackSizeHistogramHQ.97,stackSizeHistogramHQ.92
0,39727,/i/020000/020710.png,Grade 8 Tincture of Strength,/Item/39727,Item,item,0,39727,1710880895885,"[{'hq': True, 'pricePerUnit': 2790, 'quantity'...",...,89546.0,23.0,1.0,1.0,1.0,,1.0,19.0,22.0,17.0


In [22]:
strength_tincture.columns

Index(['ID', 'Icon', 'Name', 'Url', 'UrlType', '_', '_Score', 'itemID',
       'lastUploadTime', 'entries',
       ...
       'stackSizeHistogramHQ.99', 'stackSizeHistogram.97',
       'stackSizeHistogramNQ.76', 'stackSizeHistogramNQ.79',
       'stackSizeHistogramNQ.85', 'stackSizeHistogramNQ.88',
       'stackSizeHistogramNQ.97', 'stackSizeHistogramHQ.89',
       'stackSizeHistogramHQ.97', 'stackSizeHistogramHQ.92'],
      dtype='object', length=311)