# Basic utilities for viewing data from Pangoly

In [None]:
# imports
import os
import json
import copy
import numpy as np
import datetime as dt
from matplotlib import pyplot as plt

In [None]:
# Constants
# Root folder of the scraped Pangoly dump, relative to the working directory.
path_pangoly_root = "data/scraped/pangoly"
# One known product file, used as the sample for the cells below.
filepath_pangoly_sample = os.path.join(
	path_pangoly_root,
	"Hardware-and-peripherals",
	"Processors",
	"AMD",
	"amd-ryzen-7-2700x.json",
)

In [None]:
def print_file_counts(path: str = path_pangoly_root):
	"""
	Prints the number of files found under each immediate subdirectory of `path`.

	Files are counted recursively, so files in nested folders are included in
	the total of the top-level subdirectory that contains them. Entries of
	`path` that are not directories are ignored.

	Args:
		path: directory whose subdirectories are inspected.
	"""
	# get directories at root level
	top_level_dirs = [name for name in os.listdir(path) if os.path.isdir(os.path.join(path, name))]
	for dir_name in top_level_dirs:
		# Only the file lists from os.walk are needed; the walk's own directory
		# list gets a throwaway name so it cannot shadow the list being iterated.
		file_count = sum(
			len(files)
			for _root, _subdirs, files in os.walk(os.path.join(path, dir_name))
		)
		print(f"'{dir_name}': {file_count}")

# Count the files under each subdirectory of the "Hardware-and-peripherals" folder.
print_file_counts(os.path.join(path_pangoly_root, "Hardware-and-peripherals"))

In [None]:
# Parse the sample product file. The context manager closes the file handle as
# soon as parsing finishes instead of leaving it open until garbage collection,
# and the explicit encoding avoids depending on the platform default.
with open(filepath_pangoly_sample, "r", encoding="utf-8") as sample_file:
	data = json.load(sample_file)
# Both lookups read from the "history" page of the parsed file.
region_links = data["pages"]["history"]["region_links"]
regions = data["pages"]["history"]["regions"]

In [None]:
# Convert a list of [timestamp_ms, value] pairs (e.g. [1619740800000.0, 392.79]) into a list of (datetime, value) tuples sorted by timestamp
def get_timeseries(timeseries: list) -> list:
	'''
		Converts raw [timestamp_ms, value] pairs into a time-ordered series.

		Args:
			timeseries: list of two-element sequences whose first item is a Unix
				timestamp in milliseconds and whose second item is the value,
				e.g. [1619740800000.0, 392.79].

		Returns:
			A new list of (datetime, value) tuples sorted by ascending timestamp.
			The datetimes are naive local-time values, because
			datetime.fromtimestamp is called without a tz argument.
	'''
	# sorted() builds a new list, so the caller's list keeps its original order;
	# an in-place .sort() would reorder the caller's data as a side effect.
	ordered = sorted(timeseries, key=lambda element: element[0])
	# timestamps are in milliseconds, fromtimestamp expects seconds
	return [(dt.datetime.fromtimestamp(element[0] / 1000.0), element[1]) for element in ordered]


# timeseries is a list of [timestamp_ms: float, value: float] pairs
def get_timeseries_info(timeseries: list) -> dict:
	'''
		Computes summary statistics for a timeseries.

		Args:
			timeseries: non-empty list of [timestamp_ms, value] pairs, in the
				form accepted by get_timeseries.

		Returns:
			A dict with the numpy arrays "timestamps" and "prices" (ordered as
			returned by get_timeseries), the statistics "min", "max", "mean",
			"std" and "median" of the prices, the number of datapoints under
			"count", and the first and last entries of the timestamp array
			under "firstTimestamp" and "lastTimestamp".

		Raises:
			ValueError: if the timeseries is empty.
	'''
	# Fail early with a readable message; numpy would otherwise raise a
	# ValueError about a zero-size array from inside np.min.
	if len(timeseries) == 0:
		raise ValueError("cannot compute timeseries info for an empty timeseries")
	# A shallow copy is enough to keep the caller's list order intact: the
	# conversion may reorder the outer list but never modifies the pairs.
	converted = get_timeseries(list(timeseries))
	timestamps = np.array([element[0] for element in converted])
	prices = np.array([element[1] for element in converted])
	info = {
		"timestamps": timestamps,
		"prices": prices,
		"min": np.min(prices),
		"max": np.max(prices),
		"mean": np.mean(prices),
		"std": np.std(prices),
		"median": np.median(prices),
		"count": len(prices),
		"firstTimestamp": timestamps[0],
		"lastTimestamp": timestamps[-1],
	}
	return info


def _print_timeseries_summary(timeseries: list, currency_code: str):
	'''
		Prints the scalar statistics of one timeseries, one "key: value" line each.
	'''
	summary = get_timeseries_info(timeseries)
	# the raw arrays are not printed; keep only the scalar statistics
	del summary["timestamps"]
	del summary["prices"]
	print(f"Timeseries info (prices in {currency_code}):")
	for key, value in summary.items():
		print(f"\t\t{key}: {value}")


def print_region_info(region: dict):
	'''
		Prints basic information about the region.

		Prints the region name and key, then, when present and not None, a
		per-seller summary of the "price_chart" datapoints and a summary of
		the "trend_history" datapoints.

		Args:
			region: dict providing region["metadata"]["info"] (with "name",
				"key" and ["currency"]["code"]) and region["datapoints"].
	'''
	info = region["metadata"]["info"]
	currency_code = info["currency"]["code"]
	print(f"Region '{info['name']}' ({info['key']})")
	datapoints = region["datapoints"]

	# a missing key and an explicit None both mean "no price chart"
	price_chart = datapoints.get("price_chart")
	if price_chart is not None:
		sellers = price_chart["data"]
		print(f"Price chart has {len(sellers)} sellers")
		for seller, price_timeseries in sellers.items():
			print(f"\tSeller '{seller}' has {len(price_timeseries)} datapoints")
			_print_timeseries_summary(price_timeseries, currency_code)

	# same rule for the trend history
	trend = datapoints.get("trend_history")
	if trend is not None:
		trend_history = trend["data"]
		trend_title = trend["title"].strip()
		print(f"Trend '{trend_title}' has {len(trend_history)} datapoints")
		_print_timeseries_summary(trend_history, currency_code)


def print_regions_info(regions: dict):
	'''
		Prints the summary of every region in the mapping, each followed by a blank line.

		Only the values of `regions` are used; each one is passed to print_region_info.
	'''
	for region_entry in regions.values():
		print_region_info(region_entry)
		# empty line separates consecutive regions
		print()


# Print a summary for every region loaded from the sample file.
print_regions_info(regions)