In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Download the three input files from Google Drive into the working directory.
# A leading `!` runs the line as a shell command; `-O` sets the local file name.
#   data.csv            -> read into `data` with pd.read_csv
#   predicted_true.json -> read into `results` with pd.read_json
#   challenge.csv       -> input for the challenge activity at the end
!wget -O data.csv "https://drive.google.com/uc?export=download&id=17zbqonizGGegzRu3A6aaYDo00evwDLsA"
!wget -O predicted_true.json "https://drive.google.com/uc?export=download&id=1AjeXcwYo_wLQaoRbZwcKfayg_irEY9vP"
!wget -O challenge.csv "https://drive.google.com/uc?export=download&id=1bF8P-vdhBFc4fnNDXs4mYxTymkworkRh"

In [None]:
# line graph

# data: five points, x positions and their y values
x = [1, 2, 3, 4, 5]
y = [2, 4, -1, -2, 3]

# graph: "-o" means a solid line with a circle marker on every point
fig, ax = plt.subplots()
ax.plot(x, y, "-o")

# labels
ax.set_title("Line Graph")
ax.set_xlabel("x")
ax.set_ylabel("y")

# customizations (try one of these in place of the ax.plot call above)
# different markers
# ax.plot(x, y, "-x")
# ax.plot(x, y, "-*")
# ax.plot(x, y, "-v")
# different line styles
# ax.plot(x, y, linestyle="--")
# ax.plot(x, y, linestyle="-.")
# ax.plot(x, y, linestyle=":")

plt.show()

In [None]:
# bar graph

# data: one bar per vegetable, bar height taken from y
x = ["lettuce", "tomato", "eggplant", "carrot", "onion"]
y = [10, 21, 69, 100, 57]

# graph
fig, ax = plt.subplots()
ax.bar(x, y)
# different colors
# ax.bar(x, y, color="pink")
# ax.bar(x, y, color="green")
# ax.bar(x, y, color="orange")

# labels
ax.set(title="Bar Graph", xlabel="Vegetables", ylabel="Some Number")

# customizations
# ax.set_ylim(0, 500)
# ax.set_yticks([0, 25, 50, 75, 100, 125, 150])
# ax.set_yticks([0, 50, 100, 150])
# ax.grid(axis="y")

plt.show()

In [None]:
# pie chart

# data: each slice has a name, a share of the whole, and a color
pyramid = ["sky", "sunny_side", "shady_side"]
percentages = [75, 17.5, 7.5]
colors = ["skyblue", "wheat", "darkgoldenrod"]

# graph: autopct prints each share with one decimal place,
# startangle rotates where the first slice begins
fig, ax = plt.subplots()
ax.pie(
    percentages,
    labels=pyramid,
    colors=colors,
    autopct='%1.1f%%',
    startangle=315,
)

# labels
ax.set_title("Pie Chart")

# customizations
# ax.legend(loc="upper right")
# explode = [0, 0, 0.2]
# ax.pie(percentages, labels=pyramid, colors=colors, autopct='%1.1f%%', startangle=315, explode=explode)

plt.show()

In [None]:
# training data
# Load data.csv (fetched by the wget cell above) into a DataFrame. The cells
# below filter it with data.query(...) on columns such as stock_id, date_id
# and seconds_in_bucket.
data = pd.read_csv("data.csv")

In [None]:
# getting data
# data["bid_price"]
# data.query("stock_id == 0")
# data.query("stock_id < 3")
# data.query("stock_id == 0")["imbalance_size"]
# pd.DataFrame(data.query("stock_id == 0")["imbalance_size"])
# data[["imbalance_size", "bid_price", "bid_size"]]

In [None]:
# seconds vs. imbalance

# data of stock_id == 0
# Sort the rows as a whole so every imbalance value stays paired with its own
# seconds_in_bucket value. Sorting only the seconds column would plot sorted
# x values against y values that are still in file order.
stock_0 = data.query("stock_id == 0").sort_values("seconds_in_bucket", kind="stable")
seconds_0 = stock_0["seconds_in_bucket"].tolist()
imbalance_0 = stock_0["imbalance_size"]
# NOTE(review): if data.csv holds several date_id values for stock 0, each
# second appears once per day; add `& date_id == 0` to the query to plot a
# single day - confirm which is intended.

# plt.figure(figsize=(12, 5)) # must be done at the beginning, before plotting

# labels
plt.title("Seconds vs. Imbalance of Stock 0")
plt.xlabel("Seconds in Bucket")
plt.ylabel("Imbalance Size")

# graph
plt.plot(seconds_0, imbalance_0)

# customizations

plt.show()

In [None]:
# data
# Take the stock ids from the same rows as the bid sizes, so the number of x
# positions always equals the number of bars and each bar sits at its own
# stock id. A hard-coded np.arange(0, 191) makes plt.bar fail with a shape
# mismatch as soon as the filter returns anything other than 191 rows.
time_0 = data.query("date_id == 0 & seconds_in_bucket == 0")
x = time_0["stock_id"]
y = time_0["bid_size"]

plt.figure(figsize=(12, 5))

# labels
plt.title("Bid Size of Each Stock at Time 0")
plt.xlabel("Stock")
plt.ylabel("Bid Size")

# graph
plt.bar(x, y)

plt.show()

In [None]:
# 3D bar graph: bid size of stocks 0-5 over the first 10 time steps of date 0

# Run the filter once. Restricting to a single date gives one bid_size per
# (seconds_in_bucket, stock_id) pair.
subset = data.query("date_id == 0 & stock_id >= 0 & stock_id < 6")

# Table with one row per seconds_in_bucket value and one column per stock_id.
# Building the heights this way guarantees every bar height belongs to the
# (stock, seconds) position it is drawn at; slicing the raw columns
# independently does not. Missing combinations become bars of height 0.
z = subset.pivot_table(
    index="seconds_in_bucket", columns="stock_id", values="bid_size", fill_value=0
).iloc[:10]
# print(z)

fig = plt.figure(figsize=(12, 4))
# only one plot is drawn, so let it use the whole figure
ax1 = fig.add_subplot(111, projection='3d')

# meshgrid(columns, rows) has the same (rows, columns) layout as the table,
# so ravel() walks positions and heights in the same order
xx, yy = np.meshgrid(z.columns, z.index)
x, y = xx.ravel(), yy.ravel()

top = z.to_numpy().ravel()
bottom = np.zeros_like(top)
width = 1   # stock ids are one apart
depth = 10  # presumably the spacing between seconds_in_bucket values - TODO confirm

ax1.bar3d(x, y, bottom, width, depth, top, shade=True)
ax1.set_title('Shaded')
ax1.set_xlabel("Stock")
ax1.set_ylabel("Seconds in Bucket")
ax1.set_zlabel("Bid Size")

plt.show()

In [None]:
# results
# Load predicted_true.json (fetched by the wget cell at the top) with pandas.
# The next cell reads results["data"]["bin_counts"], ["bin_averages"] and
# ["bin_errors"] from it.
results = pd.read_json("predicted_true.json")

In [None]:
# data
bin_counts = results["data"]["bin_counts"]
bin_averages = results["data"]["bin_averages"]
# this will not work
# lower_bound = results["data"]["bin_averages"] - results["data"]["bin_errors"]
# upper_bound = results["data"]["bin_averages"] + results["data"]["bin_errors"]
# convert to numpy array (once) to perform arithmetic on each value in list
averages = np.array(bin_averages)
errors = np.array(results["data"]["bin_errors"])
lower_bound = averages - errors
upper_bound = averages + errors

# x position of every bin, shared by the filled band and the bars
bin_positions = range(len(bin_counts))

# plt.subplots creates the figure (with its size) and the first axes in one
# call; a separate plt.figure(...) before it would only leave an extra, empty
# figure in the output
fig, ax1 = plt.subplots(figsize=(12, 5))

# line graph
# dark blue line
ax1.plot(bin_averages)
# light blue area around
ax1.plot(lower_bound, color="paleturquoise")
ax1.plot(upper_bound, color="paleturquoise")
# y-axis
ax1.set_ylim(-60, 60)
ax1.set_yticks(np.arange(-30, 61, 30))
# horizontal gridlines
ax1.grid(axis='y')
# fill area
ax1.fill_between(bin_positions, lower_bound, upper_bound, color="paleturquoise")

# allows for two different y-axes
ax2 = ax1.twinx()

# bar graph
ax2.bar(bin_positions, height=bin_counts)
# y-axis: the limit is much larger than the highest tick so the bars only
# occupy the lower part of the plot
ax2.set_ylim(0, 961)
ax2.set_yticks(np.arange(0, 241, 80))
# horizontal gridlines
ax2.grid(axis='y')

# labels
ax1.set_title("Predicted vs. True")
ax1.set_xlabel("True Values")
ax1.set_ylabel("Predicted Values")
ax2.set_ylabel("Bin Counts")
# move the right-hand label down next to the bar ticks
ax2.yaxis.set_label_coords(1.05, 0.125)

# saving graph
# plt.savefig('testing.png')

plt.show()

In [None]:
# challenge activity
# read csv file "challenge.csv" to get x and y values
# set figure size to (12, 5)
# x lim is from 0 to 100
# y lim is from 0 to 50
# graph scatter plot using x and y values (hint: plt.scatter(x, y))
# set title to what the graph depicts
# first one to get it gets prize