In [2]:
import plotly.express as px
import pandas as pd

In [None]:
# Basic Anatomy

fig = px.plotting_function(
    dataframe,
    x='column-for-xaxis',
    y='column-for-yaxis',
    title='Title For the Plot',
    width=width_in_pixels,
    height=height_in_pixels
)

fig.show()

In [4]:
diamonds = pd.read_csv("diamonds.csv")
# The dataset contains over 53k diamonds with 10 physical characteristics.
diamonds.head()

Unnamed: 0.1,Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,1,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,2,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,3,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,4,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,5,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [6]:
# HISTOGRAMS

fig = px.histogram(
  diamonds,
  x="price",
  title="Histogram of diamond prices",
  width=600,
  height=400,
)

fig.show()

In [12]:
# There are too many bins. Because of that we can change the number

nbins = int(len(diamonds) ** (1 / 2))

fig = px.histogram(
  diamonds,
  x="price",
  title="Histogram of diamond prices",
  nbins=70,
  width=600,
  height=400,
)


fig.show()

In [14]:
# BAR CHARTS

fig = px.histogram(diamonds, x="cut",width=600,height=400)
# we get the cut quality within the column, plus the total count

fig.show()

In [15]:
# adding the labels

fig.update_layout(
  title="The number of diamonds in each diamond cut category",
  xaxis_title="",
  yaxis_title="Count",
)

In [None]:
########################################################################################

In [16]:
# Now, let's see the mean price for each diamond cut category.

mean_prices = (
    diamonds.groupby("cut")["price"].mean().reset_index()
)

mean_prices

Unnamed: 0,cut,price
0,Fair,4358.757764
1,Good,3928.864452
2,Ideal,3457.54197
3,Premium,4584.257704
4,Very Good,3981.759891


In [19]:
# Chaining the reset_index function to the end of a groupby operation turns the output into a DataFrame and Plotly loves DataFrames.
# So, let's pass it to its bar function:

fig = px.bar(mean_prices, x="cut", y="price")

fig.update_layout(
  title="Average diamond prices for each cut category",
  xaxis_title="",
  yaxis_title="Mean price ($)",
  width=800,height=400
)

fig.show()

In [None]:
# BOX PLOTS

In [20]:
fig = px.box(diamonds, x="clarity", y="carat")

fig.update_layout(
  title="Distribution of diamond carats for each clarity category",
  xaxis_title="Clarity",
  yaxis_title="Carat",
)

fig.show()



---


**Visualizing relationships between features**

In [None]:
# SCATTER

In [22]:
fig = px.scatter(diamonds, x="price", y="carat")

fig.update_layout(
  title="Price vs. Carat",
  xaxis_title="Price ($)",
  yaxis_title="Carat",
  width=700,height=500
)

fig.show()

In [23]:
# We have run into a bad case of overplotting
# We can fix that by plotting only ~10% of the dataset, which will be enough to reveal any existing patterns:

fig = px.scatter(
  diamonds.sample(5000), x="price", y="carat"
)

fig.update_layout(
  title="Price vs. Carat",
  xaxis_title="Price ($)",
  yaxis_title="Carat",
  width=700,height=500
)

fig.show()


---

**Customizing plots**

In [25]:
# COLORS
# we can color each dot in the plot based on which category the diamond belongs to

sample = diamonds.sample(3000)

fig = px.scatter(sample, x="price", y="carat", color="cut",width=700,height=500)

fig.show()

In [26]:
# MARKER SIZE
# By setting the size parameter to carat, we get differently sized dots based on diamond carat:

fig = px.scatter(sample, x="price", y="x", size="carat",color="cut",width=700,height=500)

fig.show()