### A scatter plot is a relational plot
#### Here we use a scatter plot to study the relationship between variables: positive trend | negative trend | no correlation

In [1]:
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Load seaborn's example `diamonds` dataset into a DataFrame.
diamonds = sns.load_dataset('diamonds')
# Preview the first rows to see the available columns.
diamonds.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [3]:
# (rows, columns) of the dataset as loaded, before any filtering.
diamonds.shape

(53940, 10)

In [4]:
# Keep only rows whose cut is Premium or Good AND whose color is D, F or J.
# (The same selection can also be expressed as a single DataFrame.query string.)
wanted_cut = diamonds['cut'].isin(['Premium', 'Good'])
wanted_color = diamonds['color'].isin(['D', 'F', 'J'])
diamonds = diamonds[wanted_cut & wanted_color]

In [5]:
# (rows, columns) after the cut/color filter; only the row count should change.
diamonds.shape

(6620, 10)

In [6]:
# Summarise the `cut` column: dtype, non-null count and memory usage.
diamonds['cut'].info()

<class 'pandas.core.series.Series'>
Index: 6620 entries, 4 to 53936
Series name: cut
Non-Null Count  Dtype   
--------------  -----   
6620 non-null   category
dtypes: category(1)
memory usage: 58.4 KB


In [7]:
# Rows per cut. `cut` is categorical, so levels removed by the filter
# are still listed, with a count of 0.
diamonds['cut'].value_counts()

cut
Premium      4742
Good         1878
Ideal           0
Very Good       0
Fair            0
Name: count, dtype: int64

In [8]:
# All category levels of `cut`, including levels that no longer have any rows.
diamonds['cut'].cat.categories

Index(['Ideal', 'Premium', 'Very Good', 'Good', 'Fair'], dtype='object')

In [9]:
# Rows per color. As with `cut`, filtered-out levels remain with a count of 0.
diamonds['color'].value_counts()

color
F    3240
D    2265
J    1115
E       0
G       0
H       0
I       0
Name: count, dtype: int64

In [10]:
# All category levels of `color`, including levels that no longer have any rows.
diamonds['color'].cat.categories

Index(['D', 'E', 'F', 'G', 'H', 'I', 'J'], dtype='object')

In [None]:
# Price against carat for every filtered row.
# plt.show() stays inside the style context so the style is still active
# when the figure is actually drawn.
with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots()
    sns.scatterplot(data=diamonds, x='carat', y='price', ax=ax)
    ax.set_title('Price vs. carat')
    plt.show()

<img src='./plots/scatter-plot-1.png'>

### Let's use a sample

In [11]:
# Continue with a 300-row random sample; the fixed random_state makes the
# draw repeatable. Note that `diamonds` is overwritten, so re-running this
# cell samples from the already-sampled frame.
SAMPLE_SIZE = 300
RANDOM_STATE = 42
diamonds = diamonds.sample(n=SAMPLE_SIZE, random_state=RANDOM_STATE)

In [None]:
# Same price-vs-carat plot, now drawn from the current (sampled) `diamonds`.
# plt.show() stays inside the style context so the style is still active
# when the figure is actually drawn.
with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots()
    sns.scatterplot(data=diamonds, x='carat', y='price', ax=ax)
    ax.set_title('Price vs. carat (sample)')
    plt.show()

<img src='./plots/scatter-plot--2.png'>

In [13]:
# Relationship between the `x` and `y` columns.
# plt.show() stays inside the style context so the style is still active
# when the figure is actually drawn.
with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots()
    sns.scatterplot(data=diamonds, x='x', y='y', ax=ax)
    ax.set_title('Diamond dimensions: y vs. x')
    plt.show()

<img src='./plots/scatter-plot--3.png'>

### Semantics: `hue` | `style` | `size`

In [15]:
# Color the points by a categorical variable (`cut`) through the `hue` semantic.
# plt.show() stays inside the style context so the style is still active
# when the figure is actually drawn.
with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots()
    sns.scatterplot(data=diamonds, x='carat', y='price', hue='cut', ax=ax)
    ax.set_title('Price vs. carat, colored by cut')
    plt.show()

<img src='./plots/scatter-plot--4.png'>

#### You can use continuous variables in `hue`

In [17]:
# `hue` also accepts a numeric column; here the points are colored by `depth`.
# plt.show() stays inside the style context so the style is still active
# when the figure is actually drawn.
with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots()
    sns.scatterplot(data=diamonds, x='carat', y='price', hue='depth', ax=ax)
    ax.set_title('Price vs. carat, colored by depth')
    plt.show()

<img src='./plots/scatter-plot--5.png'>

### Fixed marker size (`s`), transparency and edge styling

In [20]:
# One fixed marker size for every point (`s`), with low alpha so overlapping
# points remain visible and a black edge to outline each marker.
# plt.show() stays inside the style context so the style is still active
# when the figure is actually drawn.
with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots()
    sns.scatterplot(
        data=diamonds, x='carat', y='price', hue='cut',
        s=150, alpha=0.2, linewidth=1, edgecolor='black',
        ax=ax,
    )
    ax.set_title('Price vs. carat: large, semi-transparent markers')
    plt.show()

<img src='./plots/scatter-plot--6.png'>

### style

In [22]:
# Encode `cut` twice: by color (`hue`) and by marker shape (`style`).
# plt.show() stays inside the style context so the style is still active
# when the figure is actually drawn.
with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots()
    sns.scatterplot(
        data=diamonds, x='carat', y='price', hue='cut', style='cut',
        s=100, alpha=0.4, linewidth=1, edgecolor='black',
        ax=ax,
    )
    ax.set_title('Price vs. carat: hue and marker style by cut')
    plt.show()

<img src='./plots/scatter-plot--7.png'>

### Map a variable to marker size with `size`

In [24]:
# Scale each marker by a numeric column (`depth`) through the `size` semantic.
# plt.show() stays inside the style context so the style is still active
# when the figure is actually drawn.
with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots()
    sns.scatterplot(data=diamonds, x='carat', y='price', size='depth', ax=ax)
    ax.set_title('Price vs. carat, marker size by depth')
    plt.show()

<img src='./plots/scatter-plot--8.png'>

### We can pass custom sizes with `sizes`

In [None]:
# List the `cut` category levels: the `sizes` list used in the following
# plots has one entry per level.
diamonds['cut'].cat.categories

In [26]:
# Explicit marker sizes for the categorical `size` variable: one value per
# `cut` category level (five levels, five sizes).
# NOTE(review): sizes are presumably assigned in category order - confirm
# against the seaborn `sizes` documentation.
# plt.show() stays inside the style context so the style is still active
# when the figure is actually drawn.
with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots()
    sns.scatterplot(
        data=diamonds, x='carat', y='price',
        size='cut', sizes=[10, 50, 100, 200, 300],
        ax=ax,
    )
    ax.set_title('Price vs. carat, custom marker sizes by cut')
    plt.show()

<img src='./plots/scatter-plot--9.png'>

In [29]:
# Same plot with a descending `sizes` list, again one value per `cut`
# category level.
# plt.show() stays inside the style context so the style is still active
# when the figure is actually drawn.
with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots()
    sns.scatterplot(
        data=diamonds, x='carat', y='price',
        size='cut', sizes=[200, 150, 100, 50, 25],
        ax=ax,
    )
    ax.set_title('Price vs. carat, descending marker sizes by cut')
    plt.show()

<img src='./plots/scatter-plot--10.png'>

### We can mix and match `hue` | `size` | `style`

In [13]:
# Combine all three semantics on the same variable: `cut` drives the color
# (`hue`), the marker shape (`style`) and the marker size (`size`).
# plt.show() stays inside the style context so the style is still active
# when the figure is actually drawn.
with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots()
    sns.scatterplot(
        data=diamonds, x='carat', y='price',
        hue='cut',
        style='cut',
        size='cut', sizes=[200, 150, 100, 50, 25],
        ax=ax,
    )
    ax.set_title('Price vs. carat: hue, style and size by cut')
    plt.show()

<img src='./plots/scatter-plot--11.png'>