In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
%matplotlib qt5
sns.set_style('whitegrid')
sns.set_palette(palette='tab10')

In [3]:
df = pd.DataFrame(data={'x': np.arange(0, 11, 1),
                        'y': np.arange(0, 22, 2)})

In [4]:
df.head()

Unnamed: 0,x,y
0,0,0
1,1,2
2,2,4
3,3,6
4,4,8


In [5]:
df.dtypes

x    int32
y    int32
dtype: object

In [6]:
df['x'].head() % 2

0    0
1    1
2    0
3    1
4    0
Name: x, dtype: int32

In [7]:
df['x'].head() % 2 == 0

0     True
1    False
2     True
3    False
4     True
Name: x, dtype: bool

In [8]:
df['x'].head() % 2

0    0
1    1
2    0
3    1
4    0
Name: x, dtype: int32

In [9]:
df['x'].head() % 2 == 0

0     True
1    False
2     True
3    False
4     True
Name: x, dtype: bool

In [10]:
df['parity'] = df['x'] % 2 == 0

In [11]:
df['parity'] = df['parity'].astype('category')

In [12]:
df['parity'] = df['parity'].cat.rename_categories({True: 'even',
                                                   False: 'odd'})

In [13]:
df.head()

Unnamed: 0,x,y,parity
0,0,0,even
1,1,2,odd
2,2,4,even
3,3,6,odd
4,4,8,even


In [14]:
df.dtypes

x            int32
y            int32
parity    category
dtype: object

In [15]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11 entries, 0 to 10
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   x       11 non-null     int32   
 1   y       11 non-null     int32   
 2   parity  11 non-null     category
dtypes: category(1), int32(2)
memory usage: 355.0 bytes


In [16]:
df.describe()

Unnamed: 0,x,y
count,11.0,11.0
mean,5.0,10.0
std,3.316625,6.63325
min,0.0,0.0
25%,2.5,5.0
50%,5.0,10.0
75%,7.5,15.0
max,10.0,20.0


In [17]:
sns.get_dataset_names()

['anagrams',
 'anscombe',
 'attention',
 'brain_networks',
 'car_crashes',
 'diamonds',
 'dots',
 'dowjones',
 'exercise',
 'flights',
 'fmri',
 'geyser',
 'glue',
 'healthexp',
 'iris',
 'mpg',
 'penguins',
 'planets',
 'seaice',
 'taxis',
 'tips',
 'titanic']

In [18]:
flights = sns.load_dataset(name='flights')

In [19]:
flights.head()

Unnamed: 0,year,month,passengers
0,1949,Jan,112
1,1949,Feb,118
2,1949,Mar,132
3,1949,Apr,129
4,1949,May,121


In [20]:
flights.dtypes

year             int64
month         category
passengers       int64
dtype: object

In [21]:
flights.shape

(144, 3)

In [22]:
flights.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144 entries, 0 to 143
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   year        144 non-null    int64   
 1   month       144 non-null    category
 2   passengers  144 non-null    int64   
dtypes: category(1), int64(2)
memory usage: 2.9 KB


In [23]:
flights.describe()

Unnamed: 0,year,passengers
count,144.0,144.0
mean,1954.5,280.298611
std,3.464102,119.966317
min,1949.0,104.0
25%,1951.75,180.0
50%,1954.5,265.5
75%,1957.25,360.5
max,1960.0,622.0


In [24]:
exercise = sns.load_dataset(name='exercise')

In [25]:
exercise.head()

Unnamed: 0.1,Unnamed: 0,id,diet,pulse,time,kind
0,0,1,low fat,85,1 min,rest
1,1,1,low fat,85,15 min,rest
2,2,1,low fat,88,30 min,rest
3,3,2,low fat,90,1 min,rest
4,4,2,low fat,92,15 min,rest


In [26]:
exercise.dtypes

Unnamed: 0       int64
id               int64
diet          category
pulse            int64
time          category
kind          category
dtype: object

In [27]:
exercise.shape

(90, 6)

In [28]:
exercise.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   Unnamed: 0  90 non-null     int64   
 1   id          90 non-null     int64   
 2   diet        90 non-null     category
 3   pulse       90 non-null     int64   
 4   time        90 non-null     category
 5   kind        90 non-null     category
dtypes: category(3), int64(3)
memory usage: 2.9 KB


In [29]:
exercise.describe()

Unnamed: 0.1,Unnamed: 0,id,pulse
count,90.0,90.0,90.0
mean,44.5,15.5,99.7
std,26.124701,8.703932,14.858471
min,0.0,1.0,80.0
25%,22.25,8.0,90.25
50%,44.5,15.5,96.0
75%,66.75,23.0,103.0
max,89.0,30.0,150.0


In [30]:
tips = sns.load_dataset(name='tips')

In [31]:
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [32]:
tips.dtypes

total_bill     float64
tip            float64
sex           category
smoker        category
day           category
time          category
size             int64
dtype: object

In [33]:
tips.shape

(244, 7)

In [34]:
tips.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  244 non-null    float64 
 1   tip         244 non-null    float64 
 2   sex         244 non-null    category
 3   smoker      244 non-null    category
 4   day         244 non-null    category
 5   time        244 non-null    category
 6   size        244 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB


In [35]:
tips.describe() 

Unnamed: 0,total_bill,tip,size
count,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672
std,8.902412,1.383638,0.9511
min,3.07,1.0,1.0
25%,13.3475,2.0,2.0
50%,17.795,2.9,2.0
75%,24.1275,3.5625,3.0
max,50.81,10.0,6.0


In [36]:
taxis = sns.load_dataset(name='taxis')

In [37]:
taxis.head()

Unnamed: 0,pickup,dropoff,passengers,distance,fare,tip,tolls,total,color,payment,pickup_zone,dropoff_zone,pickup_borough,dropoff_borough
0,2019-03-23 20:21:09,2019-03-23 20:27:24,1,1.6,7.0,2.15,0.0,12.95,yellow,credit card,Lenox Hill West,UN/Turtle Bay South,Manhattan,Manhattan
1,2019-03-04 16:11:55,2019-03-04 16:19:00,1,0.79,5.0,0.0,0.0,9.3,yellow,cash,Upper West Side South,Upper West Side South,Manhattan,Manhattan
2,2019-03-27 17:53:01,2019-03-27 18:00:25,1,1.37,7.5,2.36,0.0,14.16,yellow,credit card,Alphabet City,West Village,Manhattan,Manhattan
3,2019-03-10 01:23:59,2019-03-10 01:49:51,1,7.7,27.0,6.15,0.0,36.95,yellow,credit card,Hudson Sq,Yorkville West,Manhattan,Manhattan
4,2019-03-30 13:27:42,2019-03-30 13:37:14,3,2.16,9.0,1.1,0.0,13.4,yellow,credit card,Midtown East,Yorkville West,Manhattan,Manhattan


In [38]:
taxis.dtypes

pickup             datetime64[ns]
dropoff            datetime64[ns]
passengers                  int64
distance                  float64
fare                      float64
tip                       float64
tolls                     float64
total                     float64
color                      object
payment                    object
pickup_zone                object
dropoff_zone               object
pickup_borough             object
dropoff_borough            object
dtype: object

In [39]:
taxis.shape

(6433, 14)

In [40]:
taxis.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6433 entries, 0 to 6432
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   pickup           6433 non-null   datetime64[ns]
 1   dropoff          6433 non-null   datetime64[ns]
 2   passengers       6433 non-null   int64         
 3   distance         6433 non-null   float64       
 4   fare             6433 non-null   float64       
 5   tip              6433 non-null   float64       
 6   tolls            6433 non-null   float64       
 7   total            6433 non-null   float64       
 8   color            6433 non-null   object        
 9   payment          6389 non-null   object        
 10  pickup_zone      6407 non-null   object        
 11  dropoff_zone     6388 non-null   object        
 12  pickup_borough   6407 non-null   object        
 13  dropoff_borough  6388 non-null   object        
dtypes: datetime64[ns](2), float64(5), int64(

In [56]:
taxis = taxis.dropna(how='any')
taxis = taxis.reset_index()

In [57]:
taxis.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6341 entries, 0 to 6340
Data columns (total 15 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   index            6341 non-null   int64         
 1   pickup           6341 non-null   datetime64[ns]
 2   dropoff          6341 non-null   datetime64[ns]
 3   passengers       6341 non-null   int64         
 4   distance         6341 non-null   float64       
 5   fare             6341 non-null   float64       
 6   tip              6341 non-null   float64       
 7   tolls            6341 non-null   float64       
 8   total            6341 non-null   float64       
 9   color            6341 non-null   object        
 10  payment          6341 non-null   object        
 11  pickup_zone      6341 non-null   object        
 12  dropoff_zone     6341 non-null   object        
 13  pickup_borough   6341 non-null   object        
 14  dropoff_borough  6341 non-null   object 

In [58]:
taxis.describe()

Unnamed: 0,index,pickup,dropoff,passengers,distance,fare,tip,tolls,total
count,6341.0,6341,6341,6341.0,6341.0,6341.0,6341.0,6341.0,6341.0
mean,3215.45261,2019-03-16 08:30:26.574830080,2019-03-16 08:44:47.525784832,1.544078,2.997707,12.887931,1.972703,0.314793,18.310263
min,0.0,2019-02-28 23:29:03,2019-02-28 23:32:35,0.0,0.0,1.0,0.0,0.0,1.3
25%,1607.0,2019-03-08 15:28:20,2019-03-08 15:54:00,1.0,0.99,6.5,0.0,0.0,10.8
50%,3213.0,2019-03-15 21:57:47,2019-03-15 22:07:48,1.0,1.65,9.5,1.75,0.0,14.16
75%,4824.0,2019-03-23 17:45:29,2019-03-23 17:57:56,2.0,3.2,15.0,2.82,0.0,20.3
max,6432.0,2019-03-31 23:43:45,2019-04-01 00:13:58,6.0,36.7,150.0,23.19,24.02,174.82
std,1857.210281,,,1.207948,3.719775,10.722249,2.361897,1.369174,12.950365


In [42]:
iris = sns.load_dataset(name='iris')

In [43]:
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [44]:
iris.dtypes

sepal_length    float64
sepal_width     float64
petal_length    float64
petal_width     float64
species          object
dtype: object

In [45]:
iris.shape

(150, 5)

In [46]:
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [47]:
iris.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [48]:
penguins = sns.load_dataset(name='penguins')

In [49]:
penguins.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,Female


In [50]:
penguins.dtypes

species               object
island                object
bill_length_mm       float64
bill_depth_mm        float64
flipper_length_mm    float64
body_mass_g          float64
sex                   object
dtype: object

In [51]:
penguins.shape

(344, 7)

In [52]:
penguins.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 344 entries, 0 to 343
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   species            344 non-null    object 
 1   island             344 non-null    object 
 2   bill_length_mm     342 non-null    float64
 3   bill_depth_mm      342 non-null    float64
 4   flipper_length_mm  342 non-null    float64
 5   body_mass_g        342 non-null    float64
 6   sex                333 non-null    object 
dtypes: float64(4), object(3)
memory usage: 18.9+ KB


In [53]:
penguins = penguins.dropna(how='any')
penguins = penguins.reset_index()

In [54]:
penguins.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333 entries, 0 to 332
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   index              333 non-null    int64  
 1   species            333 non-null    object 
 2   island             333 non-null    object 
 3   bill_length_mm     333 non-null    float64
 4   bill_depth_mm      333 non-null    float64
 5   flipper_length_mm  333 non-null    float64
 6   body_mass_g        333 non-null    float64
 7   sex                333 non-null    object 
dtypes: float64(4), int64(1), object(3)
memory usage: 20.9+ KB


In [55]:
penguins.describe()

Unnamed: 0,index,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g
count,333.0,333.0,333.0,333.0,333.0
mean,172.303303,43.992793,17.164865,200.966967,4207.057057
std,97.346548,5.468668,1.969235,14.015765,805.215802
min,0.0,32.1,13.1,172.0,2700.0
25%,89.0,39.5,15.6,190.0,3550.0
50%,172.0,44.5,17.3,197.0,4050.0
75%,256.0,48.6,18.7,213.0,4775.0
max,343.0,59.6,21.5,231.0,6300.0


In [59]:
iris['species'] = iris['species'].astype('category')

In [62]:
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   sepal_length  150 non-null    float64 
 1   sepal_width   150 non-null    float64 
 2   petal_length  150 non-null    float64 
 3   petal_width   150 non-null    float64 
 4   species       150 non-null    category
dtypes: category(1), float64(4)
memory usage: 5.1 KB


In [67]:
penguins['species'] = penguins['species'].astype('category')
penguins['island'] = penguins['island'].astype('category')
penguins['sex'] = penguins['sex'].astype('category')

In [68]:
penguins.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333 entries, 0 to 332
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype   
---  ------             --------------  -----   
 0   index              333 non-null    int64   
 1   species            333 non-null    category
 2   island             333 non-null    category
 3   bill_length_mm     333 non-null    float64 
 4   bill_depth_mm      333 non-null    float64 
 5   flipper_length_mm  333 non-null    float64 
 6   body_mass_g        333 non-null    float64 
 7   sex                333 non-null    category
dtypes: category(3), float64(4), int64(1)
memory usage: 14.5 KB


In [69]:
taxis['color'] = taxis['color'].astype('category')
taxis['payment'] = taxis['payment'].astype('category')
taxis['pickup_zone'] = taxis['pickup_zone'].astype('category')
taxis['dropoff_zone'] = taxis['dropoff_zone'].astype('category')
taxis['pickup_borough'] = taxis['pickup_borough'].astype('category')
taxis['dropoff_borough'] = taxis['dropoff_borough'].astype('category')

In [70]:
taxis.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6341 entries, 0 to 6340
Data columns (total 15 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   index            6341 non-null   int64         
 1   pickup           6341 non-null   datetime64[ns]
 2   dropoff          6341 non-null   datetime64[ns]
 3   passengers       6341 non-null   int64         
 4   distance         6341 non-null   float64       
 5   fare             6341 non-null   float64       
 6   tip              6341 non-null   float64       
 7   tolls            6341 non-null   float64       
 8   total            6341 non-null   float64       
 9   color            6341 non-null   category      
 10  payment          6341 non-null   category      
 11  pickup_zone      6341 non-null   category      
 12  dropoff_zone     6341 non-null   category      
 13  pickup_borough   6341 non-null   category      
 14  dropoff_borough  6341 non-null   categor

In [72]:
fig = plt.figure(num=1, figsize=None, dpi=None)
ax1 = fig.add_subplot(111)

In [73]:
ax2 = sns.scatterplot(data=df, x='x', y='y')

In [74]:
ax1 == ax2

True

In [75]:
fig = plt.figure(num=2, figsize=None, dpi=None)
ax1 = fig.add_subplot(111)

In [76]:
fig = sns.relplot(data=df, x='x', y='y')

In [77]:
fig = plt.gcf()

In [78]:
fig = plt.figure(num=3)

In [80]:
fig.axes

[<Axes: xlabel='x', ylabel='y'>]

In [81]:
ax1 = fig.axes[0]

In [82]:
ax1.set_xlabel(r'$x$ (m)')

Text(0.5, 26.99999999999998, '$x$ (m)')

In [83]:
ax1.collections

<Axes.ArtistList of 1 collections>

In [84]:
scatter = ax1.collections[0]

In [85]:
plt.getp(scatter)

    agg_filter = None
    alpha = None
    animated = False
    array = None
    children = []
    clim = (None, None)
    clip_box = TransformedBbox(     Bbox(x0=0.0, y0=0.0, x1=1.0, ...
    clip_on = True
    clip_path = None
    cmap = <matplotlib.colors.ListedColormap object at 0x0000...
    edgecolor or ec or edgecolors = [[1. 1. 1. 1.]]
    facecolor or facecolors or fc = [[0.12156863 0.46666667 0.70588235 1.        ]]
    figure = Figure(500x500)
    fill = True
    gid = None
    hatch = None
    in_layout = True
    label = _child0
    linestyle or dashes or linestyles or ls = [(0.0, None)]
    linewidth or linewidths or lw = [0.48]
    mouseover = False
    offset_transform or transOffset = CompositeGenericTransform(     TransformWrapper(  ...
    offsets = [[0.0 0.0]  [1.0 2.0]  [2.0 4.0]  [3.0 6.0]  [4.0 ...
    path_effects = None
    paths = (Path(array([[ 0.        , -0.5       ],        [ ...
    picker = None
    pickradius = 5.0
    rasterized = False
    sizes = [36.

In [87]:
fig = plt.figure(2)
ax1 = fig.gca()
ax1 = sns.scatterplot(data=df, x='x', y='y',
                      hue='parity')

In [93]:
fig, axarray = plt.subplots(nrows=2, ncols=1, 
                            num=4, figsize=None, dpi=None)
sns.scatterplot(data=df, x='x', y='y',
                hue='parity',
                ax=axarray[0],
                palette='hsv')
sns.scatterplot(data=df, x='x', y='y',
                hue='parity',
                ax=axarray[1])

<Axes: xlabel='x', ylabel='y'>

In [94]:
fig = sns.relplot(data=df, x='x', y='y',
                  col='parity')

In [95]:
fig = sns.relplot(data=df, x='x', y='y',
                  row='parity')

In [96]:
fig = sns.relplot(data=flights, x='year', y='passengers',
                  hue='month')

In [98]:
fig = sns.relplot(data=flights, x='year', y='passengers',
                  col='month', col_wrap=4)

In [99]:
fig = sns.relplot(data=flights, x='year', y='passengers',
                  hue='month',
                  col='month', col_wrap=4)

In [100]:
fig = sns.relplot(data=flights, x='year', y='passengers',
                  hue='month',
                  col='month', col_wrap=4,
                  kind='line')

In [105]:
iris[iris['species']=='setosa'].head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [125]:
fig, axarray = plt.subplots(nrows=3, ncols=1,
                            num=11, figsize=None, dpi=None)
sns.rugplot(data=iris[iris['species']=='setosa'].head(7),
            x='sepal_length', ax=axarray[0],
            height=1)
sns.histplot(data=iris[iris['species']=='setosa'].head(7),
            x='sepal_length', ax=axarray[1], bins=9)
sns.kdeplot(data=iris[iris['species']=='setosa'].head(7),
            x='sepal_length', ax=axarray[2])

<Axes: xlabel='sepal_length', ylabel='Density'>