# seaborn (countplot) เหมาะกับข้อมูลที่เป็น discrete ถ้าเป็น continuous ควรเป็น histogram เพราะ มันจะแบ่งช่วงให้

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
# Record the library versions this notebook was run with.
print(
    f"pandas  version = {pd.__version__}",
    f"seaborn version = {sns.__version__}",
    sep="\n",
)

## read miles per gallon dataset (mpg.csv)

In [None]:
# Download the mpg CSV into a DataFrame and preview the first rows.
url = 'https://github.com/prasertcbs/tutorial/raw/master/mpg.csv'
df = pd.read_csv(filepath_or_buffer=url)
df.head()

In [None]:
# Vertical bars: one bar per manufacturer, height = number of rows.
sns.countplot(data=df, x='manufacturer')

In [None]:
# Horizontal bars (category on the y axis), all drawn in a single colour.
sns.countplot(
    data=df,
    y='manufacturer',
    color='lightgray',
)

In [None]:
# Number of rows per value of the 'class' column.
sns.countplot(data=df, x='class')

### set chart (figure) size

In [None]:
# Create the figure first so the plot is drawn at 8x5 inches.
plt.figure(figsize=(8, 5))
# The trailing semicolon suppresses the Axes repr in the cell output.
sns.countplot(data=df, x='class');

### count discrete variable

In [None]:
# Count rows for each distinct value of 'cyl'.
sns.countplot(data=df, x='cyl')

In [None]:
# Count rows for each distinct value of 'year'.
sns.countplot(data=df, x='year')

In [None]:
# Count rows for each distinct value of 'cty' (one bar per distinct value).
sns.countplot(data=df, x='cty')

### countplot order by value_counts

In [None]:
# Frequency of each 'class' value, most frequent first (the pandas defaults,
# spelled out here because later cells rely on this ordering).
df['class'].value_counts(sort=True, ascending=False)

In [None]:
# The index of value_counts() holds the category labels sorted by
# descending frequency; this is what gets passed as `order=` below.
df['class'].value_counts(sort=True, ascending=False).index

In [None]:
# Baseline: bars in seaborn's default category order (no `order=` given).
sns.countplot(data=df, x='class')

In [None]:
# Same plot, but with bars ordered by the value_counts() index
# (most frequent class first).
sns.countplot(
    data=df,
    x='class',
    order=df['class'].value_counts().index,
)

In [None]:
# Build one colour per bar, in the same order as the bars are drawn
# (value_counts() order): highlight classes with more than 40 rows.
#
# Alternative palettes to experiment with:
#   p = ['maroon','lightgray','maroon','deepskyblue','maroon','maroon','maroon']
#   p = ["deepskyblue" if v == 'pickup' else "lightgray"
#            for v in df['class'].value_counts().index]
p = [
    "deepskyblue" if v > 40 else "lightgray"
    for v in df['class'].value_counts()
]

sns.countplot(
    data=df,
    x='class',
    order=df['class'].value_counts().index,
    palette=p,
)

### add horizontal line 

In [None]:
# Mean number of rows per class; drawn below as a dashed reference line.
avg = df['class'].value_counts().mean()

plt.figure(figsize=(8, 5))
sns.countplot(
    data=df,
    x='class',
    order=df['class'].value_counts().index,
    color='lightgray',
)
# Counts are on the y axis here, so the average is a horizontal line.
plt.axhline(y=avg, color='deepskyblue', linestyle='--')

### add vertical line

In [None]:
# Mean number of rows per class; drawn below as a dashed reference line.
avg = df['class'].value_counts().mean()

sns.countplot(y='class',
              data=df,
              color='lightgray',
              order=df['class'].value_counts().index)
# Bars are horizontal (category on y), so counts run along the x axis and
# the average is a vertical line.
plt.axvline(x=avg, linestyle='--', color='deepskyblue')
# Clear the y-axis label. This must be a *call*: assigning to `plt.ylabel`
# would replace the pyplot function with a string, leave the label untouched,
# and break every later `plt.ylabel(...)` call in the session.
plt.ylabel('')

In [None]:
# One colour per class, in value_counts() order: 'tomato' for classes whose
# count is above the mean, 'lightgray' otherwise.
avg = df['class'].value_counts().mean()
colors = [
    'tomato' if x > avg else 'lightgray'
    for x in df['class'].value_counts()
]
colors

In [None]:
# Bars coloured by `colors` (built in value_counts() order, so it lines up
# with `order=`), plus a dashed horizontal line at the mean count `avg`.
plt.figure(figsize=(8, 5))
sns.countplot(
    data=df,
    x='class',
    order=df['class'].value_counts().index,
    palette=colors,
)
plt.axhline(y=avg, color='deepskyblue', linestyle='--')

In [None]:
# Count rows for each value of 'drv'.
sns.countplot(data=df, x='drv')

### categorize 'drv' column

In [None]:
# Replace the short codes found in the 'drv' column with readable labels and
# store the result as a categorical column.
drv_labels = {'f': 'front', 'r': 'rear', '4': '4-wheel'}
# Values that are not keys of `drv_labels` are passed through unchanged
# rather than becoming NaN. This keeps the cell safe to re-run: on a second
# execution the column already holds 'front'/'rear'/'4-wheel', and a plain
# dict-based map would turn the whole column into NaN.
df['drv'] = df['drv'].map(lambda code: drv_labels.get(code, code)).astype('category')
df.head()

In [None]:
# Fix the bar order explicitly instead of using the default category order.
sns.countplot(
    data=df,
    x='drv',
    order=['rear', 'front', '4-wheel'],
    color='lightgray',
)

In [None]:
# Split each 'drv' bar into one bar per 'year' value.
sns.countplot(data=df, x='drv', hue='year')

## create 'gear' column from trans

In [None]:
# Derive 'gear' from the first character of 'trans':
# 'a' -> 'auto', 'm' -> 'manual'; the result is stored as a categorical column.
df['gear'] = (
    df['trans']
    .str[0]
    .map({'a': 'auto', 'm': 'manual'})
    .astype("category")
)
df.head()

In [None]:
# Count rows per 'drv', split by 'gear', with one explicit colour per hue level.
sns.countplot(
    data=df,
    x='drv',
    hue='gear',
    palette=['lawngreen', 'tomato'],
)
# Remove the x-axis label and add a chart title.
plt.xlabel('')
plt.title('demo countplot')

In [None]:
# Horizontal bars: one group per 'gear' value, split by 'year'.
sns.countplot(data=df, y="gear", hue="year")