In [1]:
import numpy as np
import pandas as pd
import datetime
from datetime import datetime, date
# Keep rendered frames compact: at most 8 columns and 10 rows, 65 characters wide.
pd.options.display.max_columns = 8
pd.options.display.max_rows = 10
pd.options.display.width = 65
import matplotlib.pyplot as plt

In [2]:
# Raw labels; with no explicit categories, pandas infers them from the values.
lmh_values = ['low', 'high', 'medium', 'medium', 'high']
lmh_cat = pd.Categorical(values=lmh_values)
lmh_cat

['low', 'high', 'medium', 'medium', 'high']
Categories (3, object): ['high', 'low', 'medium']

In [3]:
# Integer code of each value, i.e. its position in lmh_cat.categories.
lmh_cat.codes

array([1, 0, 2, 2, 0], dtype=int8)

In [4]:
# Rebuild the categorical with an explicit category order instead of the inferred one.
lmh_cat = pd.Categorical(values=lmh_values, categories=['low', 'medium', 'high'])
lmh_cat

['low', 'high', 'medium', 'medium', 'high']
Categories (3, object): ['low', 'medium', 'high']

In [5]:
# Sorting follows the explicit category order (low, medium, high), not alphabetical order.
lmh_cat.sort_values()

['low', 'medium', 'medium', 'high', 'high']
Categories (3, object): ['low', 'medium', 'high']

In [6]:
# Create a categorical Series directly by requesting the 'category' dtype.
cat_series = pd.Series(data=lmh_values, dtype='category')
cat_series

0       low
1      high
2    medium
3    medium
4      high
dtype: category
Categories (3, object): ['high', 'low', 'medium']

In [7]:
# Show the source list again; it is still the plain list of labels.
lmh_values

['low', 'high', 'medium', 'medium', 'high']

In [8]:
# Start from a regular (non-categorical) Series, then convert it to categorical.
s = pd.Series(data=lmh_values)
as_cat = s.astype(dtype='category')
as_cat

0       low
1      high
2    medium
3    medium
4      high
dtype: category
Categories (3, object): ['high', 'low', 'medium']

In [9]:
# The .cat accessor object through which categorical attributes of the Series are reached.
cat_series.cat

<pandas.core.arrays.categorical.CategoricalAccessor object at 0x000002296D279750>

In [10]:
# Categories of the Series, returned as an Index.
cat_series.cat.categories

Index(['high', 'low', 'medium'], dtype='object')

In [11]:
# Fix the seed so the random draw below is reproducible.
np.random.seed(123456)
# Draw 5 random integers from 0 (inclusive) up to 100 (exclusive).
values = np.random.randint(low=0, high=100, size=5)
bins = pd.DataFrame(data={'Values': values})
bins

Unnamed: 0,Values
0,65
1,49
2,56
3,43
4,43


In [12]:
# Bucket each value into 20-wide, right-closed intervals: (0, 20], (20, 40], ... (80, 100].
# NOTE(review): the intervals exclude their left edge, so a value of exactly 0
# would fall outside every bin and be reported as NaN.
bins['Group'] = pd.cut(x=values, bins=range(0, 101, 20))
bins

Unnamed: 0,Values,Group
0,65,"(60, 80]"
1,49,"(40, 60]"
2,56,"(40, 60]"
3,43,"(40, 60]"
4,43,"(40, 60]"


In [13]:
# The Group column is categorical, with ordered interval categories.
bins.Group

0    (60, 80]
1    (40, 60]
2    (40, 60]
3    (40, 60]
4    (40, 60]
Name: Group, dtype: category
Categories (5, interval[int64, right]): [(0, 20] < (20, 40] < (40, 60] < (60, 80] < (80, 100]]

In [14]:
# Medal labels with an explicit ranking: bronze < silver < gold.
metal_values = ['bronze', 'silver', 'gold', 'gold']
metal_categories = ['bronze', 'silver', 'gold']
metals = pd.Categorical(values=metal_values, categories=metal_categories, ordered=True)
metals

['bronze', 'silver', 'gold', 'gold']
Categories (3, object): ['bronze' < 'silver' < 'gold']

In [15]:
# Categories in the order they were supplied at construction.
metals.categories

Index(['bronze', 'silver', 'gold'], dtype='object')

In [16]:
# Same values in reverse order, reusing the categories of `metals` and keeping them ordered.
metals_reversed_values = pd.Categorical(values=metals[::-1],
                                        categories=metals.categories,
                                        ordered=True)
metals_reversed_values

['gold', 'gold', 'silver', 'bronze']
Categories (3, object): ['bronze' < 'silver' < 'gold']

In [17]:
# Element-wise comparison that uses the category order (bronze < silver < gold).
metals <= metals_reversed_values

array([ True,  True, False, False])

In [18]:
# Codes are positions in the shared categories: gold=2, silver=1, bronze=0.
metals_reversed_values.codes

array([2, 2, 1, 0], dtype=int8)

In [19]:
# Codes of the original ordering: bronze=0, silver=1, gold=2.
metals.codes

array([0, 1, 2, 2], dtype=int8)

In [20]:
# A value that is not among the given categories ('copper') is shown as NaN.
pd.Categorical(['bronze','copper'],
              categories=metal_categories)

['bronze', NaN]
Categories (3, object): ['bronze', 'silver', 'gold']

In [21]:
# Small categorical over the labels a, b, c (no ordering requested).
cat = pd.Categorical(values=list('abca'), categories=list('abc'))
cat

['a', 'b', 'c', 'a']
Categories (3, object): ['a', 'b', 'c']

In [22]:
# Relabel the categories positionally: a -> bronze, b -> silver, c -> gold.
renamed = cat.rename_categories(new_categories=['bronze', 'silver', 'gold'])
renamed

['bronze', 'silver', 'gold', 'bronze']
Categories (3, object): ['bronze', 'silver', 'gold']

In [23]:
# The original categorical still shows its old labels; the rename produced a separate object.
cat

['a', 'b', 'c', 'a']
Categories (3, object): ['a', 'b', 'c']

In [24]:
# The renamed result carries the new labels.
renamed

['bronze', 'silver', 'gold', 'bronze']
Categories (3, object): ['bronze', 'silver', 'gold']

In [25]:
# Append 'platinum' as a new top category; no existing value uses it yet.
with_platinum = metals.add_categories(new_categories=['platinum'])
with_platinum

['bronze', 'silver', 'gold', 'gold']
Categories (4, object): ['bronze' < 'silver' < 'gold' < 'platinum']

In [26]:
# Drop the 'bronze' category; values that belonged to it become NaN.
no_bronze = metals.remove_categories(removals=['bronze'])
no_bronze

[NaN, 'silver', 'gold', 'gold']
Categories (2, object): ['gold' < 'silver']

In [27]:
# Result with categories that no value uses removed ('platinum' is no longer listed).
with_platinum.remove_unused_categories()

['bronze', 'silver', 'gold', 'gold']
Categories (3, object): ['bronze' < 'silver' < 'gold']

In [28]:
# Reseed so the generated grades are reproducible.
np.random.seed(123456)
names = [
    'Ivana', 'Norris', 'Ruth', 'Lane', 'Skye',
    'Sol', 'Dylan', 'Katina', 'Alissa', 'Marc',
]
# One random integer grade per student, from 50 through 100 inclusive.
grades = np.random.randint(low=50, high=101, size=len(names))
scores = pd.DataFrame(data={'Name': names, 'Grade': grades})
scores

Unnamed: 0,Name,Grade
0,Ivana,51
1,Norris,92
2,Ruth,100
3,Lane,99
4,Skye,93
5,Sol,97
6,Dylan,93
7,Katina,77
8,Alissa,82
9,Marc,73


In [33]:
# Bin edges for the grade bands; consecutive edges delimit one band each.
score_bins = [0, 59, 62, 66, 69, 72, 76, 79, 82, 86, 89, 92, 99, 100]
# One label per band (one fewer than the number of edges), from lowest to highest.
letter_grades = ['F', 'D-', 'D', 'D+', 'C-', 'C', 'C+', 'B-', 'B', 'B+', 'A-', 'A', 'A+']

In [34]:
# Map each numeric grade to its letter band and store it next to the grade.
# pd.cut bins are right-closed by default, so the first band would be (0, 59] and
# a grade of exactly 0 would become NaN; include_lowest=True makes that band
# include 0 so such a grade is labelled 'F'. All other grades map as before.
letter_cats = pd.cut(scores.Grade, score_bins, labels=letter_grades,
                     include_lowest=True)
scores['Letter'] = letter_cats
scores

Unnamed: 0,Name,Grade,Letter
0,Ivana,51,F
1,Norris,92,A-
2,Ruth,100,A+
3,Lane,99,A
4,Skye,93,A
5,Sol,97,A
6,Dylan,93,A
7,Katina,77,C+
8,Alissa,82,B-
9,Marc,73,C


In [31]:
# The cut result: an ordered categorical Series that keeps the source column's name (Grade).
letter_cats

0     F
1    A-
2    A+
3     A
4     A
5     A
6     A
7    C+
8    B-
9     C
Name: Grade, dtype: category
Categories (13, object): ['F' < 'D-' < 'D' < 'D+' ... 'B+' < 'A-' < 'A' < 'A+']

In [35]:
# Number of students per letter grade; grades nobody received are listed with a count of 0.
scores.Letter.value_counts()

Letter
A     4
F     1
C     1
C+    1
B-    1
     ..
D     0
D+    0
C-    0
B     0
B+    0
Name: count, Length: 13, dtype: int64

In [37]:
# Sort from best to worst letter; the order follows the category ranking, not the label text.
scores.sort_values(by=['Letter'], ascending=False)

Unnamed: 0,Name,Grade,Letter
2,Ruth,100,A+
3,Lane,99,A
4,Skye,93,A
5,Sol,97,A
6,Dylan,93,A
1,Norris,92,A-
8,Alissa,82,B-
7,Katina,77,C+
9,Marc,73,C
0,Ivana,51,F
