In [1]:
import pandas as pd

# Example: build a DataFrame of letter grades, listed from best to worst.
# The index carries a human judgement of how good each student was
# ("excellent", "good", "ok" or "poor").

df = pd.DataFrame(
    {"Grades": ["A+", "A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "D"]},
    index=["excellent"] * 3 + ["good"] * 3 + ["ok"] * 3 + ["poor"] * 2,
)
df

Unnamed: 0,Grades
excellent,A+
excellent,A
excellent,A-
good,B+
good,B
good,B-
ok,C+
ok,C
ok,C-
poor,D+


In [2]:
# Inspect the dtype pandas assigned to each column of the grades frame.
df.dtypes

Grades    object
dtype: object

In [5]:
# astype() can convert the column to the "category" dtype. The converted Series
# is only previewed here (first five rows); it is not assigned back, so
# df["Grades"] itself is left unchanged. With no explicit dtype the categories
# carry no ordering (the output lists them without "<").
df["Grades"].astype("category").head(5)

excellent    A+
excellent     A
excellent    A-
good         B+
good          B
Name: Grades, dtype: category
Categories (11, object): ['A', 'A+', 'A-', 'B', ..., 'C+', 'C-', 'D', 'D+']

In [6]:
# Declare an *ordered* categorical dtype whose categories run from the worst
# grade ("D") to the best ("A+"), then cast the Grades column to it so that
# comparisons follow this ranking instead of string order.
m_category = pd.CategoricalDtype(
    categories=["D", "D+", "C-", "C", "C+", "B-", "B", "B+", "A-", "A", "A+"],
    ordered=True,
)

# The cast result is kept in `grades`; df["Grades"] is not modified.
grades = df["Grades"].astype(m_category)
grades

excellent    A+
excellent     A
excellent    A-
good         B+
good          B
good         B-
ok           C+
ok            C
ok           C-
poor         D+
poor          D
Name: Grades, dtype: category
Categories (11, object): ['D' < 'D+' < 'C-' < 'C' ... 'B+' < 'A-' < 'A' < 'A+']

In [7]:
# NOTE: df["Grades"] still holds plain strings here (the ordered dtype was
# assigned to `grades`, not written back to df), so ">" compares the strings
# lexicographically. That is why the output lists C+, C-, D+ and D rather than
# the grades that are better than a C. Presumably kept as a deliberate contrast
# with the ordered comparison on `grades` - TODO confirm.
df.loc[df["Grades"] > "C"]

Unnamed: 0,Grades
ok,C+
ok,C-
poor,D+
poor,D


In [8]:
# Check what kind of object `grades` is (the output reports a pandas Series).
type(grades)

pandas.core.series.Series

In [9]:
# `grades` uses the ordered categorical dtype, so ">" follows the declared
# category order: this keeps every grade ranked above "C" (C+ up to A+).
grades.loc[grades > "C"]

excellent    A+
excellent     A
excellent    A-
good         B+
good          B
good         B-
ok           C+
Name: Grades, dtype: category
Categories (11, object): ['D' < 'D+' < 'C-' < 'C' ... 'B+' < 'A-' < 'A' < 'A+']

In [11]:
import numpy as np
import pandas as pd
# Load the census CSV. NOTE: this rebinds `df`, so the grades DataFrame built
# earlier in the notebook is no longer reachable under that name.
df = pd.read_csv("../../Datasets/census.csv")
# Print the per-column summary (name, non-null count, dtype) for a first look.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3193 entries, 0 to 3192
Data columns (total 100 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   SUMLEV                 3193 non-null   int64  
 1   REGION                 3193 non-null   int64  
 2   DIVISION               3193 non-null   int64  
 3   STATE                  3193 non-null   int64  
 4   COUNTY                 3193 non-null   int64  
 5   STNAME                 3193 non-null   object 
 6   CTYNAME                3193 non-null   object 
 7   CENSUS2010POP          3193 non-null   int64  
 8   ESTIMATESBASE2010      3193 non-null   int64  
 9   POPESTIMATE2010        3193 non-null   int64  
 10  POPESTIMATE2011        3193 non-null   int64  
 11  POPESTIMATE2012        3193 non-null   int64  
 12  POPESTIMATE2013        3193 non-null   int64  
 13  POPESTIMATE2014        3193 non-null   int64  
 14  POPESTIMATE2015        3193 non-null   int64  
 15  NPO

In [12]:
# Keep only the rows whose SUMLEV code equals 50 (presumably the county-level
# records - TODO confirm against the census data dictionary).
df = df.loc[df["SUMLEV"] == 50]

In [13]:
# Preview the first three rows that survived the SUMLEV filter.
df.head(n=3)

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
1,50,3,6,1,1,Alabama,Autauga County,54571,54571,54660,...,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.59227,-2.187333
2,50,3,6,1,3,Alabama,Baldwin County,182265,182265,183193,...,14.83296,17.647293,21.845705,19.243287,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499
3,50,3,6,1,5,Alabama,Barbour County,27457,27457,27341,...,-4.728132,-2.50069,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299


In [16]:
# Average 2010 census population per state: group the rows by state name and
# take the mean of CENSUS2010POP within each group. The built-in GroupBy.mean()
# is used instead of passing a NumPy callable to agg(), which would be invoked
# once per group in Python. CENSUS2010POP has no missing values according to
# the df.info() output, so mean()'s NaN-skipping does not change the result.
#
# NOTE: this rebinds `df` from the census DataFrame to a float Series indexed
# by STNAME, so this cell cannot be re-run on its own; re-run the load and
# filter cells first.
df = df.groupby("STNAME")["CENSUS2010POP"].mean()
df.head()

STNAME
Alabama        71339.343284
Alaska         24490.724138
Arizona       426134.466667
Arkansas       38878.906667
California    642309.586207
Name: CENSUS2010POP, dtype: float64

In [19]:
# Bin the per-state averages into 10 intervals (pd.cut with an integer `bins`
# produces equal-width bins). `df` is rebound once more: each state now maps
# to the interval that its average falls into.
df = pd.cut(df, bins=10)

In [26]:
# Look up, by state label, the interval that Alabama's average was binned into.
print(df.loc["Alabama"])

(11706.087, 75333.413]
