# pandas入门

## 读取csv文件

In [1]:
import pandas as pd

In [2]:
data=pd.read_csv("./data/tips.csv")
data

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


### 读取行列标签

In [3]:
data.columns

Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size'], dtype='object')

In [4]:
data.index

RangeIndex(start=0, stop=244, step=1)

In [5]:
# 将某一列设置为行标签
data_index=data.set_index("sex")
data_index

Unnamed: 0_level_0,total_bill,tip,smoker,day,time,size
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Female,16.99,1.01,No,Sun,Dinner,2
Male,10.34,1.66,No,Sun,Dinner,3
Male,21.01,3.50,No,Sun,Dinner,3
Male,23.68,3.31,No,Sun,Dinner,2
Female,24.59,3.61,No,Sun,Dinner,4
...,...,...,...,...,...,...
Male,29.03,5.92,No,Sat,Dinner,3
Female,27.18,2.00,Yes,Sat,Dinner,2
Male,22.67,2.00,Yes,Sat,Dinner,2
Male,17.82,1.75,No,Sat,Dinner,2


In [6]:
# 将默认行编号的行标签改成指定标签
data_index.index

Index(['Female', 'Male', 'Male', 'Male', 'Female', 'Male', 'Male', 'Male',
       'Male', 'Male',
       ...
       'Male', 'Male', 'Male', 'Male', 'Female', 'Male', 'Female', 'Male',
       'Male', 'Female'],
      dtype='object', name='sex', length=244)

### 通过行列标签获取指定的行列数据(loc)

In [7]:
# 某些行的所有列
data.loc[[0,1]]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3


In [8]:
# 某一行的某一列
data.loc[[1],['sex']]

Unnamed: 0,sex
1,Male


In [9]:
# 某些行的某些列
data.loc[[1,2],['sex','day']]

Unnamed: 0,sex,day
1,Male,Sun
2,Male,Sun


In [10]:
# 所有行的某些列
data.loc[:,['sex','day']]

Unnamed: 0,sex,day
0,Female,Sun
1,Male,Sun
2,Male,Sun
3,Male,Sun
4,Female,Sun
...,...,...
239,Male,Sat
240,Female,Sat
241,Male,Sat
242,Male,Sat


### 通过行列编号进行获取(iloc)

In [11]:
data_index.iloc[[1,2,3]]

Unnamed: 0_level_0,total_bill,tip,smoker,day,time,size
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Male,10.34,1.66,No,Sun,Dinner,3
Male,21.01,3.5,No,Sun,Dinner,3
Male,23.68,3.31,No,Sun,Dinner,2


In [12]:
data_index.iloc[[0,1,2],[0,1,2]]

Unnamed: 0_level_0,total_bill,tip,smoker
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,16.99,1.01,No
Male,10.34,1.66,No
Male,21.01,3.5,No


### 使用loc和iloc进行切片操作

In [13]:
data.loc[1:3,'sex':'day']

Unnamed: 0,sex,smoker,day
1,Male,No,Sun
2,Male,No,Sun
3,Male,No,Sun


In [14]:
data_index.iloc[1:3,0:3]

Unnamed: 0_level_0,total_bill,tip,smoker
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,10.34,1.66,No
Male,21.01,3.5,No


### []获取指定行列数据

In [15]:
data[['sex','day']]

Unnamed: 0,sex,day
0,Female,Sun
1,Male,Sun
2,Male,Sun
3,Male,Sun
4,Female,Sun
...,...,...
239,Male,Sat
240,Female,Sat
241,Male,Sat
242,Male,Sat


In [16]:
data[0:6:2]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
2,21.01,3.5,Male,No,Sun,Dinner,3
4,24.59,3.61,Female,No,Sun,Dinner,4


## Series

### Series创建

In [17]:
s=pd.Series(['张三','李四'])
s

0    张三
1    李四
dtype: object

In [18]:
s1=pd.Series(['张三',10],index=['name','age'])
s1

name    张三
age     10
dtype: object

In [19]:
scientists=pd.read_csv('./data/scientists.csv')
# 一列为Series
age=scientists['Age']
age

0    37
1    61
2    90
3    66
4    56
5    45
6    41
7    77
Name: Age, dtype: int64

### Series属性

In [20]:
age.shape

(8,)

In [21]:
age.size

8

In [22]:
age.index

RangeIndex(start=0, stop=8, step=1)

In [23]:
age.values

array([37, 61, 90, 66, 56, 45, 41, 77], dtype=int64)

In [24]:
age.dtypes

dtype('int64')

### Series方法

In [25]:
age.keys()

RangeIndex(start=0, stop=8, step=1)

In [26]:
age.mean()

59.125

In [27]:
age.max()

90

In [28]:
age.min()

37

In [29]:
age.std()

18.325918413937288

In [30]:
age.value_counts()

37    1
61    1
90    1
66    1
56    1
45    1
41    1
77    1
Name: Age, dtype: int64

In [31]:
age.count() #非空个数

8

In [32]:
age.describe()

count     8.000000
mean     59.125000
std      18.325918
min      37.000000
25%      44.000000
50%      58.500000
75%      68.750000
max      90.000000
Name: Age, dtype: float64

### bool变量运算

In [33]:
bool_values=[False,False,True,False,True,False,True,True]
age[bool_values]

2    90
4    56
6    41
7    77
Name: Age, dtype: int64

In [34]:
age>age.mean()

0    False
1     True
2     True
3     True
4    False
5    False
6    False
7     True
Name: Age, dtype: bool

In [35]:
age[age>age.mean()]

1    61
2    90
3    66
7    77
Name: Age, dtype: int64

### Series运算

In [36]:
age+100

0    137
1    161
2    190
3    166
4    156
5    145
6    141
7    177
Name: Age, dtype: int64

In [37]:
age*2

0     74
1    122
2    180
3    132
4    112
5     90
6     82
7    154
Name: Age, dtype: int64

In [38]:
age+age

0     74
1    122
2    180
3    132
4    112
5     90
6     82
7    154
Name: Age, dtype: int64

In [39]:
s2=pd.Series([100,1])
age+s2

0    137.0
1     62.0
2      NaN
3      NaN
4      NaN
5      NaN
6      NaN
7      NaN
dtype: float64

## DataFrame

### DataFrame创建

In [40]:
df=pd.DataFrame({
            "name":["张三","李四"],
            "age":[10,20]})
df

Unnamed: 0,name,age
0,张三,10
1,李四,20


In [41]:
df=pd.DataFrame({
            "name":["张三","李四"],
            "age":[10,20]}
            ,columns=['age','name'],index=['zhangsan','lisi'])
df

Unnamed: 0,age,name
zhangsan,10,张三
lisi,20,李四


In [42]:
# 指定列作为行标签
df1=pd.read_csv("./data/scientists.csv",index_col='Name')
df1

Unnamed: 0_level_0,Born,Died,Age,Occupation
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist
William Gosset,1876-06-13,1937-10-16,61,Statistician
Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
Marie Curie,1867-11-07,1934-07-04,66,Chemist
Rachel Carson,1907-05-27,1964-04-14,56,Biologist
John Snow,1813-03-15,1858-06-16,45,Physician
Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [43]:
index_name={"Rosaline Franklin":"zhangsan"}
column_name={"Born":"born"}
df1.rename(index=index_name,columns=column_name)

Unnamed: 0_level_0,born,Died,Age,Occupation
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
zhangsan,1920-07-25,1958-04-16,37,Chemist
William Gosset,1876-06-13,1937-10-16,61,Statistician
Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
Marie Curie,1867-11-07,1934-07-04,66,Chemist
Rachel Carson,1907-05-27,1964-04-14,56,Biologist
John Snow,1813-03-15,1858-06-16,45,Physician
Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


### DataFrame属性

In [44]:
scientists.shape

(8, 5)

In [45]:
scientists.size

40

In [46]:
scientists.ndim

2

In [47]:
scientists.dtypes #object为字符串

Name          object
Born          object
Died          object
Age            int64
Occupation    object
dtype: object

### DataFrame方法

In [48]:
len(scientists)

8

In [49]:
scientists.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Name        8 non-null      object
 1   Born        8 non-null      object
 2   Died        8 non-null      object
 3   Age         8 non-null      int64 
 4   Occupation  8 non-null      object
dtypes: int64(1), object(4)
memory usage: 448.0+ bytes


In [50]:
scientists.head() #取前五行数据

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist
1,William Gosset,1876-06-13,1937-10-16,61,Statistician
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist


In [51]:
scientists.tail()

Unnamed: 0,Name,Born,Died,Age,Occupation
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist
5,John Snow,1813-03-15,1858-06-16,45,Physician
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [52]:
scientists.max()

Name          William Gosset
Born              1920-07-25
Died              1964-04-14
Age                       90
Occupation      Statistician
dtype: object

In [53]:
scientists.min()

Name          Alan Turing
Born           1777-04-30
Died           1855-02-23
Age                    37
Occupation      Biologist
dtype: object

In [54]:
scientists.count()

Name          8
Born          8
Died          8
Age           8
Occupation    8
dtype: int64

In [55]:
scientists.describe()

Unnamed: 0,Age
count,8.0
mean,59.125
std,18.325918
min,37.0
25%,44.0
50%,58.5
75%,68.75
max,90.0


In [56]:
scientists.describe(include='all')

Unnamed: 0,Name,Born,Died,Age,Occupation
count,8,8,8,8.0,8
unique,8,8,8,,7
top,Rosaline Franklin,1920-07-25,1958-04-16,,Chemist
freq,1,1,1,,2
mean,,,,59.125,
std,,,,18.325918,
min,,,,37.0,
25%,,,,44.0,
50%,,,,58.5,
75%,,,,68.75,


### bool运算

In [57]:
scientists[bool_values]

Unnamed: 0,Name,Born,Died,Age,Occupation
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [58]:
scientists[scientists['Age']>scientists['Age'].mean()]

Unnamed: 0,Name,Born,Died,Age,Occupation
1,William Gosset,1876-06-13,1937-10-16,61,Statistician
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


### 运算

In [59]:
scientists*2

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline FranklinRosaline Franklin,1920-07-251920-07-25,1958-04-161958-04-16,74,ChemistChemist
1,William GossetWilliam Gosset,1876-06-131876-06-13,1937-10-161937-10-16,122,StatisticianStatistician
2,Florence NightingaleFlorence Nightingale,1820-05-121820-05-12,1910-08-131910-08-13,180,NurseNurse
3,Marie CurieMarie Curie,1867-11-071867-11-07,1934-07-041934-07-04,132,ChemistChemist
4,Rachel CarsonRachel Carson,1907-05-271907-05-27,1964-04-141964-04-14,112,BiologistBiologist
5,John SnowJohn Snow,1813-03-151813-03-15,1858-06-161858-06-16,90,PhysicianPhysician
6,Alan TuringAlan Turing,1912-06-231912-06-23,1954-06-071954-06-07,82,Computer ScientistComputer Scientist
7,Johann GaussJohann Gauss,1777-04-301777-04-30,1855-02-231855-02-23,154,MathematicianMathematician


In [60]:
scientists+scientists

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline FranklinRosaline Franklin,1920-07-251920-07-25,1958-04-161958-04-16,74,ChemistChemist
1,William GossetWilliam Gosset,1876-06-131876-06-13,1937-10-161937-10-16,122,StatisticianStatistician
2,Florence NightingaleFlorence Nightingale,1820-05-121820-05-12,1910-08-131910-08-13,180,NurseNurse
3,Marie CurieMarie Curie,1867-11-071867-11-07,1934-07-041934-07-04,132,ChemistChemist
4,Rachel CarsonRachel Carson,1907-05-271907-05-27,1964-04-141964-04-14,112,BiologistBiologist
5,John SnowJohn Snow,1813-03-151813-03-15,1858-06-161858-06-16,90,PhysicianPhysician
6,Alan TuringAlan Turing,1912-06-231912-06-23,1954-06-071954-06-07,82,Computer ScientistComputer Scientist
7,Johann GaussJohann Gauss,1777-04-301777-04-30,1855-02-231855-02-23,154,MathematicianMathematician


In [61]:
scientists+scientists[:4]

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline FranklinRosaline Franklin,1920-07-251920-07-25,1958-04-161958-04-16,74.0,ChemistChemist
1,William GossetWilliam Gosset,1876-06-131876-06-13,1937-10-161937-10-16,122.0,StatisticianStatistician
2,Florence NightingaleFlorence Nightingale,1820-05-121820-05-12,1910-08-131910-08-13,180.0,NurseNurse
3,Marie CurieMarie Curie,1867-11-071867-11-07,1934-07-041934-07-04,132.0,ChemistChemist
4,,,,,
5,,,,,
6,,,,,
7,,,,,


### DataFrame行列增删改

In [62]:
# 添加行
s3=pd.Series(['zhangsan','2023-07-12','2080-08-12','57','Teacher'],index=['Name','Born','Died','Age','Occupation'])
scientists.append(s3,ignore_index=True)

  scientists.append(s3,ignore_index=True)


Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist
1,William Gosset,1876-06-13,1937-10-16,61,Statistician
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist
5,John Snow,1813-03-15,1858-06-16,45,Physician
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician
8,zhangsan,2023-07-12,2080-08-12,57,Teacher


In [63]:
# 修改行
scientists.loc[1]=['zhangsan','2023-07-12','2080-08-12','57','Teacher']
scientists

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist
1,zhangsan,2023-07-12,2080-08-12,57,Teacher
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist
5,John Snow,1813-03-15,1858-06-16,45,Physician
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [64]:
scientists.loc[1,'Name']='wangwu'
scientists

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist
1,wangwu,2023-07-12,2080-08-12,57,Teacher
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist
5,John Snow,1813-03-15,1858-06-16,45,Physician
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [65]:
# 删除行
scientists.drop(1)

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist
5,John Snow,1813-03-15,1858-06-16,45,Physician
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [66]:
# 列添加
scientists['Account']=[1,2,3,4,3,2,1,2]
scientists

Unnamed: 0,Name,Born,Died,Age,Occupation,Account
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist,1
1,wangwu,2023-07-12,2080-08-12,57,Teacher,2
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse,3
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist,4
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist,3
5,John Snow,1813-03-15,1858-06-16,45,Physician,2
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist,1
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician,2


In [67]:
# 修改列
scientists.loc[1,'Account']=10
scientists

Unnamed: 0,Name,Born,Died,Age,Occupation,Account
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist,1
1,wangwu,2023-07-12,2080-08-12,57,Teacher,10
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse,3
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist,4
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist,3
5,John Snow,1813-03-15,1858-06-16,45,Physician,2
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist,1
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician,2


In [68]:
# 删除列
scientists.drop('Account',axis=1)

Unnamed: 0,Name,Born,Died,Age,Occupation
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist
1,wangwu,2023-07-12,2080-08-12,57,Teacher
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist
5,John Snow,1813-03-15,1858-06-16,45,Physician
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


### DataFrame导出

In [69]:
scientists.to_csv("./new_scientists.csv",index=False)

### DataFrame的查询

In [71]:
df2=pd.read_csv("./data/scientists.csv")

In [74]:
df2.loc[(df2['Age']>60) & (df2['Age']<80)]

Unnamed: 0,Name,Born,Died,Age,Occupation
1,William Gosset,1876-06-13,1937-10-16,61,Statistician
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [75]:
df2.query('Age > 60 & Age < 80')

Unnamed: 0,Name,Born,Died,Age,Occupation
1,William Gosset,1876-06-13,1937-10-16,61,Statistician
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician


In [76]:
df2.groupby('Occupation')['Age'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Occupation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Biologist,1.0,56.0,,56.0,56.0,56.0,56.0,56.0
Chemist,2.0,51.5,20.506097,37.0,44.25,51.5,58.75,66.0
Computer Scientist,1.0,41.0,,41.0,41.0,41.0,41.0,41.0
Mathematician,1.0,77.0,,77.0,77.0,77.0,77.0,77.0
Nurse,1.0,90.0,,90.0,90.0,90.0,90.0,90.0
Physician,1.0,45.0,,45.0,45.0,45.0,45.0,45.0
Statistician,1.0,61.0,,61.0,61.0,61.0,61.0,61.0


In [77]:
df2.groupby("Occupation").agg({
    'Name':'count',
    'Age':'mean'
})

Unnamed: 0_level_0,Name,Age
Occupation,Unnamed: 1_level_1,Unnamed: 2_level_1
Biologist,1,56.0
Chemist,2,51.5
Computer Scientist,1,41.0
Mathematician,1,77.0
Nurse,1,90.0
Physician,1,45.0
Statistician,1,61.0


In [78]:
df2.sort_values(by=['Age'],ascending=False)

Unnamed: 0,Name,Born,Died,Age,Occupation
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
1,William Gosset,1876-06-13,1937-10-16,61,Statistician
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist
5,John Snow,1813-03-15,1858-06-16,45,Physician
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist


In [79]:
df2.sort_index(ascending=False)

Unnamed: 0,Name,Born,Died,Age,Occupation
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician
6,Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist
5,John Snow,1813-03-15,1858-06-16,45,Physician
4,Rachel Carson,1907-05-27,1964-04-14,56,Biologist
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
1,William Gosset,1876-06-13,1937-10-16,61,Statistician
0,Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist


In [80]:
df2.nlargest(3,columns='Age')

Unnamed: 0,Name,Born,Died,Age,Occupation
2,Florence Nightingale,1820-05-12,1910-08-13,90,Nurse
7,Johann Gauss,1777-04-30,1855-02-23,77,Mathematician
3,Marie Curie,1867-11-07,1934-07-04,66,Chemist
