In [2]:
import pandas as pd

# Sample gradebook: one row per student (names form the index), holding a
# student ID plus a score for each of four subjects.
df = pd.DataFrame(
    data={
        'ID': ['H241', 'H143', 'H324', 'H324', 'H415'],
        'Math': [17.5, 19, 14, 16, 11],
        'Physics': [19, 13.5, 16.5, 12, 17],
        'Chemistry': [16, 19, 16, 17, 11],
        'Arts': [15, 18, 19, 12, 18],
    },
    index=['Martin', 'Ken', 'Ben', 'Eric', 'Anna'],
)
df

Unnamed: 0,ID,Math,Physics,Chemistry,Arts
Martin,H241,17.5,19.0,16,15
Ken,H143,19.0,13.5,19,18
Ben,H324,14.0,16.5,16,19
Eric,H324,16.0,12.0,17,12
Anna,H415,11.0,17.0,11,18


## Rename index labels or column headers in a DataFrame

In [3]:
# Relabel one row: with axis='index' the mapping is applied to index labels.
# Labels missing from the mapping stay as they are, and `df` is not modified.
df1 = df.rename({'Ken': 'Henry'}, axis='index')
df1

Unnamed: 0,ID,Math,Physics,Chemistry,Arts
Martin,H241,17.5,19.0,16,15
Henry,H143,19.0,13.5,19,18
Ben,H324,14.0,16.5,16,19
Eric,H324,16.0,12.0,17,12
Anna,H415,11.0,17.0,11,18


In [4]:
# Rename a column header on the already-relabelled frame.
df1 = df1.rename({'Physics': 'Science'}, axis='columns')
df1

Unnamed: 0,ID,Math,Science,Chemistry,Arts
Martin,H241,17.5,19.0,16,15
Henry,H143,19.0,13.5,19,18
Ben,H324,14.0,16.5,16,19
Eric,H324,16.0,12.0,17,12
Anna,H415,11.0,17.0,11,18


## Set and Reset index 

In [5]:
# Promote the 'ID' column to the row index; the student names that formed the
# previous index do not appear in the result.
df2 = df.set_index(keys='ID')
df2

Unnamed: 0_level_0,Math,Physics,Chemistry,Arts
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
H241,17.5,19.0,16,15
H143,19.0,13.5,19,18
H324,14.0,16.5,16,19
H324,16.0,12.0,17,12
H415,11.0,17.0,11,18


In [6]:
# Move the 'ID' index back into an ordinary column and fall back to the
# default 0..n-1 integer index (drop=False keeps the old index as a column).
df2 = df2.reset_index(drop=False)
df2

Unnamed: 0,ID,Math,Physics,Chemistry,Arts
0,H241,17.5,19.0,16,15
1,H143,19.0,13.5,19,18
2,H324,14.0,16.5,16,19
3,H324,16.0,12.0,17,12
4,H415,11.0,17.0,11,18


## Edit a column and add new columns

In [7]:
# Build a modified copy of `df` in one pass. assign() evaluates its keyword
# arguments in order, so each callable sees the columns set before it:
#   1. overwrite Chemistry with new scores,
#   2. average the (new) Chemistry and Physics scores,
#   3. divide that average by 4.
df3 = df.assign(
    Chemistry=[11, 19, 13, 13, 18],
    **{
        'GPA-Science': lambda frame: 0.5 * (frame['Chemistry'] + frame['Physics']),
        'Scaled': lambda frame: frame['GPA-Science'] / 4,
    },
)
df3

Unnamed: 0,ID,Math,Physics,Chemistry,Arts,GPA-Science,Scaled
Martin,H241,17.5,19.0,11,15,15.0,3.75
Ken,H143,19.0,13.5,19,18,16.25,4.0625
Ben,H324,14.0,16.5,13,19,14.75,3.6875
Eric,H324,16.0,12.0,13,12,12.5,3.125
Anna,H415,11.0,17.0,18,18,17.5,4.375


## Insert new columns

In [8]:
# DataFrame.insert() adds the column at the given position by mutating the
# frame itself and returns None (it has no `inplace` flag and its result must
# not be assigned), so operate on a copy to leave `df` untouched.
df4 = df.copy()
df4.insert(loc=2, column='Geometry', value=[10, 14, 18, 13, 8])
# The second insert can already use the freshly added 'Geometry' column.
df4.insert(loc=3, column='GPA-Math', value=0.5 * (df4['Math'] + df4['Geometry']))
df4

Unnamed: 0,ID,Math,Geometry,GPA-Math,Physics,Chemistry,Arts
Martin,H241,17.5,10,13.75,19.0,16,15
Ken,H143,19.0,14,16.5,13.5,19,18
Ben,H324,14.0,18,16.0,16.5,16,19
Eric,H324,16.0,13,14.5,12.0,17,12
Anna,H415,11.0,8,9.5,17.0,11,18


## Remove a row or column

In [9]:
# Remove columns by name (columns=... is the keyword form of axis=1).
df5 = df.drop(columns=['Arts', 'Chemistry'])
df5

Unnamed: 0,ID,Math,Physics
Martin,H241,17.5,19.0
Ken,H143,19.0,13.5
Ben,H324,14.0,16.5
Eric,H324,16.0,12.0
Anna,H415,11.0,17.0


In [10]:
# Remove rows by index label (index=... is the keyword form of the default axis=0).
df5 = df5.drop(index=['Martin', 'Ken'])
df5

Unnamed: 0,ID,Math,Physics
Ben,H324,14.0,16.5
Eric,H324,16.0,12.0
Anna,H415,11.0,17.0


## Sorting

In [11]:
# Order the rows alphabetically by their index labels (the student names).
df6 = df.sort_index(axis='index', ascending=True)
df6

Unnamed: 0,ID,Math,Physics,Chemistry,Arts
Anna,H415,11.0,17.0,11,18
Ben,H324,14.0,16.5,16,19
Eric,H324,16.0,12.0,17,12
Ken,H143,19.0,13.5,19,18
Martin,H241,17.5,19.0,16,15


In [12]:
# Order the rows by Math score, highest first.
df6 = df.sort_values(by='Math', ascending=False)
df6

Unnamed: 0,ID,Math,Physics,Chemistry,Arts
Ken,H143,19.0,13.5,19,18
Martin,H241,17.5,19.0,16,15
Eric,H324,16.0,12.0,17,12
Ben,H324,14.0,16.5,16,19
Anna,H415,11.0,17.0,11,18


## Identify unique elements

In [33]:
# Show the original frame again as a reference for the uniqueness examples.
df

Unnamed: 0,ID,Math,Physics,Chemistry,Arts
Martin,H241,17.5,19.0,16,15
Ken,H143,19.0,13.5,19,18
Ben,H324,14.0,16.5,16,19
Eric,H324,16.0,12.0,17,12
Anna,H415,11.0,17.0,11,18


In [34]:
# Distinct IDs in order of first appearance; a repeated ID is listed once.
a1 = pd.unique(df['ID']).tolist()
a1

['H241', 'H143', 'H324', 'H415']

In [14]:
# Distinct values across a single student's row (the row mixes the string ID
# with numeric scores).
a2 = pd.unique(df.loc['Ken']).tolist()
a2

['H143', 19.0, 13.5, 18]

In [15]:
# Frequency table of the ID column: each distinct ID with how often it
# occurs, most frequent first.
a3 = df['ID'].value_counts(sort=True, ascending=False)
a3

ID
H324    2
H241    1
H143    1
H415    1
Name: count, dtype: int64

In [16]:
# Frequency table of the values in one student's row, most frequent first.
a4 = df.loc['Ken'].value_counts(sort=True, ascending=False)
a4

Ken
19.0    2
H143    1
13.5    1
18      1
Name: count, dtype: int64

In [17]:
# Number of distinct values within each row (counted across the columns).
a5 = df.nunique(axis='columns')
a5

Martin    5
Ken       4
Ben       5
Eric      4
Anna      4
dtype: int64

In [18]:
# Show the original frame again for comparison with the per-column counts below.
df

Unnamed: 0,ID,Math,Physics,Chemistry,Arts
Martin,H241,17.5,19.0,16,15
Ken,H143,19.0,13.5,19,18
Ben,H324,14.0,16.5,16,19
Eric,H324,16.0,12.0,17,12
Anna,H415,11.0,17.0,11,18


In [19]:
# Number of distinct values within each column (counted down the rows).
a6 = df.nunique(axis='index')
a6

ID           4
Math         5
Physics      5
Chemistry    4
Arts         4
dtype: int64

## Identify duplicates and remove them

In [29]:
# Extra records whose column values repeat rows already in `df` (and each
# other); stacking them under the original rows gives a frame with duplicates.
new = pd.DataFrame(
    data={
        'ID': ['H143', 'H415', 'H143', 'H241', 'H143'],
        'Math': [19, 11, 19, 17.5, 19],
        'Physics': [13.5, 17, 13.5, 19, 13.5],
        'Chemistry': [19, 11, 19, 16, 19],
        'Arts': [18, 18, 18, 15, 18],
    },
    index=['Julia', 'Anna', 'Julia', 'Martin', 'Julia'],
)
dfn = pd.concat(objs=[df, new], axis='index')
dfn

Unnamed: 0,ID,Math,Physics,Chemistry,Arts
Martin,H241,17.5,19.0,16,15
Ken,H143,19.0,13.5,19,18
Ben,H324,14.0,16.5,16,19
Eric,H324,16.0,12.0,17,12
Anna,H415,11.0,17.0,11,18
Julia,H143,19.0,13.5,19,18
Anna,H415,11.0,17.0,11,18
Julia,H143,19.0,13.5,19,18
Martin,H241,17.5,19.0,16,15
Julia,H143,19.0,13.5,19,18


In [36]:
# Flag every row whose column values repeat an earlier row; the first
# occurrence stays False. Index labels are not part of the comparison.
dfn.duplicated(keep='first')

Martin    False
Ken       False
Ben       False
Eric      False
Anna      False
Julia      True
Anna       True
Julia      True
Martin     True
Julia      True
dtype: bool

In [35]:
# Select the rows that are repeats of an earlier row (all columns compared).
a1 = dfn.loc[dfn.duplicated(keep='first'), :]
a1

Unnamed: 0,ID,Math,Physics,Chemistry,Arts
Julia,H143,19.0,13.5,19,18
Anna,H415,11.0,17.0,11,18
Julia,H143,19.0,13.5,19,18
Martin,H241,17.5,19.0,16,15
Julia,H143,19.0,13.5,19,18


In [22]:
# Select the rows whose ID has already appeared further up, regardless of
# what the other columns contain.
a2 = dfn.loc[dfn.duplicated(subset='ID')]
a2

Unnamed: 0,ID,Math,Physics,Chemistry,Arts
Eric,H324,16.0,12.0,17,12
Julia,H143,19.0,13.5,19,18
Anna,H415,11.0,17.0,11,18
Julia,H143,19.0,13.5,19,18
Martin,H241,17.5,19.0,16,15
Julia,H143,19.0,13.5,19,18


In [22]:
# Remove duplicates: keep only the first occurrence of each row by negating
# the duplicate mask. The mask and the frame must both be `dfn` (the frame
# that actually contains repeated rows); `df` has no duplicate rows, so
# filtering it would just return `df` unchanged and demonstrate nothing.
# Equivalent shorthand: dfn.drop_duplicates().
a3 = dfn.loc[~dfn.duplicated()]
a3

Unnamed: 0,ID,Math,Physics,Chemistry,Arts
Martin,H241,17.5,19.0,16,15
Ken,H143,19.0,13.5,19,18
Ben,H324,14.0,16.5,16,19
Eric,H324,16.0,12.0,17,12
Anna,H415,11.0,17.0,11,18


In [23]:
# Keep only the first row seen for each ID and discard later rows that
# reuse an ID.
a4 = dfn.loc[~dfn.duplicated(subset='ID')]
a4

Unnamed: 0,ID,Math,Physics,Chemistry,Arts
Martin,H241,17.5,19.0,16,15
Ken,H143,19.0,13.5,19,18
Ben,H324,14.0,16.5,16,19
Anna,H415,11.0,17.0,11,18


## Arithmetic on a DataFrame

In [24]:
# Monthly figures for one branch: rows are months, columns are product
# categories. All values are integers.
branch1 = pd.DataFrame(
    data={
        'Grocery': [12, 19, 14, 16, 11],
        'Meat': [19, 13, 16, 12, 17],
        'Dairy': [16, 19, 16, 17, 11],
        'Bread': [15, 18, 19, 12, 18],
    },
    index=['Jan', 'Feb', 'Mar', 'Apr', 'May'],
)
branch1

Unnamed: 0,Grocery,Meat,Dairy,Bread
Jan,12,19,16,15
Feb,19,13,19,18
Mar,14,16,16,19
Apr,16,12,17,12
May,11,17,11,18


In [25]:
# Element-wise arithmetic with a scalar. Each expression produces a new frame
# and leaves branch1 untouched; only branch2 is displayed.
branch2 = branch1 + 10
branch3 = branch1 * 1.2
branch4 = branch1 - 5
branch5 = branch1 / 2
branch2

Unnamed: 0,Grocery,Meat,Dairy,Bread
Jan,22,29,26,25
Feb,29,23,29,28
Mar,24,26,26,29
Apr,26,22,27,22
May,21,27,21,28


## Extract part of a DataFrame based on a condition

In [26]:
# Keep the rows where the Math score is strictly greater than 13.
a1 = df.loc[df['Math'].gt(13)]
a1

Unnamed: 0,ID,Math,Physics,Chemistry,Arts
Martin,H241,17.5,19.0,16,15
Ken,H143,19.0,13.5,19,18
Ben,H324,14.0,16.5,16,19
Eric,H324,16.0,12.0,17,12


In [27]:
# Keep the rows that satisfy both conditions. Element-wise "and" is `&`;
# the element-wise "or" counterpart is `|`.
a2 = df.loc[df['Math'].gt(13) & df['Arts'].gt(15)]
a2

Unnamed: 0,ID,Math,Physics,Chemistry,Arts
Ken,H143,19.0,13.5,19,18
Ben,H324,14.0,16.5,16,19


In [28]:
# Conditional assignment on a copy so `df` stays intact: in the rows matching
# both conditions, set Math to 0 and Arts to 1; all other cells are unchanged.
a3 = df.copy()
a3.loc[a3['Math'].gt(13) & a3['Arts'].gt(15), ['Math', 'Arts']] = [0, 1]
a3

Unnamed: 0,ID,Math,Physics,Chemistry,Arts
Martin,H241,17.5,19.0,16,15
Ken,H143,0.0,13.5,19,1
Ben,H324,0.0,16.5,16,1
Eric,H324,16.0,12.0,17,12
Anna,H415,11.0,17.0,11,18
