In [1]:
import pandas as pd
import numpy as np
from string import ascii_lowercase

In [2]:
# Raw observations, written one record per row: (animal, age, visits, priority).
# np.nan marks an unknown age.
records = [
    ('cat', 2.5, 1, 'yes'),
    ('cat', 3, 3, 'yes'),
    ('snake', 0.5, 2, 'no'),
    ('dog', np.nan, 3, 'yes'),
    ('dog', 5, 2, 'no'),
    ('cat', 2, 3, 'no'),
    ('snake', 4.5, 1, 'no'),
    ('cat', np.nan, 1, 'yes'),
    ('dog', 7, 2, 'no'),
    ('dog', 3, 1, 'no'),
]

# Unzip the records into one list per column, keeping the original key order.
animals, ages, visit_counts, priorities = (list(column) for column in zip(*records))
data = {'animal': animals, 'age': ages, 'visits': visit_counts, 'priority': priorities}

# One single-letter row label per record.
labels = list('abcdefghij')

In [3]:
# Build the frame from the column dict, using `labels` as the row index.
# The column order is pinned explicitly: recent pandas keeps the dict's insertion
# order (animal, age, visits, priority) while older releases sorted the keys, and
# rows are assigned positionally further down in age/animal/priority/visits order.
df = pd.DataFrame(data, index=labels, columns=['age', 'animal', 'priority', 'visits'])
df

Unnamed: 0,age,animal,priority,visits
a,2.5,cat,yes,1
b,3.0,cat,yes,3
c,0.5,snake,no,2
d,,dog,yes,3
e,5.0,dog,no,2
f,2.0,cat,no,3
g,4.5,snake,no,1
h,,cat,yes,1
i,7.0,dog,no,2
j,3.0,dog,no,1


In [4]:
# Pick rows by position (translated to labels through df.index), then keep
# only two columns, selected by name.
picked_rows = df.index[[3, 4, 8]]
picked_cols = ['animal', 'age']
df.loc[picked_rows, picked_cols]

Unnamed: 0,animal,age
d,dog,
e,dog,5.0
i,dog,7.0


In [5]:
# Mean age per animal. The 'age' column is selected *before* aggregating:
# averaging the whole frame also attempts the string columns, which raises
# TypeError on pandas >= 2.0 (numeric_only defaults to False there), and it
# computes means that are thrown away anyway.
df.groupby('animal')['age'].mean()

animal
cat      2.5
dog      5.0
snake    2.5
Name: age, dtype: float64

In [6]:
# Add (or overwrite) row 'k'. The values are keyed by column name so the row is
# aligned on labels instead of depending on the frame's left-to-right column order.
df.loc['k'] = pd.Series({'age': 5.5, 'animal': 'dog', 'priority': 'no', 'visits': 2})

In [7]:
# Display the full frame (a bare last expression gets the rich notebook rendering).
df

Unnamed: 0,age,animal,priority,visits
a,2.5,cat,yes,1
b,3.0,cat,yes,3
c,0.5,snake,no,2
d,,dog,yes,3
e,5.0,dog,no,2
f,2.0,cat,no,3
g,4.5,snake,no,1
h,,cat,yes,1
i,7.0,dog,no,2
j,3.0,dog,no,1


In [8]:
# Tally how many rows there are for each distinct animal.
animal_column = df['animal']
animal_column.value_counts()

dog      5
cat      4
snake    2
Name: animal, dtype: int64

In [9]:
# One-column frame holding how often each age value occurs, plus that frame's
# column labels in sorted order.
# NOTE(review): transposing the value_counts() Series would be a no-op, so it is
# left out. If the ages were meant to become the columns, transpose the
# DataFrame itself instead -- TODO confirm intent.
age_counts = df['age'].value_counts()
df1 = pd.DataFrame(age_counts)
cols = sorted(df1.columns)

In [None]:
# Ten pairs of consecutive integers: (0, 1), (1, 2), ..., (9, 10).
lst = [(start, start + 1) for start in range(10)]

In [20]:
# Two-level index with levels named 'n' and 'v': the letter 'd' paired with 1, 2, 3.
pairs = [('d', number) for number in (1, 2, 3)]
index = pd.MultiIndex.from_tuples(pairs, names=['n', 'v'])
index

MultiIndex(levels=[['d'], [1, 2, 3]],
           labels=[[0, 0, 0], [0, 1, 2]],
           names=['n', 'v'])

In [94]:
# Frame built from the pairs in `lst`, with rows labelled 'a name' ... 'j name'.
row_labels = [letter + ' name' for letter in ascii_lowercase[:10]]
df2 = pd.DataFrame(lst, index=row_labels)

# Show the frame with the default integer column labels replaced by words.
# rename() returns a new frame; df2 itself keeps columns 0 and 1.
df2.rename(columns={0: 'zero', 1: 'one'})

Unnamed: 0,zero,one
a name,0,1
b name,1,2
c name,2,3
d name,3,4
e name,4,5
f name,5,6
g name,6,7
h name,7,8
i name,8,9
j name,9,10


In [96]:
# Keep the rows whose age falls between 2 and 4 (between() includes both bounds
# by default).
age_in_range = df['age'].between(2, 4)
df[age_in_range]

Unnamed: 0,age,animal,priority,visits
a,2.5,cat,yes,1
b,3.0,cat,yes,3
f,2.0,cat,no,3
j,3.0,dog,no,1


In [99]:
# Overwrite a single cell, addressed by row and column label, then show the
# frame so the new value at row 'f' / column 'age' can be checked.
target_row, target_col = 'f', 'age'
df.loc[target_row, target_col] = 1.5
df

Unnamed: 0,age,animal,priority,visits
a,2.5,cat,yes,1
b,3.0,cat,yes,3
c,0.5,snake,no,2
d,,dog,yes,3
e,5.0,dog,no,2
f,1.5,cat,no,3
g,4.5,snake,no,1
h,,cat,yes,1
i,7.0,dog,no,2
j,3.0,dog,no,1


In [100]:
# Convert the 'priority' flags from 'yes'/'no' strings to booleans.
# True and False map to themselves so that re-running this cell is harmless:
# with only the two string keys, a second run would turn every value that was
# already converted into NaN.
priority_flags = {'yes': True, 'no': False, True: True, False: False}
df['priority'] = df['priority'].map(priority_flags)
df

Unnamed: 0,age,animal,priority,visits
a,2.5,cat,True,1
b,3.0,cat,True,3
c,0.5,snake,False,2
d,,dog,True,3
e,5.0,dog,False,2
f,1.5,cat,False,3
g,4.5,snake,False,1
h,,cat,True,1
i,7.0,dog,False,2
j,3.0,dog,False,1


In [12]:
# Write the same dog record to label 'k' and over the existing row 'j'.
# The record is keyed by column name so it is aligned on labels instead of
# depending on the frame's left-to-right column order.
# NOTE(review): 'priority' is written as the string 'no'; if the column has
# already been converted to booleans these rows will be inconsistent with the
# rest -- confirm the intended cell order.
dog_row = pd.Series({'age': 5.5, 'animal': 'dog', 'priority': 'no', 'visits': 2})
for row_label in ('k', 'j'):
    df.loc[row_label] = dog_row
df

Unnamed: 0,age,animal,priority,visits
a,2.5,cat,yes,1
b,3.0,cat,yes,3
c,0.5,snake,no,2
d,,dog,yes,3
e,5.0,dog,no,2
f,2.0,cat,no,3
g,4.5,snake,no,1
h,,cat,yes,1
i,7.0,dog,no,2
j,5.5,dog,no,2


In [15]:
# Drop fully duplicated rows, keeping the first occurrence of each. Rows 'j'
# and 'k' hold the same values, so 'j' (which comes first) stays and 'k' is the
# one removed.
# `keep` also accepts "last", or False to discard every row that has a duplicate.
keep_policy = "first"
df.drop_duplicates(keep=keep_policy)

Unnamed: 0,age,animal,priority,visits
a,2.5,cat,yes,1
b,3.0,cat,yes,3
c,0.5,snake,no,2
d,,dog,yes,3
e,5.0,dog,no,2
f,2.0,cat,no,3
g,4.5,snake,no,1
h,,cat,yes,1
i,7.0,dog,no,2
j,5.5,dog,no,2
