### Import pandas and create the sample data

In [10]:
import pandas as pd

# Small hand-made dataset used by the cells that follow: one row per person,
# one dictionary entry per column.
df = pd.DataFrame({
    'name': ['john', 'mary', 'peter', 'jeff', 'bill', 'lisa'],
    'age': [23, 78, 22, 19, 45, 33],
    'state': ['iowa', 'dc', 'california', 'texas', 'washington', 'dc'],
    'num_children': [2, 2, 0, 1, 2, 1],
    'num_pets': [0, 4, 0, 5, 0, 0],
})
    

In [46]:
# Show the full frame (6 rows, 5 columns) to check it was built as intended.
df

Unnamed: 0,name,age,state,num_children,num_pets
0,john,23,iowa,2,0
1,mary,78,dc,2,4
2,peter,22,california,0,0
3,jeff,19,texas,1,5
4,bill,45,washington,2,0
5,lisa,33,dc,1,0


In [47]:
# Column labels, in the order they were supplied to the constructor.
df.columns

Index(['name', 'age', 'state', 'num_children', 'num_pets'], dtype='object')

In [48]:
# Indexing with a list of labels selects several columns and returns a DataFrame.
df[['name', 'age', 'state']]

Unnamed: 0,name,age,state
0,john,23,iowa
1,mary,78,dc
2,peter,22,california
3,jeff,19,texas
4,bill,45,washington
5,lisa,33,dc


In [83]:
# Label-based selection: rows labelled 2 through 4 of the 'name' column.
# A .loc slice includes its end label, so row 4 is part of the result.
df.loc[2:4, 'name']

2    peter
3     jeff
4     bill
Name: name, dtype: object

In [84]:
# ':' keeps every row; the list names the columns to return.
df.loc[:, ['name', 'age']]

Unnamed: 0,name,age
0,john,46
1,mary,156
2,peter,44
3,jeff,38
4,bill,90
5,lisa,66


In [85]:
# select the first 2 rows
# (.iloc slices by position, so the end position 2 is excluded)
df.iloc[:2]

Unnamed: 0,name,age,state,num_children,num_pets
0,john,46,iowa,2,0
1,mary,156,dc,2,4


In [86]:
# select the last 2 rows
# (negative positions count back from the end of the frame)
df.iloc[-2:]

Unnamed: 0,name,age,state,num_children,num_pets
4,bill,90,washington,2,0
5,lisa,66,dc,1,0


In [87]:
# select rows up to and including the one
# with index=2 (this retrieves 3 rows)
# .loc slices by label and keeps the end label, unlike a positional .iloc slice
df.loc[:2]

Unnamed: 0,name,age,state,num_children,num_pets
0,john,46,iowa,2,0
1,mary,156,dc,2,4
2,peter,44,california,0,0


In [88]:
# first column of data frame
# (all rows, column position 0; a single column comes back as a Series)
df.iloc[:,0]

0     john
1     mary
2    peter
3     jeff
4     bill
5     lisa
Name: name, dtype: object

In [89]:
# All rows, restricted to the 'age' and 'state' columns.
df.loc[:,['age', 'state']]
# Equivalent shorthand: df[['age', 'state']]

Unnamed: 0,age,state
0,46,iowa
1,156,dc
2,44,california
3,38,texas
4,90,washington
5,66,dc


In [90]:
# second row of dataframe
# (position 1, because positions start at 0; a single row comes back as a Series)
df.iloc[1] 

name            mary
age              156
state             dc
num_children       2
num_pets           4
Name: 1, dtype: object

In [91]:
# people whose "age" is greater than 30
# (the comparison builds a boolean Series; indexing with it keeps the True rows)
df[df['age'] > 30]

Unnamed: 0,name,age,state,num_children,num_pets
0,john,46,iowa,2,0
1,mary,156,dc,2,4
2,peter,44,california,0,0
3,jeff,38,texas,1,5
4,bill,90,washington,2,0
5,lisa,66,dc,1,0


In [92]:
# Same filter as df[df['age'] > 30], reaching the column through attribute access.
df[df.age > 30]

Unnamed: 0,name,age,state,num_children,num_pets
0,john,46,iowa,2,0
1,mary,156,dc,2,4
2,peter,44,california,0,0
3,jeff,38,texas,1,5
4,bill,90,washington,2,0
5,lisa,66,dc,1,0


In [93]:
# people who have more pets than children
# (the two columns are compared row by row)
df[ df["num_pets"] > df[ "num_children"] ]

Unnamed: 0,name,age,state,num_children,num_pets
1,mary,156,dc,2,4
3,jeff,38,texas,1,5


In [94]:
# people older than 40 who own pets
# (each condition needs its own parentheses because & binds tighter than >)
df[ (df["age"] > 40) & (df["num_pets"] > 0) ] 

Unnamed: 0,name,age,state,num_children,num_pets
1,mary,156,dc,2,4


In [95]:
# df itself is not modified; a copy is returned instead
# (axis=1 means the labels refer to columns rather than rows)
df.drop(["age","num_children"],axis=1)

Unnamed: 0,name,state,num_pets
0,john,iowa,0
1,mary,dc,4
2,peter,california,0
3,jeff,texas,5
4,bill,washington,0
5,lisa,dc,0


In [96]:
# Display df again: 'age' and 'num_children' are still present after the drop.
df

Unnamed: 0,name,age,state,num_children,num_pets
0,john,46,iowa,2,0
1,mary,156,dc,2,4
2,peter,44,california,0,0
3,jeff,38,texas,1,5
4,bill,90,washington,2,0
5,lisa,66,dc,1,0


### Summary statistics and column means

In [97]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,age,num_children,num_pets
count,6.0,6.0,6.0
mean,73.333333,1.333333,1.5
std,44.769037,0.816497,2.345208
min,38.0,0.0,0.0
25%,44.5,1.0,0.0
50%,56.0,1.5,0.0
75%,84.0,2.0,3.0
max,156.0,2.0,5.0


In [104]:
# Mean of each selected column, returned as a Series indexed by column name.
df[['age','num_pets','num_children']].mean()

age             36.666667
num_pets         1.500000
num_children     1.333333
dtype: float64

In [64]:
import numpy as np
# Column means again, this time by handing each column to NumPy:
# with axis=0 the function is called once per column.
df[["age", "num_pets", "num_children"]].apply(
    lambda column: np.mean(column),
    axis=0,
)

age             36.666667
num_pets         1.500000
num_children     1.333333
dtype: float64

In [4]:
# Double brackets return a one-column DataFrame rather than a Series.
df[['age']]

Unnamed: 0,age
0,23
1,78
2,22
3,19
4,45
5,33


### Double the age without modifying the original DataFrame

In [12]:
# Build a new frame whose 'age' column is doubled.
# assign() returns a new DataFrame, so df keeps its original ages.
# Column arithmetic is vectorised: multiplying the Series doubles every entry
# at once, so no apply/lambda over a one-column frame is needed.
new_df = df.assign(age=df['age'] * 2)
new_df

Unnamed: 0,name,age,state,num_children,num_pets
0,john,46,iowa,2,0
1,mary,156,dc,2,4
2,peter,44,california,0,0
3,jeff,38,texas,1,5
4,bill,90,washington,2,0
5,lisa,66,dc,1,0


In [13]:
# The doubling was done on a separate frame, so df still shows the original ages.
df

Unnamed: 0,name,age,state,num_children,num_pets
0,john,23,iowa,2,0
1,mary,78,dc,2,4
2,peter,22,california,0,0
3,jeff,19,texas,1,5
4,bill,45,washington,2,0
5,lisa,33,dc,1,0


### Sorting and filtering

In [14]:
# Rows ordered by ascending age; the original index labels travel with their rows.
df.sort_values(by=['age'])

Unnamed: 0,name,age,state,num_children,num_pets
3,jeff,19,texas,1,5
2,peter,22,california,0,0
0,john,23,iowa,2,0
5,lisa,33,dc,1,0
4,bill,45,washington,2,0
1,mary,78,dc,2,4


In [16]:
# People whose name begins with the letter 'j'.
df[df['name'].str.startswith('j')]

Unnamed: 0,name,age,state,num_children,num_pets
0,john,23,iowa,2,0
3,jeff,19,texas,1,5


### Creating a new DataFrame and rearranging it with `pivot`

In [20]:
from collections import OrderedDict
from pandas import DataFrame
import pandas as pd
import numpy as np

# Column name -> column values, listed in the order the columns should appear.
table = OrderedDict([
    ('Item', ['Item0', 'Item0', 'Item1', 'Item1']),
    ('CType', ['Gold', 'Bronze', 'Gold', 'Silver']),
    ('USD', ['1$', '2$', '3$', '4$']),
    ('EU', ['1€', '2€', '3€', '4€']),
])

# Long-format frame: one row per (Item, CType) pair with its two prices.
d = DataFrame(table)
d

Unnamed: 0,Item,CType,USD,EU
0,Item0,Gold,1$,1€
1,Item0,Bronze,2$,2€
2,Item1,Gold,3$,3€
3,Item1,Silver,4$,4€


In [19]:
# Reshape to wide form: one row per Item, one column per CType, cells taken from USD.
# Item/CType pairs that do not occur in d are left empty (NaN).
d.pivot(index='Item', columns='CType', values='USD')

CType,Bronze,Gold,Silver
Item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Item0,2$,1$,
Item1,,3$,4$
