In [2]:
import pandas as pd

df = pd.DataFrame(data= {'name':['john', 'mary', 'peter','jeff','bill', 'lisa'], 'age':[23, 78, 22, 19, 45, 33], 
                         'state': ['iowa', 'dc', 'california', 'texas', 'washington', 'dc'], 'num_children': [2, 2, 0, 1, 2, 1],
                        'num_pets' : [0, 4, 0, 5, 0, 0]})

In [3]:
df

Unnamed: 0,name,age,state,num_children,num_pets
0,john,23,iowa,2,0
1,mary,78,dc,2,4
2,peter,22,california,0,0
3,jeff,19,texas,1,5
4,bill,45,washington,2,0
5,lisa,33,dc,1,0


In [4]:
df.columns

Index(['name', 'age', 'state', 'num_children', 'num_pets'], dtype='object')

If we are interested in only the first three columns...

In [5]:
df[['name', 'age', 'state']]

Unnamed: 0,name,age,state
0,john,23,iowa
1,mary,78,dc
2,peter,22,california
3,jeff,19,texas
4,bill,45,washington
5,lisa,33,dc


Select all rows, grabbing information from columns name and age

In [6]:
df.loc[:, ['name', 'age']]

Unnamed: 0,name,age
0,john,23
1,mary,78
2,peter,22
3,jeff,19
4,bill,45
5,lisa,33


Select the last two rows

In [7]:
df.iloc[-2:]

Unnamed: 0,name,age,state,num_children,num_pets
4,bill,45,washington,2,0
5,lisa,33,dc,1,0


Select the first column of all rows

In [18]:
df.iloc[:, 0]

0     john
1     mary
2    peter
3     jeff
4     bill
5     lisa
Name: name, dtype: object

In [19]:
df[df['age'] > 30]

Unnamed: 0,name,age,state,num_children,num_pets
1,mary,78,dc,2,4
4,bill,45,washington,2,0
5,lisa,33,dc,1,0


In [23]:
arg_pets_more_than_children = df["num_pets"] > df["num_children"]

df[arg_pets_more_than_children]

Unnamed: 0,name,age,state,num_children,num_pets
1,mary,78,dc,2,4
3,jeff,19,texas,1,5


In [39]:
df[ (df['age'] > 40) & (df['num_pets'])]

Unnamed: 0,name,age,state,num_children,num_pets


In [40]:
df[ (df['age'] > 40) | (df['num_pets'])]

Unnamed: 0,name,age,state,num_children,num_pets
1,mary,78,dc,2,4
3,jeff,19,texas,1,5
4,bill,45,washington,2,0


Drop age and num_children columns

In [50]:
df.drop(columns=["age", "num_children"])

Unnamed: 0,name,state,num_pets
0,john,iowa,0
1,mary,dc,4
2,peter,california,0
3,jeff,texas,5
4,bill,washington,0
5,lisa,dc,0


In [46]:
df.drop(['age', 'num_children'], axis=1)

Unnamed: 0,name,state,num_pets
0,john,iowa,0
1,mary,dc,4
2,peter,california,0
3,jeff,texas,5
4,bill,washington,0
5,lisa,dc,0


In [42]:
df

Unnamed: 0,name,age,state,num_children,num_pets
0,john,23,iowa,2,0
1,mary,78,dc,2,4
2,peter,22,california,0,0
3,jeff,19,texas,1,5
4,bill,45,washington,2,0
5,lisa,33,dc,1,0


In [47]:
sum(df['age']) / len(df['age'])

36.666666666666664

In [48]:
sum(df['num_pets']) / len(df['num_pets'])

1.5

In [None]:
sum(df['num_children']) / len(df['num_children'])

In [49]:
df.drop(0, axis=0)

Unnamed: 0,name,age,state,num_children,num_pets
1,mary,78,dc,2,4
2,peter,22,california,0,0
3,jeff,19,texas,1,5
4,bill,45,washington,2,0
5,lisa,33,dc,1,0


In [51]:
df['age'].mean()

36.666666666666664

In [52]:
df['num_children'].mean()

1.3333333333333333

In [53]:
df[['age', 'num_children', 'num_pets']].mean()

age             36.666667
num_children     1.333333
num_pets         1.500000
dtype: float64

In [63]:
df.age.describe()

count    6.000000
mean     1.500000
std      2.345208
min      0.000000
25%      0.000000
50%      0.000000
75%      3.000000
max      5.000000
Name: num_pets, dtype: float64

In [62]:
import numpy as np
df[['age', 'num_pets', 'num_children']].apply(lambda col: np.mean(col), axis=1)

0     8.333333
1    28.000000
2     7.333333
3     8.333333
4    15.666667
5    11.333333
dtype: float64

In [64]:
type(df['age'])

pandas.core.series.Series

In [69]:
df['age'].apply(lambda x: x * 2)

0     92
1    312
2     88
3     76
4    180
5    132
Name: age, dtype: int64

In [66]:
df['age'] = df['age'].apply(lambda x: x * 2)

In [67]:
df

Unnamed: 0,name,age,state,num_children,num_pets
0,john,46,iowa,2,0
1,mary,156,dc,2,4
2,peter,44,california,0,0
3,jeff,38,texas,1,5
4,bill,90,washington,2,0
5,lisa,66,dc,1,0


Return a sliced ages dataframe in ascending order

In [75]:
df['age'].sort_values(ascending=True)

3     38
2     44
0     46
5     66
4     90
1    156
Name: age, dtype: int64

In [76]:
df.sort_values('age', ascending=True)

Unnamed: 0,name,age,state,num_children,num_pets
3,jeff,38,texas,1,5
2,peter,44,california,0,0
0,john,46,iowa,2,0
5,lisa,66,dc,1,0
4,bill,90,washington,2,0
1,mary,156,dc,2,4


In [82]:
df[df['name'].apply(lambda x: x[0] == 'j')]

Unnamed: 0,name,age,state,num_children,num_pets
0,john,46,iowa,2,0
3,jeff,38,texas,1,5


In [81]:
df[df.name.str.startswith('j')]

Unnamed: 0,name,age,state,num_children,num_pets
0,john,46,iowa,2,0
3,jeff,38,texas,1,5


In [94]:
from collections import OrderedDict
from pandas import DataFrame
import pandas as pd
import numpy as np

table = OrderedDict((
    ("Item", ['Item0', 'Item0', 'Item1', 'Item1']),
    ('CType',['Gold', 'Bronze', 'Gold', 'Silver']),
    ('USD',  ['1$', '2$', '3$', '4$']),
    ('EU',   ['1€', '2€', '3€', '4€'])
))
d = DataFrame(table)

In [98]:
p = d.pivot(index='Item', columns='CType', values='USD'); p

CType,Bronze,Gold,Silver
Item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Item0,2$,1$,
Item1,,3$,4$
