In [46]:
import pandas as pd

## Series

In [47]:
# No index is given, so the values are labelled 0..n-1.
pd.Series(data=[4, 6, -5, 3])

0    4
1    6
2   -5
3    3
dtype: int64

In [48]:
# Mapping form: each key becomes the label of its value.
pd.Series({'x1': 4, 'x2': 6, 'y1': -5, 'y2': 3})

x1    4
x2    6
y1   -5
y2    3
dtype: int64

In [49]:
# Labelled Series built from a label -> value mapping.
bounding_box = pd.Series({'x1': 4, 'x2': 6, 'y1': -5, 'y2': 3})

# A single label yields a scalar ...
print(bounding_box.loc['x1'])
# ... while a list of labels yields a sub-Series.
bounding_box.loc[['x1', 'y1']]

4


x1    4
y1   -5
dtype: int64

In [50]:
# Divide every element by the total of the Series.
bounding_box.div(bounding_box.sum())

x1    0.500
x2    0.750
y1   -0.625
y2    0.375
dtype: float64

In [51]:
missing_data = pd.Series(
    data=[4, 6, -5, None],
    index=['x1', 'x2', 'y1', 'y2'],
)

# Element-wise mask: True wherever a value is missing.
missing_data.isnull()

x1    False
x2    False
y1    False
y2     True
dtype: bool

## DataFrame

In [52]:
# Column name -> list of values, one entry per row.
data = dict(
    name=['Maria', 'Carla', 'Juan', 'Ana', 'Sergio'],
    age=[15, 33, 12, 21, 45],
    gender=[True, True, False, True, False],
)

pd.DataFrame(data)

Unnamed: 0,name,age,gender
0,Maria,15,True
1,Carla,33,True
2,Juan,12,False
3,Ana,21,True
4,Sergio,45,False


## Read from CSV

In [53]:
# NOTE(review): example left disabled - presumably because 'data/iris.data'
# is not shipped alongside this notebook; confirm before re-enabling.
#pd.read_csv('data/iris.data', header=None).head()

## Index

In [54]:
df = pd.DataFrame.from_dict(data)
# No index was supplied, so the rows get a default RangeIndex.
df.index

RangeIndex(start=0, stop=5, step=1)

In [55]:
df = pd.DataFrame(data)
# Assigning to .index relabels the rows; one label per row is required.
df.index = list('mcjas')
df

Unnamed: 0,name,age,gender
m,Maria,15,True
c,Carla,33,True
j,Juan,12,False
a,Ana,21,True
s,Sergio,45,False


## Indexing

In [56]:
a = pd.DataFrame(data=data, index=list('mcjas'))

# .loc selects by label, .iloc by position; 'm' is the first row, so the
# two selections agree field by field.
a.loc['m'].eq(a.iloc[0])

name      True
age       True
gender    True
Name: m, dtype: bool

In [57]:
# Label-based slices include both endpoints, so rows 'm' and 'c' are returned.
a.loc[slice('m', 'c')]

Unnamed: 0,name,age,gender
m,Maria,15,True
c,Carla,33,True


## Dropping

In [67]:
# Rebuild the frame so this cell does not depend on whichever `df` happens to
# be live in the kernel (an earlier cell relabels the rows of `df` with
# letters, which would otherwise leak into this example).
df = pd.DataFrame(data)
print(df)

# axis='columns' drops a column; the result is a new frame and `df` itself
# is left unchanged.
df.drop('name', axis='columns')

     name  age  gender
0   Maria   15    True
1   Carla   33    True
2    Juan   12   False
3     Ana   21    True
4  Sergio   45   False


Unnamed: 0,age,gender
0,15,True
1,33,True
2,12,False
3,21,True
4,45,False


In [68]:
print(a)

# Drop rows by label; a new frame is returned and `a` is left untouched.
a.drop(labels=['m', 'c'], axis='index')

     name  age  gender
m   Maria   15    True
c   Carla   33    True
j    Juan   12   False
a     Ana   21    True
s  Sergio   45   False


Unnamed: 0,name,age,gender
j,Juan,12,False
a,Ana,21,True
s,Sergio,45,False


## Duplicates

In [60]:
df = pd.DataFrame(dict(f1=[1, 2, 2, 1], f2=[0, 1, 1, 1]))
print(df)
# Rows are compared across all columns; the first occurrence of each is kept.
df.drop_duplicates(keep='first')

   f1  f2
0   1   0
1   2   1
2   2   1
3   1   1


Unnamed: 0,f1,f2
0,1,0
1,2,1
3,1,1


## Filtering

In [61]:
df = pd.DataFrame(data, index=['m', 'c', 'j', 'a', 's'])

# Boolean mask: negate the column with ~ instead of comparing it against the
# False literal; only rows where `gender` is False are kept.
df[~df['gender']]

Unnamed: 0,name,age,gender
j,Juan,12,False
s,Sergio,45,False


## Custom element-wise operations

In [62]:
# .apply calls the given function once per element of the column.
# The transformed column is shown through .assign, which returns a new frame:
# `df` itself is not modified, so re-running this cell always produces the
# same result instead of flipping the values back and forth.
df.assign(gender=df['gender'].apply(lambda x: not x))

Unnamed: 0,name,age,gender
m,Maria,15,False
c,Carla,33,False
j,Juan,12,True
a,Ana,21,False
s,Sergio,45,True


## Sorting

In [71]:
# Start from a fresh frame so the cell gives the same result regardless of
# which earlier cells have relabelled or modified `df`.
df = pd.DataFrame(data)

print(df)
print('\n')
# sort_values returns a sorted copy; `df` keeps its original row order.
print(df.sort_values(by='age'))



     name  age  gender
0   Maria   15    True
1   Carla   33    True
2    Juan   12   False
3     Ana   21    True
4  Sergio   45   False


     name  age  gender
2    Juan   12   False
0   Maria   15    True
3     Ana   21    True
1   Carla   33    True
4  Sergio   45   False


## Statistics

In [34]:
# Only numeric columns are summarised by default; the percentiles are spelled
# out here but are the same as the defaults.
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,age
count,5.0
mean,25.2
std,13.682105
min,12.0
25%,15.0
50%,21.0
75%,33.0
max,45.0


## Counts

In [35]:
# Number of rows per distinct value, most frequent first.
df.loc[:, 'gender'].value_counts()

True     3
False    2
Name: gender, dtype: int64