In [1]:
# Column-oriented sample data for three passengers: every key maps to a
# list with one entry per passenger, aligned by position.
passenger_data = dict(
    Name=[
        "Braund, Mr. Owen Harris",
        "Allen, Mr. William Henry",
        "Bonnell, Miss. Elizabeth",
    ],
    Age=[22, 35, 58],
    Sex=["male", "male", "female"],
)

In [2]:
# pandas provides the DataFrame/Series types used throughout this notebook.
import pandas as pd

In [3]:
# Build a DataFrame from the column-oriented dict: each key becomes a column.
df = pd.DataFrame(passenger_data)

In [4]:
# Display the frame; rows carry the default integer index 0..2.
df

Unnamed: 0,Name,Age,Sex
0,"Braund, Mr. Owen Harris",22,male
1,"Allen, Mr. William Henry",35,male
2,"Bonnell, Miss. Elizabeth",58,female


In [5]:
# Select a single column by label; the result is a Series named after the column.
df["Name"]

0     Braund, Mr. Owen Harris
1    Allen, Mr. William Henry
2    Bonnell, Miss. Elizabeth
Name: Name, dtype: object

In [6]:
# Keep the Age column as its own Series, then display it.
ages = df["Age"]
ages

0    22
1    35
2    58
Name: Age, dtype: int64

In [7]:
# Largest value in the Age Series.
ages.max()

58

In [8]:
# Summary statistics for the frame; the output covers only the numeric Age column.
df.describe()

Unnamed: 0,Age
count,3.0
mean,38.333333
std,18.230012
min,22.0
25%,28.5
50%,35.0
75%,46.5
max,58.0


In [9]:
# A single column selected with [] is a pandas Series.
type(df["Age"])

pandas.core.series.Series

In [10]:
# The whole table is a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

In [11]:
# Print a structural summary: index range, per-column non-null counts and
# dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
 2   Sex     3 non-null      object
dtypes: int64(1), object(2)
memory usage: 200.0+ bytes


# loc and iloc

In [12]:
# Small frame with string row labels, used for the .loc examples below:
# one inner list per row, one entry per column.
df1 = pd.DataFrame(
    data=[
        [1, 2],
        [4, 5],
        [7, 8],
    ],
    index=['cobra', 'viper', 'sidewinder'],
    columns=['max_speed', 'shield'],
)

In [13]:
# Display the frame; rows are addressed by string labels instead of positions.
df1

Unnamed: 0,max_speed,shield
cobra,1,2
viper,4,5
sidewinder,7,8


In [14]:
# Single label: the matching row comes back as a Series.
df1.loc['viper']

max_speed    4
shield       5
Name: viper, dtype: int64

In [15]:
# List of labels: wrapping the labels in a list ([[...]]) returns a DataFrame.
df1.loc[['viper','sidewinder']]

Unnamed: 0,max_speed,shield
viper,4,5
sidewinder,7,8


In [16]:
# Single label for both row and column: returns the scalar stored in that cell.
df1.loc['cobra', 'shield']

2

In [17]:
# Label slice for the rows plus a single column label.
# Unlike a positional slice, a label slice includes its end label ('viper').
df1.loc['cobra':'viper', 'max_speed']

cobra    1
viper    4
Name: max_speed, dtype: int64

In [18]:
# Boolean list with one entry per row: rows flagged True are kept.
df1.loc[[False, False, True]]

Unnamed: 0,max_speed,shield
sidewinder,7,8


In [19]:
# Index object (same behavior as df.reindex); the result's index carries
# the Index's name, 'snake'.
df1.loc[pd.Index(['cobra','viper'], name='snake')]

Unnamed: 0_level_0,max_speed,shield
snake,Unnamed: 1_level_1,Unnamed: 2_level_1
cobra,1,2
viper,4,5


In [20]:
# Boolean Series produced by a comparison: keeps the rows where shield > 6.
df1.loc[df1['shield']>6]

Unnamed: 0,max_speed,shield
sidewinder,7,8


In [21]:
# Same boolean row filter, restricted to a list of column labels;
# passing the columns as a list keeps the result a DataFrame.
df1.loc[df1['shield']>6, ['max_speed']]

Unnamed: 0,max_speed
sidewinder,7


In [22]:
# Combine conditions element-wise with & (and). Each comparison needs its own
# parentheses because & binds more tightly than > and <.
df1.loc[(df1['max_speed']>1) & (df1['shield']<8)]

Unnamed: 0,max_speed,shield
viper,4,5


In [28]:
# Combine conditions element-wise with | (or). Each comparison needs its own
# parentheses because | binds more tightly than > and <.
df1.loc[(df1['max_speed'] > 4) | (df1['shield'] < 5)]

Unnamed: 0,max_speed,shield
cobra,1,2
sidewinder,7,8


In [29]:
# Callable selector: .loc calls it with the frame being indexed and uses the
# boolean Series it returns as the row mask.
df1.loc[lambda frame: frame['shield'] == 8]

Unnamed: 0,max_speed,shield
sidewinder,7,8


# Setting values

In [30]:
# Set one value for every cell addressed by the row-label list and the
# column-label list.
# NOTE: this modifies df1 in place, so re-running earlier cells after this
# one shows the updated shield values instead of the original ones.
df1.loc[['viper', 'sidewinder'], ['shield']] = 50

In [31]:
# Display df1 to confirm the assignment.
df1

Unnamed: 0,max_speed,shield
cobra,1,2
viper,4,50
sidewinder,7,50


In [32]:
# Set value for an entire row: the scalar is written to every column of 'cobra'.
df1.loc['cobra'] = 100

In [33]:
# Display df1 to confirm the row assignment.
df1

Unnamed: 0,max_speed,shield
cobra,100,100
viper,4,50
sidewinder,7,50


In [34]:
# Set value for an entire column: ':' selects every row.
df1.loc[:, 'max_speed'] = 200

In [35]:
# Display df1 to confirm the column assignment.
df1

Unnamed: 0,max_speed,shield
cobra,200,100
viper,200,50
sidewinder,200,50


# iloc

In [37]:
# Row-oriented records (a list holding one dict per row); the dict keys
# become the column labels of the frame used in the .iloc examples.
mydict = [
    dict(a=1, b=2, c=3, d=4),
    dict(a=100, b=200, c=300, d=400),
    dict(a=1000, b=2000, c=3000, d=4000),
]

df2 = pd.DataFrame(data=mydict)

In [38]:
# Display the frame; rows carry the default integer index 0..2.
df2

Unnamed: 0,a,b,c,d
0,1,2,3,4
1,100,200,300,400
2,1000,2000,3000,4000


In [39]:
# A scalar position returns the row as a Series.
type(df2.iloc[0])

pandas.core.series.Series

In [40]:
# Indexing just the rows

# With a scalar integer: returns the row at position 0 as a Series.
df2.iloc[0]

a    1
b    2
c    3
d    4
Name: 0, dtype: int64

In [43]:
# With a list of integers: the result stays a DataFrame, even for one row.

print(df2.iloc[[0]])
type(df2.iloc[[0]])  # DataFrame

   a  b  c  d
0  1  2  3  4


pandas.core.frame.DataFrame

In [44]:
# A list of positions selects the rows at positions 0 and 1.
df2.iloc[[0,1]]

Unnamed: 0,a,b,c,d
0,1,2,3,4
1,100,200,300,400


In [45]:
# With a slice object: positions 0, 1 and 2 (the end position 3 is excluded).
df2.iloc[:3]

Unnamed: 0,a,b,c,d
0,1,2,3,4
1,100,200,300,400
2,1000,2000,3000,4000


In [46]:
# With a boolean mask the same length as the index: True keeps the row.
df2.iloc[[True,False,True]]

Unnamed: 0,a,b,c,d
0,1,2,3,4
2,1000,2000,3000,4000


In [48]:
# With a callable, useful in method chains.
# The argument passed to the lambda is the DataFrame being sliced; this
# selects the rows whose index label is even.

df2.iloc[lambda frame: frame.index % 2 == 0]

Unnamed: 0,a,b,c,d
0,1,2,3,4
2,1000,2000,3000,4000


In [49]:
# Indexing both axes
# With scalar integers: (row position, column position) returns a scalar.
df2.iloc[0, 0]

1

In [50]:
# Row position 0, column position 1.
df2.iloc[0, 1]

2

In [52]:
# With lists of integers: row positions first, then column positions.
df2.iloc[[0, 1],[2, 3]]

Unnamed: 0,c,d
0,3,4
1,300,400


In [54]:
# With slice objects: rows 0-1 and columns 1-2 (end positions are excluded).
df2.iloc[:2,1:3]

Unnamed: 0,b,c
0,2,3
1,200,300


In [55]:
# With a boolean array whose length matches the columns: True keeps the column.

df2.iloc[:, [True, False, True, False]]

Unnamed: 0,a,c
0,1,3
1,100,300
2,1000,3000


In [56]:
# With a callable that receives the DataFrame being indexed and returns
# the column positions to keep.
df2.iloc[:, lambda frame: [0, 2]]

Unnamed: 0,a,c
0,1,3
1,100,300
2,1000,3000
