In [1]:
import pandas as pd

# Read the car sales CSV (relative path) into a DataFrame
car_sales_df = pd.read_csv('../data/car-sales.csv')
# Preview the first rows to check what was loaded
car_sales_df.head()


Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"


## Viewing and Selecting

In [4]:
# head() with no argument gives a quick look at the first 5 rows of a DataFrame
car_sales_df.head()

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"


In [5]:
# head() also accepts a row count: head(7) returns the first 7 rows
car_sales_df.head(7)

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"


In [3]:
# tail() - quick look at the last 5 rows of a dataframe.
# Like head(), it accepts a row count (for example tail(7)).
# A notebook cell only renders its final expression, so this cell keeps a
# single call: the one the comment and the displayed 5-row result describe.
car_sales_df.tail()


Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


## .loc and .iloc

In [6]:
# Build a Series from a plain list; with no index given, the labels are 0..4
animals_series = pd.Series(
    data=["cat", "dog", "bird", "fish", "snake"],
)
animals_series

0      cat
1      dog
2     bird
3     fish
4    snake
dtype: object

In [11]:
# An explicit index can be passed as well; labels may repeat
# ("a" is used three times here)
animals_series = pd.Series(
    data=["cat", "dog", "bird", "fish", "snake"],
    index=["a", "b", "a", "d", "a"],
)
animals_series

a      cat
b      dog
a     bird
d     fish
a    snake
dtype: object

In [None]:
# Slicing by label needs an unambiguous start bound. The label "a" is repeated
# in this index, so pandas raises a KeyError. The error is the point of this
# cell, so catch it and print the message; that way the notebook still runs
# from top to bottom without stopping here.
try:
    animals_series.loc["a":"c"]
except KeyError as err:
    print(f"KeyError: {err}")


KeyError: "Cannot get left slice bound for non-unique label: 'a'"

In [14]:
# Rebuild the series with integer labels, where label 3 is used twice
animals_series = pd.Series(
    data=["cat", "dog", "bird", "fish", "snake"],
    index=[0, 1, 3, 3, 4],
)
animals_series

0      cat
1      dog
3     bird
3     fish
4    snake
dtype: object

In [15]:
# loc selects by index label; label 3 occurs twice here, so both entries come back
animals_series.loc[3]

3    bird
3    fish
dtype: object

In [16]:
# Display the whole DataFrame (10 rows) for reference
car_sales_df

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


In [17]:
# loc[3] on the DataFrame returns the row whose index label is 3
car_sales_df.loc[3]

Make                    BMW
Colour                Black
Odometer (KM)         11179
Doors                     5
Price            $22,000.00
Name: 3, dtype: object

In [20]:
# Show the series again: its index labels are 0, 1, 3, 3, 4
animals_series

0      cat
1      dog
3     bird
3     fish
4    snake
dtype: object

In [23]:
# iloc selects by integer position (counting from 0), not by label;
# position 3 holds 'fish' even though label 3 appears twice
animals_series.iloc[3]

'fish'

## Slicing

In [24]:
# Show the series again before slicing it by position
animals_series

0      cat
1      dog
3     bird
3     fish
4    snake
dtype: object

In [26]:
# iloc[:3] takes positions 0, 1 and 2 (the stop position is excluded)
animals_series.iloc[:3]

0     cat
1     dog
3    bird
dtype: object

In [27]:
# iloc[3:] takes everything from position 3 to the end
animals_series.iloc[3:]

3     fish
4    snake
dtype: object

In [28]:
# iloc[3:5] takes positions 3 and 4; for this 5-item series that matches iloc[3:]
animals_series.iloc[3:5]

3     fish
4    snake
dtype: object

In [29]:
# Select a single column by name; the result is a Series named after the column
car_sales_df["Make"]


0    Toyota
1     Honda
2    Toyota
3       BMW
4    Nissan
5    Toyota
6     Honda
7     Honda
8    Toyota
9    Nissan
Name: Make, dtype: object

In [30]:
# Pass a list of column names to select several columns at once
car_sales_df[["Make", "Colour"]]

Unnamed: 0,Make,Colour
0,Toyota,White
1,Honda,Red
2,Toyota,Blue
3,BMW,Black
4,Nissan,White
5,Toyota,Green
6,Honda,Blue
7,Honda,Blue
8,Toyota,White
9,Nissan,White


## Filtering with Boolean Indexing

In [31]:
# Show the whole DataFrame again before filtering it
car_sales_df

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


In [32]:
# The comparison produces a True/False mask per row; indexing with it keeps
# only the rows where Make is "Toyota"
car_sales_df[car_sales_df["Make"] == "Toyota"]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
5,Toyota,Green,99213,4,"$4,500.00"
8,Toyota,White,60000,4,"$6,250.00"


In [33]:
# Same pattern on another column: keep only the rows where Colour is "Red"
car_sales_df[car_sales_df["Colour"] == "Red"]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
1,Honda,Red,87899,4,"$5,000.00"


In [34]:
# Numeric comparisons work the same way: keep rows with more than 100,000 km
car_sales_df[car_sales_df["Odometer (KM)"] > 100000]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
4,Nissan,White,213095,4,"$3,500.00"
