In [4]:
import pandas as pd

# 2 main datatypes

## Series = 1-Dimensional

In [9]:
# A Series is pandas' one-dimensional labelled array; with no explicit index
# the values are labelled 0, 1, 2, ...
series = pd.Series(data=["BMW", "Toyota", "Honda"])
series

0       BMW
1    Toyota
2     Honda
dtype: object

In [4]:
# A second Series with the same default 0..2 labels, so it lines up row by row
# with `series` when the two are combined.
colors = pd.Series(data=["Red", "Green", "Blue"])
colors

0      Red
1    Green
2     Blue
dtype: object

## DataFrame = 2-Dimensional

In [5]:
# A DataFrame is two-dimensional: each dict key becomes a column label and
# each Series supplies that column's values, aligned on the Series' index.
car_data = pd.DataFrame(
    data={
        "Car make": series,
        "Color": colors,
    }
)
car_data

Unnamed: 0,Car make,Color
0,BMW,Red
1,Toyota,Green
2,Honda,Blue


# DataFrame

![](pandas-anatomy-of-a-dataframe.png)

<b>Import data frame from external file</b>

In [5]:
# Load the car sales table from a CSV file; the path is resolved relative to
# the current working directory.
car_sale = pd.read_csv(filepath_or_buffer="car-sales.csv")
car_sale

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


<b>Export Data Frame (without 'index=False')</b>

In [14]:
# Export with pandas' default index=True, which writes the row labels into the
# file as an extra first column with an empty header.
car_sale.to_csv(path_or_buf="exported_car_sales_index_true.csv")

# Read the file straight back to see how that extra column comes through.
car_sale_index_true = pd.read_csv(
    filepath_or_buffer="exported_car_sales_index_true.csv"
)
car_sale_index_true

Unnamed: 0.1,Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,0,Toyota,White,150043,4,"$4,000.00"
1,1,Honda,Red,87899,4,"$5,000.00"
2,2,Toyota,Blue,32549,3,"$7,000.00"
3,3,BMW,Black,11179,5,"$22,000.00"
4,4,Nissan,White,213095,4,"$3,500.00"
5,5,Toyota,Green,99213,4,"$4,500.00"
6,6,Honda,Blue,45698,4,"$7,500.00"
7,7,Honda,Blue,54738,4,"$7,000.00"
8,8,Toyota,White,60000,4,"$6,250.00"
9,9,Nissan,White,31600,4,"$9,700.00"


The reimported frame has an extra `Unnamed: 0` column that holds the old index. It is unnecessary, because pandas automatically assigns a fresh index when the file is read back in.

<b> Export Data Frame (with 'index=False')</b>

In [17]:
# Export again, this time with index=False so the row labels are left out of
# the file.
car_sale.to_csv(path_or_buf="exported_car_sales_index_false.csv", index=False)

# Read the file back; it now contains only the data columns.
car_sale_index_false = pd.read_csv(
    filepath_or_buffer="exported_car_sales_index_false.csv"
)
car_sale_index_false

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


## Describing DataFrame

### Attributes

In [6]:
# `dtypes` is an attribute (no parentheses) that lists the dtype of every column.
car_sale.dtypes

Make             object
Colour           object
Odometer (KM)     int64
Doors             int64
Price            object
dtype: object

In [8]:
# Column labels of the DataFrame, returned as a pandas Index.
car_sale.columns

Index(['Make', 'Colour', 'Odometer (KM)', 'Doors', 'Price'], dtype='object')

In [9]:
# Store the column labels under their own name, then display them.
car_columns = car_sale.columns
car_columns

Index(['Make', 'Colour', 'Odometer (KM)', 'Doors', 'Price'], dtype='object')

In [11]:
# Row labels of the DataFrame; here it is the default RangeIndex (0, 1, 2, ...).
car_sale.index

RangeIndex(start=0, stop=10, step=1)

In [12]:
# Display the whole frame again for reference.
car_sale

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


### Functions

In [13]:
# Summary statistics (count, mean, std, min, quartiles, max); only the numeric
# columns are summarised here.
car_sale.describe()

Unnamed: 0,Odometer (KM),Doors
count,10.0,10.0
mean,78601.4,4.0
std,61983.471735,0.471405
min,11179.0,3.0
25%,35836.25,4.0
50%,57369.0,4.0
75%,96384.5,4.0
max,213095.0,5.0


In [14]:
# Prints a concise overview: index range, per-column non-null counts and
# dtypes, and approximate memory usage.
car_sale.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Make           10 non-null     object
 1   Colour         10 non-null     object
 2   Odometer (KM)  10 non-null     int64 
 3   Doors          10 non-null     int64 
 4   Price          10 non-null     object
dtypes: int64(2), object(3)
memory usage: 532.0+ bytes


In [16]:
# A small numeric Series; integer values give it an integer dtype.
car_prices = pd.Series(data=[1000, 4900, 3200])
car_prices

0    1000
1    4900
2    3200
dtype: int64

In [19]:
# Selecting one column yields a Series, so mean() returns a single scalar.
car_sale["Odometer (KM)"].mean()

np.float64(78601.4)

In [21]:
# A list of column names selects a sub-DataFrame; mean() then returns one
# value per selected column.
car_sale[["Doors", "Odometer (KM)"]].mean()

Doors                4.0
Odometer (KM)    78601.4
dtype: float64

In [20]:
# Total of the Doors column across all rows.
car_sale["Doors"].sum()

np.int64(40)

## Viewing and Selecting Data

In [24]:
# head() returns the first rows of the frame; n=5 is the default, spelled out
# here for clarity.
car_sale.head(n=5)

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"


In [25]:
# Ask for the first seven rows instead of the default five.
car_sale.head(n=7)

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"


In [26]:
# tail() mirrors head(): it returns the last rows; n=5 is the default.
car_sale.tail(n=5)

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


In [27]:
# Ask for just the last four rows.
car_sale.tail(n=4)

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


<b>Usage of .loc and .iloc</b>
- `.loc` selects by index label
- `.iloc` selects by integer position

In [29]:
# Give the Series a custom, non-sequential index that repeats the label 3, so
# label-based and position-based lookups produce visibly different results.
animals = pd.Series(
    data=["bird", "snake", "panda", "fish", "cat"],
    index=[1, 3, 6, 9, 3],
)
animals

1     bird
3    snake
6    panda
9     fish
3      cat
dtype: object

In [30]:
# .loc looks up by index label: 9 is a label here, not a position.
animals.loc[9]

'fish'

In [31]:
# The label 3 appears twice in the index, so .loc returns every matching
# element as a Series rather than a single value.
animals.loc[3]

3    snake
3      cat
dtype: object

In [32]:
# On a DataFrame, .loc with a single label returns that row as a Series whose
# index is the column names.
car_sale.loc[3]

Make                    BMW
Colour                Black
Odometer (KM)         11179
Doors                     5
Price            $22,000.00
Name: 3, dtype: object

In [33]:
# .iloc looks up by zero-based integer position and ignores the index labels:
# position 2 is the third element.
animals.iloc[2]

'panda'

In [34]:
# A list of positions returns the elements at those positions, each keeping
# its original index label.
animals.iloc[[2, 4]]

6    panda
3      cat
dtype: object

In [36]:
# Positional slice: positions 0, 1 and 2 (the stop position is excluded).
animals.iloc[:3]

1     bird
3    snake
6    panda
dtype: object

In [37]:
# The same positional slicing on a DataFrame selects the first four rows.
car_sale.iloc[:4]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"


In [38]:
# Select the "Make" column with bracket notation rather than attribute access.
# Brackets work for every column label (attribute access cannot reach a name
# such as "Odometer (KM)") and are never shadowed by a DataFrame attribute or
# method of the same name. The result is the same Series either way.
car_sale["Make"]

0    Toyota
1     Honda
2    Toyota
3       BMW
4    Nissan
5    Toyota
6     Honda
7     Honda
8    Toyota
9    Nissan
Name: Make, dtype: object

In [39]:
# Bracket notation selects a column by its label and works for any name,
# including ones that contain spaces or parentheses.
car_sale["Odometer (KM)"]

0    150043
1     87899
2     32549
3     11179
4    213095
5     99213
6     45698
7     54738
8     60000
9     31600
Name: Odometer (KM), dtype: int64

In [41]:
# Boolean-mask filtering: the comparison builds a True/False Series, and
# indexing the frame with it keeps only the rows where the mask is True.
car_sale[car_sale["Make"] == "Toyota"]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
5,Toyota,Green,99213,4,"$4,500.00"
8,Toyota,White,60000,4,"$6,250.00"


In [None]:
car_sale[car_sale["Odometer (KM)"