In [1]:
from typing import TypeVar


T = TypeVar('T')

def pr(val : T , title:str|None = None) : # type: ignore
    if title != None :
        print(f" ---------------------- {title} ---------------------- ")
    print('type: ' , type(val))
    print(val , '\n')
    # return val

In [2]:
import pandas as pd
# Read the car-sales CSV from the local data folder into a DataFrame and show all of it.
df = pd.read_csv('./data/car-sales.csv')  # type: ignore
pr(val=df, title='car_sales.csv')


 ---------------------- car_sales.csv ---------------------- 
type:  <class 'pandas.core.frame.DataFrame'>
     Make  Color  Odometer (KM)  Doors       Price
0  Toyota  White         150043      4   $4,000.00
1   Honda    Red          87899      4   $5,000.00
2  Toyota   Blue          32549      3   $7,000.00
3     BMW  Black          11179      5  $22,000.00
4  Nissan  White         213095      4   $3,500.00
5  Toyota  Green          99213      4   $4,500.00
6   Honda   Blue          45698      4   $7,500.00
7   Honda   Blue          54738      4   $7,000.00
8  Toyota  White          60000      4   $6,250.00
9  Nissan  White          31600      4   $9,700.00 



## Viewing and selecting data

In [3]:
# Peek at both ends of the frame: head(n) keeps the first n rows, tail(n) the last n.
head, tail = df.head(7), df.tail(7)
pr(val=head, title='return the first 7 rows')
pr(val=tail, title='return the last 7 rows')

 ---------------------- return the first 7 rows ---------------------- 
type:  <class 'pandas.core.frame.DataFrame'>
     Make  Color  Odometer (KM)  Doors       Price
0  Toyota  White         150043      4   $4,000.00
1   Honda    Red          87899      4   $5,000.00
2  Toyota   Blue          32549      3   $7,000.00
3     BMW  Black          11179      5  $22,000.00
4  Nissan  White         213095      4   $3,500.00
5  Toyota  Green          99213      4   $4,500.00
6   Honda   Blue          45698      4   $7,500.00 

 ---------------------- return the last 7 rows ---------------------- 
type:  <class 'pandas.core.frame.DataFrame'>
     Make  Color  Odometer (KM)  Doors       Price
3     BMW  Black          11179      5  $22,000.00
4  Nissan  White         213095      4   $3,500.00
5  Toyota  Green          99213      4   $4,500.00
6   Honda   Blue          45698      4   $7,500.00
7   Honda   Blue          54738      4   $7,000.00
8  Toyota  White          60000      4   $6,250.00


In [4]:
# What is the difference between .loc and .iloc?
# .loc (short for "location") selects by index *label*.
# The label 3 is used twice on purpose, so a label lookup can match more than one row.
animals = pd.Series(['dog', 'cat', 'pandas', 'snake'], index=[0, 3, 5, 3])
pr(val=animals, title='animals')
# Label 3 occurs twice -> the result is a Series holding both matches.
pr(val=animals.loc[3], title='get all animals that have index 3')
# Label 5 occurs once -> the result is the bare value (a str), not a Series.
pr(val=animals.loc[5], title='get all animals that have index 5')

 ---------------------- animals ---------------------- 
type:  <class 'pandas.core.series.Series'>
0       dog
3       cat
5    pandas
3     snake
dtype: object 

 ---------------------- get all animals that have index 3 ---------------------- 
type:  <class 'pandas.core.series.Series'>
3      cat
3    snake
dtype: object 

 ---------------------- get all animals that have index 5 ---------------------- 
type:  <class 'str'>
pandas 



In [5]:
pr(df, 'car sales data')
# .loc with a single row label returns that row as a Series indexed by column name.
pr(df.loc[3], 'return the row at index 3')
# Select row and column in ONE .loc call instead of chaining df.loc[3]['Make']:
# the chained form first builds the whole row as an intermediate Series and then
# indexes into it; the single call goes straight to the cell.
pr(df.loc[3, 'Make'], 'return the Make value from row 3')

 ---------------------- car sales data ---------------------- 
type:  <class 'pandas.core.frame.DataFrame'>
     Make  Color  Odometer (KM)  Doors       Price
0  Toyota  White         150043      4   $4,000.00
1   Honda    Red          87899      4   $5,000.00
2  Toyota   Blue          32549      3   $7,000.00
3     BMW  Black          11179      5  $22,000.00
4  Nissan  White         213095      4   $3,500.00
5  Toyota  Green          99213      4   $4,500.00
6   Honda   Blue          45698      4   $7,500.00
7   Honda   Blue          54738      4   $7,000.00
8  Toyota  White          60000      4   $6,250.00
9  Nissan  White          31600      4   $9,700.00 

 ---------------------- return the row at index 3 ---------------------- 
type:  <class 'pandas.core.series.Series'>
Make                    BMW
Color                 Black
Odometer (KM)         11179
Doors                     5
Price            $22,000.00
Name: 3, dtype: object 

 ---------------------- return the Make value f

In [6]:
# .iloc selects by integer *position* (0-based) and ignores the index labels.
pr(val=animals, title='animals')
# Label lookup: every row labelled 3 comes back, as a Series.
pr(val=animals.loc[3], title='return the animal at index 3')
# Position lookup: only the element at position 3 (the fourth one), as a plain str.
pr(val=animals.iloc[3], title='return the animal at position 3')

 ---------------------- animals ---------------------- 
type:  <class 'pandas.core.series.Series'>
0       dog
3       cat
5    pandas
3     snake
dtype: object 

 ---------------------- return the animal at index 3 ---------------------- 
type:  <class 'pandas.core.series.Series'>
3      cat
3    snake
dtype: object 

 ---------------------- return the animal at position 3 ---------------------- 
type:  <class 'str'>
snake 



In [7]:
# .iloc also accepts Python-style slices. The end position is exclusive,
# so the slice below yields positions 0 and 1 only.
pr(val=animals, title='animals')
pr(val=animals.iloc[0:2], title='return all the animals from 0 to 2')

 ---------------------- animals ---------------------- 
type:  <class 'pandas.core.series.Series'>
0       dog
3       cat
5    pandas
3     snake
dtype: object 

 ---------------------- return all the animals from 0 to 2 ---------------------- 
type:  <class 'pandas.core.series.Series'>
0    dog
3    cat
dtype: object 



In [8]:
# A whole column can be pulled out of the frame; the result is a Series.
pr(val=df, title='car sales data')
# Bracket access works for any column name.
pr(val=df['Make'], title='Only Make column from car sales')
# Attribute access is a shorthand for the same column. It is only usable when the
# column name is a valid Python identifier (so not for e.g. 'Odometer (KM)').
pr(val=df.Make, title='Another way to select the column')

 ---------------------- car sales data ---------------------- 
type:  <class 'pandas.core.frame.DataFrame'>
     Make  Color  Odometer (KM)  Doors       Price
0  Toyota  White         150043      4   $4,000.00
1   Honda    Red          87899      4   $5,000.00
2  Toyota   Blue          32549      3   $7,000.00
3     BMW  Black          11179      5  $22,000.00
4  Nissan  White         213095      4   $3,500.00
5  Toyota  Green          99213      4   $4,500.00
6   Honda   Blue          45698      4   $7,500.00
7   Honda   Blue          54738      4   $7,000.00
8  Toyota  White          60000      4   $6,250.00
9  Nissan  White          31600      4   $9,700.00 

 ---------------------- Only Make column from car sales ---------------------- 
type:  <class 'pandas.core.series.Series'>
0    Toyota
1     Honda
2    Toyota
3       BMW
4    Nissan
5    Toyota
6     Honda
7     Honda
8    Toyota
9    Nissan
Name: Make, dtype: object 

 ---------------------- Another way to select the column -

In [9]:
# Selecting and filtering rows happens in two steps:
#   1. comparing a column with a value yields a boolean Series (one True/False per row);
#   2. indexing the frame with that boolean Series keeps only the rows marked True.
pr(val=df, title='car sales')
is_toyota = df['Make'] == 'Toyota'
pr(val=df[is_toyota], title='pick only Toyota')
over_150k_km = df['Odometer (KM)'] > 150000
pr(val=df[over_150k_km], title='pick only cars that have odometer above 150000')

 ---------------------- car sales ---------------------- 
type:  <class 'pandas.core.frame.DataFrame'>
     Make  Color  Odometer (KM)  Doors       Price
0  Toyota  White         150043      4   $4,000.00
1   Honda    Red          87899      4   $5,000.00
2  Toyota   Blue          32549      3   $7,000.00
3     BMW  Black          11179      5  $22,000.00
4  Nissan  White         213095      4   $3,500.00
5  Toyota  Green          99213      4   $4,500.00
6   Honda   Blue          45698      4   $7,500.00
7   Honda   Blue          54738      4   $7,000.00
8  Toyota  White          60000      4   $6,250.00
9  Nissan  White          31600      4   $9,700.00 

 ---------------------- pick only Toyota ---------------------- 
type:  <class 'pandas.core.frame.DataFrame'>
     Make  Color  Odometer (KM)  Doors      Price
0  Toyota  White         150043      4  $4,000.00
2  Toyota   Blue          32549      3  $7,000.00
5  Toyota  Green          99213      4  $4,500.00
8  Toyota  White        

In [11]:
# Experiment with boolean indexing: .loc accepts one boolean flag per row
# and keeps the rows whose flag is True.
s = pd.Series([10, 20, 30, 40, 50, 60])
# [True, False] * 3 == [True, False, True, False, True, False]
pr(val=s.loc[[True, False] * 3])

type:  <class 'pandas.core.series.Series'>
0    10
2    30
4    50
dtype: int64 



In [13]:
# New concept: broadcasting. An operation between a Series and a single scalar
# is applied to every element and produces a new Series of the same length.
pr(val=(s + 5), title='add 5 to each element in s')
pr(val=(s == 50), title='check if each element equal 50')

 ---------------------- add 5 to each element in s ---------------------- 
type:  <class 'pandas.core.series.Series'>
0    15
1    25
2    35
3    45
4    55
5    65
dtype: int64 

 ---------------------- check if each element equal 50 ---------------------- 
type:  <class 'pandas.core.series.Series'>
0    False
1    False
2    False
3    False
4     True
5    False
dtype: bool 



In [15]:
# Combine both ideas: the comparison builds the boolean mask, .loc keeps the True rows.
pr(val=s.loc[s == 50])

type:  <class 'pandas.core.series.Series'>
4    50
dtype: int64 

