### Pandas Basics

In [1]:
import pandas as pd
import numpy as np
import warnings

warnings.filterwarnings("ignore")

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
ps = pd.Series(["a", 2, np.pi, 36])
print(ps)

0           a
1           2
2    3.141593
3          36
dtype: object


In [3]:
# values only
print(ps.values)

['a' 2 3.141592653589793 36]


In [4]:
# indices only
print(ps.index)

RangeIndex(start=0, stop=4, step=1)


In [5]:
print(ps[1:3])

1           2
2    3.141593
dtype: object


In [7]:
ps = pd.Series(
    data=[
        "mozzarella caprese",
        "Wiener Schnitzel",
        "Schwartwalder Kirschtorte",
        "lemonade",
        "whiskey",
    ],
    index=["appetizer", "main course", "dessert", "beverage", "alcohol"],
)

ps

appetizer             mozzarella caprese
main course             Wiener Schnitzel
dessert        Schwartwalder Kirschtorte
beverage                        lemonade
alcohol                          whiskey
dtype: object

In [8]:
ps.loc[["appetizer", "dessert", "beverage"]]

appetizer           mozzarella caprese
dessert      Schwartwalder Kirschtorte
beverage                      lemonade
dtype: object

In [9]:
ps.iloc[1:3]

main course             Wiener Schnitzel
dessert        Schwartwalder Kirschtorte
dtype: object

In [10]:
dc_city_pop = {
    'Tokyo': 37339804,
    'Delhi': 31181376,
    'Shanghai': 27795702,
    'Sao Paulo': 22237472,
    'Mexico City': 21918936,
    'Dhaka': 21741090,
    'Cairo': 21322750,
    'Beijing': 20896820,
    'Mumbai': 20667656,
    'Osaka': 19110616
}

In [11]:
ps_city_pop = pd.Series(dc_city_pop)
ps_city_pop

Tokyo          37339804
Delhi          31181376
Shanghai       27795702
Sao Paulo      22237472
Mexico City    21918936
Dhaka          21741090
Cairo          21322750
Beijing        20896820
Mumbai         20667656
Osaka          19110616
dtype: int64

In [12]:
print(ps_city_pop.index)
print(ps_city_pop.values)

Index(['Tokyo', 'Delhi', 'Shanghai', 'Sao Paulo', 'Mexico City', 'Dhaka',
       'Cairo', 'Beijing', 'Mumbai', 'Osaka'],
      dtype='object')
[37339804 31181376 27795702 22237472 21918936 21741090 21322750 20896820
 20667656 19110616]


In [13]:
dc_city_countries = {
    "Tokyo": "Japan",
    "Delhi": "India",
    "Shanghai": "China",
    "Sao Paulo": "Brazil",
    "Mexico City": "Mexico",
    "Dhaka": "Bangladesh",
    "Cairo": "Egypt",
    "Beijing": "China",
    "Mumbai": "India",
    "Osaka": "Japan",
}

In [15]:
ps_city_countries = pd.Series(dc_city_countries)
ps_city_countries

Tokyo               Japan
Delhi               India
Shanghai            China
Sao Paulo          Brazil
Mexico City        Mexico
Dhaka          Bangladesh
Cairo               Egypt
Beijing             China
Mumbai              India
Osaka               Japan
dtype: object

In [16]:
print(ps_city_countries.index)
print(ps_city_countries.values)

Index(['Tokyo', 'Delhi', 'Shanghai', 'Sao Paulo', 'Mexico City', 'Dhaka',
       'Cairo', 'Beijing', 'Mumbai', 'Osaka'],
      dtype='object')
['Japan' 'India' 'China' 'Brazil' 'Mexico' 'Bangladesh' 'Egypt' 'China'
 'India' 'Japan']


In [17]:
df_cities = pd.concat([ps_city_pop, ps_city_countries], axis=1)
df_cities

Unnamed: 0,0,1
Tokyo,37339804,Japan
Delhi,31181376,India
Shanghai,27795702,China
Sao Paulo,22237472,Brazil
Mexico City,21918936,Mexico
Dhaka,21741090,Bangladesh
Cairo,21322750,Egypt
Beijing,20896820,China
Mumbai,20667656,India
Osaka,19110616,Japan


In [18]:
df_cities.columns = ['population', 'country']
df_cities

Unnamed: 0,population,country
Tokyo,37339804,Japan
Delhi,31181376,India
Shanghai,27795702,China
Sao Paulo,22237472,Brazil
Mexico City,21918936,Mexico
Dhaka,21741090,Bangladesh
Cairo,21322750,Egypt
Beijing,20896820,China
Mumbai,20667656,India
Osaka,19110616,Japan


In [19]:
# some rows
df_cities.iloc[2:5]

Unnamed: 0,population,country
Shanghai,27795702,China
Sao Paulo,22237472,Brazil
Mexico City,21918936,Mexico


In [20]:
# some rows and some columns
df_cities.iloc[2:5, 1]

Shanghai        China
Sao Paulo      Brazil
Mexico City    Mexico
Name: country, dtype: object

In [21]:
# list of cities (note the double squared brackets)
df_cities.loc[["Shanghai", "Dhaka", "Osaka"]]

Unnamed: 0,population,country
Shanghai,27795702,China
Dhaka,21741090,Bangladesh
Osaka,19110616,Japan


In [22]:
# list of cities + a column
df_cities.loc[["Shanghai", "Dhaka", "Osaka"], "country"]

Shanghai         China
Dhaka       Bangladesh
Osaka            Japan
Name: country, dtype: object

In [23]:
# a range of cities from the index
df_cities.loc["Tokyo":"Sao Paulo"]

Unnamed: 0,population,country
Tokyo,37339804,Japan
Delhi,31181376,India
Shanghai,27795702,China
Sao Paulo,22237472,Brazil
