In [2]:
!pip install pandas



You should consider upgrading via the 'C:\Users\riyan\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


# Pandas

In [2]:
import numpy as np
import pandas as pd

## Series

In [3]:
revenues = pd.Series([5555, 7000, 1980])

In [None]:
# # simple array
# data = np.array([5555, 7000, 1980])
# revenues = pd.Series(data)
# print(revenues)

In [4]:
revenues

0    5555
1    7000
2    1980
dtype: int64

In [5]:
revenues.values

array([5555, 7000, 1980], dtype=int64)

In [6]:
revenues.index

RangeIndex(start=0, stop=3, step=1)

In [7]:
city_revenues = pd.Series(
    [4200, 8000, 6500],
    index=["Jakarta", "New York", "London"]
)
city_revenues

Jakarta     4200
New York    8000
London      6500
dtype: int64

In [8]:
city_employee_count = pd.Series({"Jakarta": 5, "London": 8})
city_employee_count

Jakarta    5
London     8
dtype: int64

In [9]:
city_employee_count.keys()

Index(['Jakarta', 'London'], dtype='object')

In [13]:
"London" in city_employee_count

True

In [14]:
"Tokyo" in city_employee_count

False

## Data frame

In [15]:
city_data = pd.DataFrame({
    "revenue": city_revenues,
    "employee_count": city_employee_count
})

In [16]:
city_data

Unnamed: 0,revenue,employee_count
Jakarta,4200,5.0
London,6500,8.0
New York,8000,


In [17]:
city_data.index

Index(['Jakarta', 'London', 'New York'], dtype='object')

In [18]:
city_data.values

array([[4.2e+03, 5.0e+00],
       [6.5e+03, 8.0e+00],
       [8.0e+03,     nan]])

In [19]:
city_data.axes

[Index(['Jakarta', 'London', 'New York'], dtype='object'),
 Index(['revenue', 'employee_count'], dtype='object')]

In [20]:
city_data.axes[0]

Index(['Jakarta', 'London', 'New York'], dtype='object')

In [21]:
city_data.axes[1]

Index(['revenue', 'employee_count'], dtype='object')

In [22]:
city_data.keys()

Index(['revenue', 'employee_count'], dtype='object')

In [23]:
"Jakarta" in city_data

False

In [24]:
"revenue" in city_data

True

## Accessing Series Elements

In [25]:
city_revenues

Jakarta     4200
New York    8000
London      6500
dtype: int64

In [26]:
city_revenues["New York"]

8000

In [28]:
city_revenues[0]

4200

In [29]:
city_revenues[-1]

6500

In [30]:
city_revenues[1:]

New York    8000
London      6500
dtype: int64

In [31]:
city_revenues["New York":]

New York    8000
London      6500
dtype: int64

## Using .loc and .iloc

In [32]:
colors = pd.Series(
    ["red", "purple", "blue", "green", "yellow"],
    index=[1, 2, 3, 5, 8]
)

In [33]:
colors

1       red
2    purple
3      blue
5     green
8    yellow
dtype: object

In [34]:
colors.loc[1]

'red'

In [35]:
colors.iloc[1]

'purple'

In [36]:
# Return the elements with the implicit index: 1, 2

colors.iloc[1:3]

2    purple
3      blue
dtype: object

In [37]:
# Return the elements with the explicit index between 3 and 8

colors.loc[3:8]

3      blue
5     green
8    yellow
dtype: object

In [38]:
colors.iloc[-2]

'green'

## Accessing DataFrame Elements

In [39]:
city_data["revenue"]

Jakarta     4200
London      6500
New York    8000
Name: revenue, dtype: int64

In [40]:
city_data.revenue

Jakarta     4200
London      6500
New York    8000
Name: revenue, dtype: int64

In [41]:
toys = pd.DataFrame([
    {"name": "ball", "shape": "sphere"},
    {"name": "Rubik's cube", "shape": "cube"}
])

In [42]:
toys["shape"]

0    sphere
1      cube
Name: shape, dtype: object

In [43]:
toys.shape

(2, 2)

## Using .loc and .iloc

In [44]:
city_data.loc["Jakarta"]

revenue           4200.0
employee_count       5.0
Name: Jakarta, dtype: float64

In [45]:
city_data.loc["London": "New York"]

Unnamed: 0,revenue,employee_count
London,6500,8.0
New York,8000,


In [46]:
city_data.iloc[1]

revenue           6500.0
employee_count       8.0
Name: London, dtype: float64

In [47]:
city_data.loc["Jakarta": "London", "revenue"]

Jakarta    4200
London     6500
Name: revenue, dtype: int64

In [48]:
city_revenues.sum() #penjumlahan

18700

In [49]:
city_revenues.max()

8000

## Combining Multiple Datasets

In [50]:
further_city_data = pd.DataFrame(
    {"revenue": [7000, 3400], "employee_count": [2, 2]},
    index=["Milan", "Barcelona"]
)

In [51]:
city_data

Unnamed: 0,revenue,employee_count
Jakarta,4200,5.0
London,6500,8.0
New York,8000,


In [52]:
all_city_data = pd.concat([city_data, further_city_data], sort=False)

In [53]:
all_city_data

Unnamed: 0,revenue,employee_count
Jakarta,4200,5.0
London,6500,8.0
New York,8000,
Milan,7000,2.0
Barcelona,3400,2.0


In [54]:
city_countries = pd.DataFrame({
    "country": ["Indonesia", "United Kingdom", "USA", "Italia", "Spain"],
    "capital": [1, 1, 0, 0, 0]},
    index=["Jakarta", "London", "New York", "Milan", "Barcelona"]
)

In [55]:
city_countries

Unnamed: 0,country,capital
Jakarta,Indonesia,1
London,United Kingdom,1
New York,USA,0
Milan,Italia,0
Barcelona,Spain,0


In [56]:
all_city_data

Unnamed: 0,revenue,employee_count
Jakarta,4200,5.0
London,6500,8.0
New York,8000,
Milan,7000,2.0
Barcelona,3400,2.0


In [57]:
cities = pd.concat([all_city_data, city_countries], axis=1, sort=False)

In [58]:
cities

Unnamed: 0,revenue,employee_count,country,capital
Jakarta,4200,5.0,Indonesia,1
London,6500,8.0,United Kingdom,1
New York,8000,,USA,0
Milan,7000,2.0,Italia,0
Barcelona,3400,2.0,Spain,0


In [59]:
pd.concat([all_city_data, city_countries], axis=1, join="inner")

Unnamed: 0,revenue,employee_count,country,capital
Jakarta,4200,5.0,Indonesia,1
London,6500,8.0,United Kingdom,1
New York,8000,,USA,0
Milan,7000,2.0,Italia,0
Barcelona,3400,2.0,Spain,0


In [60]:
countries = pd.DataFrame({
    "population_millions": [127, 37, 137],
    "continent": ["Asia", "Europe", "North America"]
}, index=["Jakarta", "London", "New York"])

In [61]:
countries

Unnamed: 0,population_millions,continent
Jakarta,127,Asia
London,37,Europe
New York,137,North America


In [62]:
cities

Unnamed: 0,revenue,employee_count,country,capital
Jakarta,4200,5.0,Indonesia,1
London,6500,8.0,United Kingdom,1
New York,8000,,USA,0
Milan,7000,2.0,Italia,0
Barcelona,3400,2.0,Spain,0


In [65]:
pd.merge(cities, countries, left_on="country", right_index=True)

Unnamed: 0,revenue,employee_count,country,capital,population_millions,continent


In [64]:
pd.merge(
    cities,
    countries,
    left_on="country",
    right_index=True,
    how="left"
)

Unnamed: 0,revenue,employee_count,country,capital,population_millions,continent
Jakarta,4200,5.0,Indonesia,1,,
London,6500,8.0,United Kingdom,1,,
New York,8000,,USA,0,,
Milan,7000,2.0,Italia,0,,
Barcelona,3400,2.0,Spain,0,,
