In [2]:
import numpy as np
import pandas as pd

In [6]:
# Build a Series from a plain list; with no index given, pandas supplies the
# default 0..n-1 integer index.
revenues = pd.Series(data=[5555, 7000, 1900])

In [7]:
revenues.values

array([5555, 7000, 1900], dtype=int64)

In [8]:
revenues.index

RangeIndex(start=0, stop=3, step=1)

In [10]:
# Revenue per city. The mapping keys become the index labels, kept in
# insertion order (Amsterdam, Toronto, Tokyo).
city_revenues = pd.Series({"Amsterdam": 4200, "Toronto": 8000, "Tokyo": 6500})
city_revenues

Amsterdam    4200
Toronto      8000
Tokyo        6500
dtype: int64

In [11]:
# Head count per city; note that Toronto has no entry in this Series.
city_employee_count = pd.Series([5, 8], index=["Amsterdam", "Tokyo"])
city_employee_count

Amsterdam    5
Tokyo        8
dtype: int64

In [12]:
city_employee_count.keys()

Index(['Amsterdam', 'Tokyo'], dtype='object')

In [13]:
"Tokyo" in city_employee_count

True

In [14]:
"New York" in city_employee_count

False

In [15]:
# Combine the two Series into one frame, one column per Series. Rows are
# aligned on the index labels; a city missing from one Series gets NaN in
# that column.
city_data = pd.DataFrame(
    dict(revenue=city_revenues, employee_count=city_employee_count)
)

In [16]:
city_data

Unnamed: 0,revenue,employee_count
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,


In [17]:
city_data.index

Index(['Amsterdam', 'Tokyo', 'Toronto'], dtype='object')

In [18]:
city_data.values

array([[4.2e+03, 5.0e+00],
       [6.5e+03, 8.0e+00],
       [8.0e+03,     nan]])

In [19]:
city_data.axes

[Index(['Amsterdam', 'Tokyo', 'Toronto'], dtype='object'),
 Index(['revenue', 'employee_count'], dtype='object')]

In [20]:
city_data.axes[0]

Index(['Amsterdam', 'Tokyo', 'Toronto'], dtype='object')

In [21]:
city_data.axes[1]

Index(['revenue', 'employee_count'], dtype='object')

In [22]:
city_data.keys()

Index(['revenue', 'employee_count'], dtype='object')

In [26]:
city_data.columns

Index(['revenue', 'employee_count'], dtype='object')

In [24]:
# `in` on a DataFrame tests the column labels, not the row index, so a row
# label such as "Amsterdam" is reported as absent.
"Amsterdam" in city_data

False

In [25]:
"revenue" in city_data

True

### ACCESS ELEMENT

#### SERIES

In [27]:
city_revenues

Amsterdam    4200
Toronto      8000
Tokyo        6500
dtype: int64

In [28]:
city_revenues['Toronto']

8000

In [29]:
# Positional access goes through .iloc: plain [] with an integer key on a
# label-indexed Series relies on a deprecated positional fallback.
city_revenues.iloc[1]

8000

In [30]:
# Last element by position. .iloc makes the positional intent explicit;
# city_revenues[-1] relies on a deprecated fallback from label to position.
city_revenues.iloc[-1]

6500

In [31]:
# Integer slice syntax is city_revenues[start:stop:step]; only start is given here.
city_revenues[1:]

Toronto    8000
Tokyo      6500
dtype: int64

In [33]:
city_revenues["Amsterdam":]

Amsterdam    4200
Toronto      8000
Tokyo        6500
dtype: int64

In [37]:
# NOTE: slicing with labels inside square brackets INCLUDES the stop label,
# so "Tokyo" is part of the result.
city_revenues["Amsterdam":"Tokyo"]

Amsterdam    4200
Toronto      8000
Tokyo        6500
dtype: int64

In [39]:
# Integer labels that deliberately differ from the positions 0..4, so that
# label-based and position-based lookups give different answers.
colors = pd.Series({1: "red", 2: "purple", 3: "blue", 5: "green", 8: "yellow"})

In [44]:
# .loc  -> lookup by LABEL; a label slice includes its stop label.
# .iloc -> lookup by POSITION; a positional slice excludes its stop, like
#          ordinary Python slicing.
print(
    colors.loc[1],      # label 1
    colors.iloc[1],     # second element
    colors.iloc[1:3],   # positions 1 and 2
    colors.loc[3:8],    # labels 3 through 8, inclusive
    colors.iloc[-2],    # second-to-last element
    sep="\n",
)

red
purple
2    purple
3      blue
dtype: object
3      blue
5     green
8    yellow
dtype: object
green


#### DATAFRAME

In [45]:
city_data

Unnamed: 0,revenue,employee_count
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,


In [47]:
city_data['revenue']

Amsterdam    4200
Tokyo        6500
Toronto      8000
Name: revenue, dtype: int64

In [48]:
city_data['employee_count']

Amsterdam    5.0
Tokyo        8.0
Toronto      NaN
Name: employee_count, dtype: float64

In [50]:
#dot notation with column name
city_data.revenue

Amsterdam    4200
Tokyo        6500
Toronto      8000
Name: revenue, dtype: int64

In [51]:
city_data.employee_count

Amsterdam    5.0
Tokyo        8.0
Toronto      NaN
Name: employee_count, dtype: float64

In [53]:
# Small frame with a column literally named "shape". That name collides with
# the DataFrame.shape attribute, so this column has to be read as
# toys["shape"] rather than with dot notation.
toys = pd.DataFrame({
    "name": ["ball", "Rubik's cube"],
    "shape": ["sphere", "cube"],
})
toys

Unnamed: 0,name,shape
0,ball,sphere
1,Rubik's cube,cube


In [55]:
city_data[0:1]

Unnamed: 0,revenue,employee_count
Amsterdam,4200,5.0


In [60]:
# On a DataFrame, .loc selects rows by index label and .iloc by integer position.
# A single row comes back as a Series whose index is the column names.
city_data.loc['Amsterdam']

revenue           4200.0
employee_count       5.0
Name: Amsterdam, dtype: float64

In [57]:
city_data.loc["Tokyo":]

Unnamed: 0,revenue,employee_count
Tokyo,6500,8.0
Toronto,8000,


In [75]:
# Chained selection: slice the rows by label first (stop label included),
# then pick the "revenue" column from the resulting frame.
city_data["Amsterdam":"Tokyo"]["revenue"]

Amsterdam    4200
Tokyo        6500
Name: revenue, dtype: int64


In [62]:
# .loc and .iloc accept a second argument that selects the column(s) to return.
# Note: the open-ended slice "Amsterdam": runs to the last row, so unlike the
# "Amsterdam":"Tokyo" slice it also returns Toronto.
city_data.loc["Amsterdam":, "revenue"]

Amsterdam    4200
Tokyo        6500
Toronto      8000
Name: revenue, dtype: int64

In [73]:
# To select several columns, pass a list of column names as the second argument.
city_data.loc["Amsterdam":, ["revenue", "employee_count"]]

Unnamed: 0,revenue,employee_count
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,


In [76]:
# A similar selection (rows up to and including "Tokyo") can be written with
# chained square brackets: row slice first, then a list of column names.
city_data["Amsterdam":"Tokyo"][["revenue", "employee_count"]]

Unnamed: 0,revenue,employee_count
Amsterdam,4200,5.0
Tokyo,6500,8.0


In [80]:
# DataFrame.sum() totals each column; the NaN in employee_count is skipped.
# The same method works on a Series, e.g. city_revenues.sum().
city_data.sum()

revenue           18700.0
employee_count       13.0
dtype: float64

In [78]:
city_revenues.max()

8000

In [81]:
# Two more cities, written row by row: each inner list is
# [revenue, employee_count] for the city at the same position in `index`.
further_city_data = pd.DataFrame(
    [[7000, 2], [3400, 2]],
    index=["New York", "Barcelona"],
    columns=["revenue", "employee_count"],
)

In [82]:
further_city_data

Unnamed: 0,revenue,employee_count
New York,7000,2
Barcelona,3400,2


In [83]:
city_data

Unnamed: 0,revenue,employee_count
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,


In [85]:
# Append the rows of further_city_data below those of city_data. The two
# frames share the same columns, and sort=False leaves the column order as is.
all_city_data = pd.concat(
    (city_data, further_city_data),
    axis=0,
    sort=False,
)
all_city_data

Unnamed: 0,revenue,employee_count
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,
New York,7000,2.0
Barcelona,3400,2.0


In [87]:
# Country and capital flag (1 = capital, 0 = not) per city, written row by row.
# The index is the city name, so it lines up with the other city frames.
city_countries = pd.DataFrame(
    [
        ["Holland", 1],
        ["Japan", 1],
        ["Holland", 0],
        ["Canada", 0],
        ["Spain", 0],
    ],
    index=["Amsterdam", "Tokyo", "Rotterdam", "Toronto", "Barcelona"],
    columns=["country", "capital"],
)
city_countries

Unnamed: 0,country,capital
Amsterdam,Holland,1
Tokyo,Japan,1
Rotterdam,Holland,0
Toronto,Canada,0
Barcelona,Spain,0


In [90]:
# By default concat stacks frames as new rows; axis=1 places them side by side
# as new columns instead, aligning on the index. Labels present in only one
# frame are kept and the missing cells become NaN.
# sort=False (the default) keeps the index labels in order of appearance.
cities = pd.concat([all_city_data, city_countries], axis=1, sort=False)
cities

Unnamed: 0,revenue,employee_count,country,capital
Amsterdam,4200.0,5.0,Holland,1.0
Tokyo,6500.0,8.0,Japan,1.0
Toronto,8000.0,,Canada,0.0
New York,7000.0,2.0,,
Barcelona,3400.0,2.0,Spain,0.0
Rotterdam,,,Holland,0.0


In [91]:
# join="inner" keeps only the index labels that appear in both frames.
pd.concat([all_city_data, city_countries], axis=1, join="inner")

Unnamed: 0,revenue,employee_count,country,capital
Amsterdam,4200,5.0,Holland,1
Tokyo,6500,8.0,Japan,1
Toronto,8000,,Canada,0
Barcelona,3400,2.0,Spain,0


In [93]:
# Per-country facts, indexed by country name, written row by row as
# [population_millions, continent].
countries = pd.DataFrame(
    [
        [17, "Europe"],
        [127, "Asia"],
        [37, "North America"],
    ],
    index=["Holland", "Japan", "Canada"],
    columns=["population_millions", "continent"],
)
countries

Unnamed: 0,population_millions,continent
Holland,17,Europe
Japan,127,Asia
Canada,37,North America


In [94]:
# Match cities["country"] against the index of countries. The default inner
# merge drops rows with no matching country (here New York and Barcelona).
pd.merge(cities, countries, left_on="country", right_index=True)
# If the join key on the right side is a regular column rather than the index,
# pass right_on="<column name>" in the same way as left_on.

Unnamed: 0,revenue,employee_count,country,capital,population_millions,continent
Amsterdam,4200.0,5.0,Holland,1.0,17,Europe
Rotterdam,,,Holland,0.0,17,Europe
Tokyo,6500.0,8.0,Japan,1.0,127,Asia
Toronto,8000.0,,Canada,0.0,37,North America
