In [2]:
import pandas as pd

In [3]:
# A Series built from a plain list; no index is passed, so pandas supplies
# the default integer labels 0, 1, 2.
revenues = pd.Series(data=[5555, 7000, 1980])
revenues

0    5555
1    7000
2    1980
dtype: int64

In [4]:
# The `.values` attribute of a pandas Series returns the underlying NumPy array that stores the Series' data.

revenues.values



array([5555, 7000, 1980], dtype=int64)

In [5]:
# The `.index` attribute returns the index (axis labels) of the Series. It tells you what each value represents, like row names or identifiers.
# `revenues` was created without an explicit index, so this is the default RangeIndex.

revenues.index

RangeIndex(start=0, stop=3, step=1)

In [6]:
# Revenue per city, labelled by city name instead of the default integers.
city_revenues = pd.Series(
    data=[4200, 8000, 6500],
    index=['Amsterdam', 'Toronto', 'Tokyo'],
)
city_revenues

Amsterdam    4200
Toronto      8000
Tokyo        6500
dtype: int64

In [7]:
# Label-based indexing (index label lookup): fetch the value stored under the label 'Toronto'.

city_revenues['Toronto']

8000

In [8]:
# A Series can also be built from a dict: the keys become the index labels
# and the values become the data.
city_employee_count_data = dict(Amsterdam=5, Tokyo=8)
city_employee_count = pd.Series(data=city_employee_count_data)
city_employee_count

Amsterdam    5
Tokyo        8
dtype: int64

In [9]:
# `.keys()` returns the index labels of the Series, mirroring `dict.keys()`.
city_employee_count.keys()

Index(['Amsterdam', 'Tokyo'], dtype='object')

Why use `.keys()`?
- It's familiar to users coming from Python dictionaries, where `.keys()` returns the dictionary keys. On a Series it returns the same labels as `.index`.
- It's useful in code that treats pandas objects like dictionaries (e.g., looping over labels or checking membership).

In [10]:
# Like a dict, `in` tests membership against the index labels.
'Tokyo' in city_employee_count

True

In [11]:
# Walk over the index labels and look up the value stored under each one.
for city in city_employee_count.keys():
    print(city, city_employee_count[city])

Amsterdam 5
Tokyo 8


In [12]:
# Combine the two Series into a DataFrame, one column per Series.
# Rows are aligned on the index labels. 'Toronto' has a revenue but no
# employee count, so that cell is missing and the employee_count column
# is shown as floats.
city_data = pd.DataFrame({
    'revenue': city_revenues,
    'employee_count': city_employee_count
})
city_data

Unnamed: 0,revenue,employee_count
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,


In [13]:
# The `.axes` attribute of a DataFrame returns a list of its axis labels: the row labels (index) first, then the column labels.

city_data.axes

[Index(['Amsterdam', 'Tokyo', 'Toronto'], dtype='object'),
 Index(['revenue', 'employee_count'], dtype='object')]

In [14]:
# Positional lookup (second element). `.iloc` is used explicitly because
# `city_revenues[1]` on a string-labelled Series relies on a deprecated
# integer-position fallback, for which pandas emits a FutureWarning.
city_revenues.iloc[1]

  city_revenues[1]


8000

In [15]:
# The same element, this time fetched by its label.
city_revenues['Toronto']

8000

In [16]:
# Last element by position. `.iloc` is used explicitly because
# `city_revenues[-1]` on a string-labelled Series relies on a deprecated
# integer-position fallback, for which pandas emits a FutureWarning.
city_revenues.iloc[-1]

  city_revenues[-1]


6500

In [17]:
# Label slice: everything from the 'Toronto' entry through the end of the Series.
city_revenues['Toronto':]

Toronto    8000
Tokyo      6500
dtype: int64

In [18]:
# A Series whose labels are integers that do NOT match the positions,
# used to contrast label-based and position-based access.
colors = pd.Series(
    data=['red', 'purple', 'blue', 'green', 'yellow'],
    index=[1, 2, 3, 5, 8],
)
colors

1       red
2    purple
3      blue
5     green
8    yellow
dtype: object

In [19]:
colors[1]

# If the index is made of integers, then series[int] looks the key up by label, not by position.

'red'

In [20]:
# `.loc` is always label-based: the entry whose label is 1.
colors.loc[1]

'red'

In [21]:
# `.iloc` is always position-based: the second entry (position 1).
colors.iloc[1]

'purple'

In [22]:
# Positional slice: positions 1 and 2; the end position 3 is excluded.
colors.iloc[1:3]

2    purple
3      blue
dtype: object

In [23]:
# Label slice: labels 3 through 8, with the end label 8 included.
colors.loc[3:8]

3      blue
5     green
8    yellow
dtype: object

In [24]:
# Negative positions count from the end: the second-to-last entry.
colors.iloc[-2]

'green'

In [25]:
# Attribute-style column access: returns the 'revenue' column as a Series.
city_data.revenue

Amsterdam    4200
Tokyo        6500
Toronto      8000
Name: revenue, dtype: int64

In [26]:
# Select one row by its label; the row comes back as a Series indexed by column name.
city_data.loc['Amsterdam']

revenue           4200.0
employee_count       5.0
Name: Amsterdam, dtype: float64

In [27]:
# Select one row by position: position 1 is the second row ('Tokyo').
city_data.iloc[1]

revenue           6500.0
employee_count       8.0
Name: Tokyo, dtype: float64

In [28]:
# Label slice over rows: from 'Tokyo' through 'Toronto', both ends included.
city_data.loc['Tokyo': 'Toronto']

Unnamed: 0,revenue,employee_count
Tokyo,6500,8.0
Toronto,8000,


In [29]:
# Row slice plus a column label: the 'revenue' values for 'Amsterdam' through 'Tokyo'.
city_data.loc['Amsterdam': 'Tokyo', 'revenue']

Amsterdam    4200
Tokyo        6500
Name: revenue, dtype: int64

In [30]:
# Re-display the Series before aggregating it.
city_revenues

Amsterdam    4200
Toronto      8000
Tokyo        6500
dtype: int64

In [43]:
# Column-wise totals; the missing employee count for 'Toronto' is skipped rather than propagated.
city_data.sum()

revenue           18700.0
employee_count       13.0
dtype: float64

In [31]:
# Total revenue across all cities.
city_revenues.sum()

18700

In [32]:
# Largest single-city revenue.
city_revenues.max()

8000

In [33]:
# Smallest single-city revenue.
city_revenues.min()

4200

In [34]:
# Average revenue per city.
city_revenues.mean()

6233.333333333333

In [35]:
# Re-display the DataFrame before combining it with more data.
city_data

Unnamed: 0,revenue,employee_count
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,


In [36]:
# Two additional cities with 'revenue' and 'employee_count' columns,
# labelled by city name.
further_city_data = pd.DataFrame(
    data={'revenue': [7000, 3400], 'employee_count': [2, 2]},
    index=['New York', 'Barcelona'],
)



In [44]:
# Stack the two DataFrames vertically: the rows of `further_city_data` are
# appended below those of `city_data`.
all_city_data = pd.concat(
    objs=[city_data, further_city_data],
    sort=False,
)
all_city_data



Unnamed: 0,revenue,employee_count
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,
New York,7000,2.0
Barcelona,3400,2.0


In [45]:
# Country for each city, keyed by city name. `capital` appears to flag
# capital cities with 1 and all other cities with 0.
city_countries = pd.DataFrame(
    data={
        'country': ['Holland', 'Japan', 'Holland', 'Canada', 'Spain'],
        'capital': [1, 1, 0, 0, 0],
    },
    index=['Amsterdam', 'Tokyo', 'Rotterdam', 'Toronto', 'Barcelona'],
)

In [47]:
# Row labels (city names) and column labels of `city_countries`.
city_countries.axes

[Index(['Amsterdam', 'Tokyo', 'Rotterdam', 'Toronto', 'Barcelona'], dtype='object'),
 Index(['country', 'capital'], dtype='object')]

In [48]:
# Re-display the first of the two frames that will be joined side by side.
all_city_data


Unnamed: 0,revenue,employee_count
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,
New York,7000,2.0
Barcelona,3400,2.0


In [49]:
# Re-display the second of the two frames that will be joined side by side.
city_countries

Unnamed: 0,country,capital
Amsterdam,Holland,1
Tokyo,Japan,1
Rotterdam,Holland,0
Toronto,Canada,0
Barcelona,Spain,0


In [39]:
# Join the two DataFrames side by side (axis=1), matching rows on the index.
# The inner join keeps only cities present in both frames, so 'New York'
# and 'Rotterdam' are dropped.
cities = pd.concat(
    objs=[all_city_data, city_countries],
    axis=1,
    join='inner',
    sort=False,
)
cities

Unnamed: 0,revenue,employee_count,country,capital
Amsterdam,4200,5.0,Holland,1
Tokyo,6500,8.0,Japan,1
Toronto,8000,,Canada,0
Barcelona,3400,2.0,Spain,0


In [40]:
# Country-level facts, keyed by country name so they can be joined onto
# the `country` column of another frame.
countries = pd.DataFrame(
    data={
        'population_millions': [17, 127, 37],
        'continent': ['Europe', 'Asia', 'North America'],
    },
    index=['Holland', 'Japan', 'Canada'],
)

In [41]:
# Attach the country-level columns by matching each city's `country` value
# against the index of `countries`. The default inner join keeps only rows
# with a match, so 'Barcelona' (Spain) is dropped.
cities.merge(countries, left_on='country', right_index=True)

Unnamed: 0,revenue,employee_count,country,capital,population_millions,continent
Amsterdam,4200,5.0,Holland,1,17,Europe
Tokyo,6500,8.0,Japan,1,127,Asia
Toronto,8000,,Canada,0,37,North America


In [42]:
# Same join, but as a left join: every row of `cities` is kept, and cities
# whose country has no match ('Barcelona') get missing values in the
# country-level columns.
cities.merge(countries, how='left', left_on='country', right_index=True)

Unnamed: 0,revenue,employee_count,country,capital,population_millions,continent
Amsterdam,4200,5.0,Holland,1,17.0,Europe
Tokyo,6500,8.0,Japan,1,127.0,Asia
Toronto,8000,,Canada,0,37.0,North America
Barcelona,3400,2.0,Spain,0,,
