<h1>Accessing Specific Values in a Series Using Pandas</h1>

In [1]:
import pandas as pd

<h1>Using the .loc and .iloc indexers with a Series</h1>

In [2]:
# Five colours labelled 1, 2, 3, 5, 8: the labels differ from the
# zero-based positions 0..4, so label and positional lookups give different results.
colour_names = ['red', 'purple', 'blue', 'green', 'yellow']
colour_labels = [1, 2, 3, 5, 8]
colours = pd.Series(colour_names, index=colour_labels)
colours

1       red
2    purple
3      blue
5     green
8    yellow
dtype: object

In [3]:
colours[1]  # Plain [] with an integer key on this integer index is a label lookup (label 1), not position 1

'red'

In [4]:
colours.loc[1]  # .loc selects by index label: the element labelled 1

'red'

In [5]:
colours.iloc[1]  # .iloc selects by zero-based position: the second element

'purple'

In [6]:
colours.loc[3:8]  # NOTE: a .loc label slice includes its upper bound (label 8 is returned)

3      blue
5     green
8    yellow
dtype: object

In [7]:
colours.iloc[1:3]  # NOTE: an .iloc positional slice excludes its upper bound (positions 1 and 2 only)

2    purple
3      blue
dtype: object

In [8]:
# Revenue per city: the index argument assigns an explicit city label to each value.
revenue_values = [4200, 8000, 6500]
revenue_cities = ['Amsterdam', 'Toronto', 'Tokyo']
city_revenues = pd.Series(revenue_values, index=revenue_cities)
city_revenues

# A dictionary can seed a Series directly: its keys become the index labels and
# its values the data, just like passing the values with an explicit index.
city_employee_count = dict(Amsterdam=5, Tokyo=8)

employee_count = pd.Series(data=city_employee_count)
employee_count

# Each dictionary key becomes a column and each Series' index labels become the rows.
# The Series are aligned on their shared labels; a city missing from one Series
# (Toronto has no employee count) gets NaN in that column.
city_columns = {
    'revenue': city_revenues,
    'employee count': employee_count,
}
city_data = pd.DataFrame(city_columns)
city_data

Unnamed: 0,revenue,employee count
Amsterdam,4200,5.0
Tokyo,6500,8.0
Toronto,8000,


In [11]:
# .loc on a DataFrame (rather than a Series): a single row label selects that
# row and returns it as a Series whose index is the column names.
city_data.loc['Toronto']

revenue           8000.0
employee count       NaN
Name: Toronto, dtype: float64

In [None]:
# .loc[row_labels, column]: the row part is a label slice (both endpoints included),
# the column part picks the single 'revenue' column.
city_data.loc['Amsterdam':'Tokyo', 'revenue']  # Rows Amsterdam through Tokyo of the revenue column

<h3>Grouping and Aggregating Data</h3>

In [None]:
# At-a-glance summary statistics for the DataFrame's columns
city_data.describe()

In [None]:
# Common aggregations (sum, max, mean, ...) are built-in Series methods,
# so no separate math import is needed.
city_revenues.sum()

In [None]:
city_revenues.max()  # Largest value in the revenue Series

In [None]:
city_revenues.mean()  # Arithmetic mean of the revenue Series

<h1>Combining Multiple DataFrames</h1>

In [None]:
city_data  # The original DataFrame, shown again as the starting point for this section

In [None]:
# Two additional cities with the same columns as city_data.
extra_cities = ['New York', 'Barcelona']
extra_figures = {
    'revenue': [700, 3400],
    'employee count': [2, 2],
}
further_city_data = pd.DataFrame(extra_figures, index=extra_cities)
further_city_data

In [None]:
# Stack the two frames row-wise: the new cities are appended below the original rows.
frames_to_stack = [city_data, further_city_data]
all_city_data = pd.concat(frames_to_stack, sort=False)
all_city_data

In [None]:
# Adding columns to the DataFrame: build a second frame, indexed by city name,
# that holds the new 'country' and 'capital' columns.
country_info = {
    'country': ['Holland', 'Japan', 'Canada', 'Spain'],
    'capital': ['Amsterdam', 'Tokyo', 'Ottawa', 'Madrid'],
}
country_cities = ['Amsterdam', 'Tokyo', 'Toronto', 'Barcelona']
city_countries = pd.DataFrame(country_info, index=country_cities)

city_countries

In [None]:
# axis=1 places the frames side by side (adding columns) and aligns rows on their
# index labels; a city absent from one frame gets NaN in that frame's columns.
frames_side_by_side = [all_city_data, city_countries]
cities = pd.concat(frames_side_by_side, axis=1, sort=False)
cities