# Pandas
Pandas is used together with NumPy: its data structures form an extension of the NumPy `ndarray` object.
Its key objects are the `Series` and the `DataFrame`.

## Pandas Series object
A 1-D array of indexed data

In [1]:
# import the packages
import numpy as np
import pandas as pd

In [6]:
# Build a Series from a plain list (an array works the same way)
data = pd.Series(data=[0.1, 6.35, 7.0, 13.7, 8.0])

# Show the whole Series: index labels on the left, values on the right
print("data:\n", data)

# Show only the underlying values
print("values: ", data.values)

# Show the index (a RangeIndex when no index is supplied)
print("indices: ", data.index)

# Look up a single value by its index label
print("data at index 3: ", data[3])

data:
 0     0.10
1     6.35
2     7.00
3    13.70
4     8.00
dtype: float64
values:  [ 0.1   6.35  7.   13.7   8.  ]
indices:  RangeIndex(start=0, stop=5, step=1)
data at index 3:  13.7


In [7]:
# Supply explicit string labels instead of relying on the default integer index
custom_data = pd.Series(
    data=[1, 2, 3, 4, 5],
    index=list('abcde'),
)
custom_data

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [8]:
# Passing a dict to pd.Series turns the dict keys into the index labels
# and the dict values into the data.
ages = dict(kasee=22, Ken=27, Kashee=19, Miro=21, Phoebe=20)

age_data = pd.Series(data=ages)
age_data

kasee     22
Ken       27
Kashee    19
Miro      21
Phoebe    20
dtype: int64

## The DataFrame Object
A DataFrame can be created from an array or a dict.
Format: `pd.DataFrame(data, columns=..., index=...)`, where `columns` and `index` are optional lists of labels.

In [2]:
# Population figures, keyed by place name
population = {
    'Seattle': 124765, 'Atlanta': 7459843, 'Kansas': 124765,
    'Connecticut': 124765, 'Viginia': 124765,
}

# Area figures, keyed by the same place names
area = {
    'Seattle': 475398475, 'Atlanta': 7589347, 'Kansas': 985904384,
    'Connecticut': 759874, 'Viginia': 509438,
}

# Combine both dicts into one DataFrame: each dict becomes a column,
# and the shared keys become the row labels
states = pd.DataFrame(dict(population=population, area=area))

states

Unnamed: 0,population,area
Atlanta,7459843,7589347
Connecticut,124765,759874
Kansas,124765,985904384
Seattle,124765,475398475
Viginia,124765,509438


In [4]:
# Pull a single column out of the DataFrame as a Series.
# Attribute access is used here; states['population'] is equivalent.
popn = states.population
popn

Atlanta        7459843
Connecticut     124765
Kansas          124765
Seattle         124765
Viginia         124765
Name: population, dtype: int64

## Data Indexing and Selection
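Plain `[]` indexing and slicing can be confusing, because indexing uses the explicit index labels while slicing uses implicit (Python list-style) positions. To avoid this, use the `loc` attribute for explicit (label-based) indexing and the `iloc` attribute for implicit (position-based) indexing.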

In [5]:
# A Series whose index labels are the integers 1..3, so a label and a
# 0-based position with the same number refer to different rows
data = pd.Series(data=list('abc'), index=[1, 2, 3])
data

1    a
2    b
3    c
dtype: object

In [7]:
# Plain [] access on a Series with an integer index mixes two conventions:
# a single key is looked up by label, while a slice is taken by position.
print(data[1]) # explicit index when indexing: the row labelled 1
print(data[1:3]) # implicit index when slicing: positions 1 and 2 (labels 2 and 3)

a
2    b
3    c
dtype: object


In [9]:
# To solve this, loc and iloc are used: each always applies one convention.
# Explicit indexing: loc works on the index labels.
# Note that the label slice 1:3 includes the end label 3.
print(data.loc[1])
print(data.loc[1:3])

# Implicit indexing: iloc works on 0-based positions.
# The positional slice 1:3 excludes position 3, as with a Python list.
print(data.iloc[1])
print(data.iloc[1:3])

a
1    a
2    b
3    c
dtype: object
b
2    b
3    c
dtype: object
