# 01 - Intro to Pandas

## Objectives:
- Introduction to the Jupyter environment
- Refresher on Python
- Pandas data structures (`Series` and `DataFrame`)

In [1]:
# First cell: print a greeting; the text appears directly below the cell as its output
print("Hello pandas")

Hello pandas


## Importing Packages

In [2]:
# pandas is a third-party package (not part of the Python standard library), so it must be imported explicitly
# run `pip install pandas` if it is not installed yet, or follow the setup guide
import pandas as pd
import matplotlib.pyplot as plt

## Native Python Data Types

In [3]:
# Python's built-in scalar types: a whole number, a decimal number and a piece of text
integers: int = 2
floats: float = 1.11
string: str = "This is a string."

## Series

In [4]:
# Introducing Series
# DESC: a Series pairs every value with an index label, much like the
# key-value pairs of a fixed-size dict. No index is given here, so the
# values are labelled 0, 1, 2.
a = pd.Series(data=[1, 2, 3])
a

0    1
1    2
2    3
dtype: int64

In [5]:
# The values can also come from an iterable such as `range`
b = pd.Series(data=range(3))
b

0    0
1    1
2    2
dtype: int64

In [6]:
# A Series has exactly one dtype for all of its values. When the values mix
# Python types (here two ints and a str), the Series is stored with the
# generic `object` dtype, as the output below shows.
c = pd.Series([1, 2, "a"])
c

0    1
1    2
2    a
dtype: object

In [7]:
# some useful attributes
# `dtype` reports the data type of the values held by the Series
a.dtype

dtype('int64')

In [8]:
# `values` exposes the data of the Series as an array (without the index labels)
a.values

array([1, 2, 3], dtype=int64)

In [9]:
# `index` holds the labels; a Series created without an explicit index gets a RangeIndex
b.index

RangeIndex(start=0, stop=3, step=1)

In [10]:
# Reassigning indices: the default 0..2 labels are replaced by these strings.
# NOTE: this changes `b` itself; the following cells rely on the new labels.
b.index = ["apple", "banana", "coconut"]
b.index

Index(['apple', 'banana', 'coconut'], dtype='object')

In [11]:
# Display `b` again: same values, now shown against the string labels
b

apple      0
banana     1
coconut    2
dtype: int64

In [12]:
# Reassigning Series name and index name
# `name` labels the values and `index.name` labels the index; both appear in
# the printed representation of the Series.
b.name = "Quantity of Food"
b.index.name = "Food index"
b

Food index
apple      0
banana     1
coconut    2
Name: Quantity of Food, dtype: int64

In [13]:
# Simple indexing by position: `.iloc` counts from 0 regardless of the index
# labels. A plain `b[1]` on this string-labelled Series relies on pandas
# falling back from labels to positions, which recent pandas versions deprecate.
b.iloc[1]

1

In [14]:
# Indexing with a label returns the value stored under that label
b["apple"]

0

In [15]:
# Indexing with a list of labels selects those entries and returns a Series
b[["apple", "coconut"]]

Food index
apple      0
coconut    2
Name: Quantity of Food, dtype: int64

In [16]:
# some useful methods
# `get` looks a value up by its index label
b.get("apple")

0

In [17]:
# `drop` returns a Series without the given label; `b` itself is not modified
# (the Series shown by the next cell still contains "banana")
b.drop("banana")

Food index
apple      0
coconut    2
Name: Quantity of Food, dtype: int64

In [18]:
# `rename` called with a string returns a Series that carries that string as its name
b.rename("Renamed quantity of food")

Food index
apple      0
banana     1
coconut    2
Name: Renamed quantity of food, dtype: int64

## DataFrame

In [19]:
# Introducing DataFrame
# DESC: a 2D labeled data structure whose columns may hold different data types
# (akin to a spreadsheet, a SQL table, or a dict of Series objects).
# Some of the accepted inputs:
# - Dict of 1D ndarrays, lists, dicts, or Series
# - 2-D numpy.ndarray
# - Structured or record ndarray
# - A Series
# - Another DataFrame

# Creating a DataFrame from a dict of Series: each key becomes a column name.
# "first_col" has no value for row "c", so that cell shows up as missing.
row_labels = ["a", "b", "c"]
e = dict(
    first_col=pd.Series([1, 2], index=row_labels[:2]),
    second_col=pd.Series([1.0, 2.0, 3.0], index=row_labels),
    third_col=pd.Series(range(3), index=row_labels),
)

dataframe_from_e = pd.DataFrame(data=e)
dataframe_from_e

Unnamed: 0,first_col,second_col,third_col
a,1.0,1.0,0
b,2.0,2.0,1
c,,3.0,2


In [20]:
# useful DataFrame attributes
# `index` holds the row labels
dataframe_from_e.index

Index(['a', 'b', 'c'], dtype='object')

In [21]:
# `columns` holds the column labels
dataframe_from_e.columns

Index(['first_col', 'second_col', 'third_col'], dtype='object')

In [22]:
# DataFrame indexing
# Selecting a single column by name returns it as a Series; the entry that
# "first_col" lacks for row "c" is shown as NaN
dataframe_from_e["first_col"]

a    1.0
b    2.0
c    NaN
Name: first_col, dtype: float64

In [23]:
# Assigning a scalar to a new column name adds that column with the scalar on every row.
# NOTE: this changes `dataframe_from_e` itself.
dataframe_from_e["fourth_col"] = True

In [24]:
# Display the DataFrame, which now includes "fourth_col"
dataframe_from_e

Unnamed: 0,first_col,second_col,third_col,fourth_col
a,1.0,1.0,0,True
b,2.0,2.0,1,True
c,,3.0,2,True


In [25]:
# Display the frame without "second_col"; `columns=` names the axis explicitly
# instead of passing axis=1
dataframe_from_e.drop(columns="second_col")

Unnamed: 0,first_col,third_col,fourth_col
a,1.0,0,True
b,2.0,1,True
c,,2,True


## Practice Section (15 minutes)

1. Create a `Series` object with `int64` data type and size of 5. Give it a unique name.
2. Create a `Series` object with `float64` data type and size of 8. Give it a unique name.
3. Create a `Series` object with `object` data type and size of 3. Give it a unique name.
4. Create a `DataFrame` object with all `int64` data type and shape of `(5,2)`.
5. Create a `DataFrame` object with `float64`, `float64` and `object` data types (columns in that order) and a shape of `(8,3)`. Make use of the `Series` objects created in Practices 1, 2, and 3.

## Bonus

If you would like to create a new copy of a `Series` or `DataFrame`, use the `copy` method instead of just assigning it to another variable: plain assignment only gives the same object a second name. The `copy` method (whose `deep` parameter defaults to `True`) creates a new object with its own copy of the data and the index.

In [26]:
# Plain assignment does not copy anything: both names refer to the same object.
pointer_to_b = b
# The message is only shown when the check fails, so it states the expectation.
assert pointer_to_b is b, "pointer_to_b should be the very same object as b"

In [27]:
# `copy()` returns a new object, so `copy_of_b` and `b` are different objects.
copy_of_b = b.copy()
# Assert the expected outcome directly. No try/except is needed, and no
# exception variable is bound that could clash with other notebook names
# (an `except ... as e` target is unbound again once the handler finishes).
assert copy_of_b is not b, "copy_of_b should be a separate object from b"
print("copy_of_b is not b")

copy_of_b is not b


## Useful resources
- https://pandas.pydata.org/community/ecosystem.html
- https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html

**Copyright (C) 2021  Lee Kian Yang**

This program is licensed under MIT license.