# Pandas Fundamentals

## Creating Pandas Series

In [1]:
import pandas as pd

# The data: one boolean "effective" flag per patient
effective = [True, True, False, False]
# The index: one integer label per patient, in the same order as the data
patients = [0, 1, 2, 3]

# Pair every value with its label to build the Series
effective_series = pd.Series(data=effective, index=patients)

In [2]:
# Preview the Series; head() shows at most the first five entries by default
effective_series.head()

0     True
1     True
2    False
3    False
dtype: bool

In [3]:
# The data: one boolean "effective" flag per patient
effective = [True, True, False, False]
# Index labels are not limited to integers: strings, floating-point numbers
# and other hashable objects can be used as well
patients = ['a', 'b', 'c', 'd']

# Pair every value with its label to build the Series
effective_series = pd.Series(data=effective, index=patients)

In [4]:
# Preview the Series; the index column now shows the string labels
effective_series.head()

a     True
b     True
c    False
d    False
dtype: bool

## Creating Pandas DataFrame

In [5]:
# Column data: each key becomes a column name and each list holds that
# column's values, one entry per patient
columns = {
    "sys_initial": [120, 126, 130, 115],
    "dia_initial": [75, 85, 90, 87],
    "sys_final": [115, 123, 130, 118],
    "dia_final": [70, 82, 92, 87],
}
# Row labels: one per patient, in the same order as the list entries
patients = ["a", "b", "c", "d"]

# Assemble the table, labelling its rows with the patient index
df = pd.DataFrame(data=columns, index=patients)

In [6]:
# Preview the DataFrame: one row per index label, one column per dict key
df.head()

Unnamed: 0,sys_initial,dia_initial,sys_final,dia_final
a,120,75,115,70
b,126,85,123,82
c,130,90,130,92
d,115,87,118,87


In [7]:
# Build the same kind of table, this time from pd.Series objects
patients = ["a", "b", "c", "d"]
# Each column is a Series that already carries the patient labels as its index
columns = {
    name: pd.Series(values, index=patients)
    for name, values in [
        ("sys_initial", [120, 126, 130, 115]),
        ("dia_initial", [75, 85, 90, 87]),
        ("sys_final", [115, 123, 130, 118]),
        ("dia_final", [70, 82, 92, 87]),
    ]
}
# No index argument is passed here: the rows are labelled from the Series' index
df = pd.DataFrame(columns)

In [8]:
# Preview the DataFrame built from the dict of Series
df.head()

Unnamed: 0,sys_initial,dia_initial,sys_final,dia_final
a,120,75,115,70
b,126,85,123,82
c,130,90,130,92
d,115,87,118,87


## Indexing Series and DataFrame objects

In [9]:
# Retrieve a value from a pd.Series by its index label (key) using .loc
effective_series.loc["a"]

True

In [10]:
# Retrieve a value from a pd.Series by its integer position using .iloc
# (position 0 is the first element)
effective_series.iloc[0]

True

In [11]:
# Retrieve a row from a pd.DataFrame by its index label (key) using .loc
df.loc["a"]
# The row is returned as a pd.Series indexed by the column names

sys_initial    120
dia_initial     75
sys_final      115
dia_final       70
Name: a, dtype: int64

In [12]:
# Retrieve a row from a pd.DataFrame by its integer position using .iloc
# (position 0 is the first row)
df.iloc[0]

sys_initial    120
dia_initial     75
sys_final      115
dia_final       70
Name: a, dtype: int64

In [13]:
# Retrieve a specific cell by labels: row label first, then column label
df.loc["a", "sys_initial"]

120

In [14]:
# Retrieve a specific cell by labels, second method: select the row as a
# Series first, then look up the column label within that Series
df.loc["a"].loc["sys_initial"]

120

In [15]:
# Retrieve a specific cell by integer positions: row position first, then
# column position (both zero-based)
df.iloc[0, 1]

75

In [16]:
# Retrieve a specific cell by integer positions, second method: select the
# row as a Series first, then take the element at the column position
df.iloc[0].iloc[1]

75

In [17]:
# Retrieve a column as a pd.Series through attribute access.
# NOTE: this only works when the column name is a valid Python identifier
# and does not clash with an existing DataFrame attribute or method.
df.sys_initial

a    120
b    126
c    130
d    115
Name: sys_initial, dtype: int64

In [18]:
# Alternative: retrieve a column by its name with square brackets
df["sys_initial"]

a    120
b    126
c    130
d    115
Name: sys_initial, dtype: int64

In [19]:
# Alternative: look the column name up by position in df.columns, then
# select the column by that name
df[df.columns[2]]

a    115
b    123
c    130
d    118
Name: sys_final, dtype: int64

In [20]:
# Alternative: retrieve a column by integer position with .iloc
# (":" keeps every row, 2 picks the column at position 2)
df.iloc[:, 2]

a    115
b    123
c    130
d    118
Name: sys_final, dtype: int64

In [21]:
# Increasing performance by sorting

# Build a non-unique index: every label from 0 to 999 appears exactly twice
index = list(range(1000)) * 2

# With a non-unique, unsorted index like this one, a label look-up is an
# O(N) operation
series = pd.Series(data=range(2000), index=index)

In [22]:
# Sorting the index improves the look-up scaling to O(log(N)).
# The name is rebound to the sorted copy rather than sorting with
# inplace=True, so the object created by the earlier cell is not mutated
# and this cell can be re-run safely.
series = series.sort_index()