# <u>Introducing Pandas Objects</u>

In [1]:
import pandas as pd
pd.__version__

'1.4.4'

In [2]:
import numpy as np

## The Pandas Series Object

In [3]:
# Build a one-dimensional Series from a plain list. No index is passed,
# so the entries are labelled with the default integers 0..3.
data = pd.Series(data=[0.25, 0.5, 0.75, 1])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [5]:
# .values exposes the data held by the Series as an array
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [4]:
# the underlying values are stored in a NumPy ndarray
type(data.values)

numpy.ndarray

In [6]:
# .index holds the labels; with no explicit index this is a RangeIndex 0..3
data.index

RangeIndex(start=0, stop=4, step=1)

In [7]:
# the default index is a dedicated pandas type, not a plain list or ndarray
type(data.index)

pandas.core.indexes.range.RangeIndex

In [8]:
# a single element is fetched through its index with square brackets, NumPy-style
data[0]

0.25

In [9]:
# slicing returns a Series that keeps the original labels (1 and 2); the stop value 3 is excluded
data[1:3]

1    0.50
2    0.75
dtype: float64

### Series as Generalized NumPy Array

In [12]:
# The index does not have to be made of integers:
# here every value is labelled with a one-letter string.
data = pd.Series(
    [0.25, 0.5, 0.75, 1.0],
    index=list("abcd"),
)
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [13]:
# item access now goes through the string label
data['b']

0.5

In [14]:
# Integer labels are allowed to be non-contiguous and unsorted.
data = pd.Series(data=[0.25, 0.5, 0.75, 1.0], index=[2, 5, 3, 7])
data

2    0.25
5    0.50
3    0.75
7    1.00
dtype: float64

In [15]:
# 5 is matched against the index labels (it is the second label), not used as a position
data[5]

0.5

### Series as Specialized Dictionary

In [6]:
# Map each name to its number of letters, then wrap the mapping in a Series:
# the dictionary keys become the index labels.
num_letters_dict = {
    "Koushik": 7,
    "Raymond": 7,
    "Francis": 7,
    "Thomas": 6,
}
num_letters_sereis = pd.Series(data=num_letters_dict)

In [7]:
# display the Series: the dictionary keys appear as the index, in insertion order
num_letters_sereis

Koushik    7
Raymond    7
Francis    7
Thomas     6
dtype: int64

In [8]:
# dictionary-style lookup of a single value by its key
num_letters_sereis["Koushik"]

7

In [9]:
# unlike a dict, a Series can be sliced by label; the stop label "Francis" is part of the result
num_letters_sereis["Koushik":"Francis"]

Koushik    7
Raymond    7
Francis    7
dtype: int64

In [10]:
# For contrast: slicing a plain Python list is positional and leaves out the stop position.
a = list(range(1, 6))
a[:2]

[1, 2]

### Constructing Series Objects

In [13]:
# data can be a list or NumPy array, in which case index defaults to an integer sequence

# A seeded generator keeps the seven uniform [0, 1) samples identical across
# kernel restarts, so the displayed Series is reproducible.
ar = np.random.default_rng(seed=0).random(7)
ar_series = pd.Series(ar)
ar_series

0    0.761488
1    0.008723
2    0.423022
3    0.752773
4    0.702290
5    0.666283
6    0.620289
dtype: float64

In [14]:
# A scalar is broadcast: the single value 7 is repeated once per index label.
scaler_series = pd.Series(7, index=list(range(7, 36, 7)))
scaler_series

7     7
14    7
21    7
28    7
35    7
dtype: int64

In [15]:
# data can be a dictionary, in which case the index defaults to the dictionary keys
# (this Series was built from num_letters_dict earlier in the notebook)

num_letters_sereis

Koushik    7
Raymond    7
Francis    7
Thomas     6
dtype: int64

In [24]:
# An explicit index decides which dictionary keys are used and in what order.
# Here all three keys are kept, but they are laid out as 3, 1, 2.
pd.Series(
    {2: 'a', 1: 'b', 3: 'c'},
    index=[3, 1, 2],
)

3    c
1    b
2    a
dtype: object

In [25]:
 # only the keys listed in the index (1 and 2) are kept; key 3 is left out
 pd.Series({2:'a', 1:'b', 3:'c'}, index=[1, 2]) # controlling the subset of the keys used

1    b
2    a
dtype: object

In [26]:
 # index label 4 is not a key of the dictionary, so its entry is filled with NaN
 pd.Series({2:'a', 1:'b', 3:'c'}, index=[1, 4])

1      b
4    NaN
dtype: object

## The Pandas DataFrame Object

### DataFrame as Generalized NumPy Array

In [27]:
# A second Series over the same four labels: each name maps to a string.
shername_dict = {
    "Koushik": "Mahanta",
    "Raymond": "Reddington",
    "Francis": "Underwood",
    "Thomas": "Shelby",
}
shername_series = pd.Series(data=shername_dict)
shername_series

Koushik       Mahanta
Raymond    Reddington
Francis     Underwood
Thomas         Shelby
dtype: object

In [28]:
# Combine the two Series into one table: each dictionary key becomes a column
# name, and the shared index labels become the row labels.
legends = pd.DataFrame(
    {
        "Number Of Letters": num_letters_sereis,
        "Shername": shername_series,
    }
)
legends

Unnamed: 0,Number Of Letters,Shername
Koushik,7,Mahanta
Raymond,7,Reddington
Francis,7,Underwood
Thomas,6,Shelby


In [29]:
# row labels of the DataFrame
legends.index

Index(['Koushik', 'Raymond', 'Francis', 'Thomas'], dtype='object')

In [30]:
# column labels are themselves stored in an Index object
legends.columns

Index(['Number Of Letters', 'Shername'], dtype='object')

### DataFrame as Specialized Dictionary

In [35]:
# dictionary-style access: a column name maps to the Series holding that column
legends["Shername"]

Koushik       Mahanta
Raymond    Reddington
Francis     Underwood
Thomas         Shelby
Name: Shername, dtype: object

In [34]:
# two successive lookups: first select the column, then the row label inside that column
legends["Shername"]["Koushik"]

'Mahanta'

### Constructing DataFrame Objects

#### From a single Series object

In [43]:
# a lone Series yields a single-column DataFrame; columns= supplies the column name
pd.DataFrame(num_letters_sereis, columns=["Number of Letters"])

Unnamed: 0,Number of Letters
Koushik,7
Raymond,7
Francis,7
Thomas,6


#### From a list of dicts

In [45]:
# Each dictionary is one row; its keys ("number", "square") become the columns.
pd.DataFrame([{"number": n, "square": n * n} for n in range(10)])

Unnamed: 0,number,square
0,0,0
1,1,1
2,2,4
3,3,9
4,4,16
5,5,25
6,6,36
7,7,49
8,8,64
9,9,81


In [48]:
# The rows do not share all keys: a key that is absent from a row
# shows up as a missing value (NaN) in that row.
pd.DataFrame(
    [
        {'a': 1, 'b': 2},
        {'b': 3, 'c': 4},
    ]
)

Unnamed: 0,a,b,c
0,1.0,2,
1,,3,4.0


#### From a dictionary of Series objects

In [49]:
# Same two Series as before, passed in the opposite order:
# the column order follows the order of the dictionary keys.
pd.DataFrame(
    {
        "Shername": shername_series,
        "Number Of Letters": num_letters_sereis,
    }
)

Unnamed: 0,Shername,Number Of Letters
Koushik,Mahanta,7
Raymond,Reddington,7
Francis,Underwood,7
Thomas,Shelby,6


#### From a two-dimensional NumPy array

In [54]:
# A 4x3 array of uniform [0, 1) samples becomes a DataFrame with explicit
# column and row labels. The generator is seeded so the table is identical
# on every Restart & Run All.
pd.DataFrame(
    np.random.default_rng(seed=0).random((4, 3)),
    columns=["col0", "col1", "col2"],
    index=["1st", "2nd", "3rd", "4th"],
)

Unnamed: 0,col0,col1,col2
1st,0.757673,0.127437,0.108955
2nd,0.48806,0.878232,0.859587
3rd,0.775225,0.893361,0.454684
4th,0.395103,0.962049,0.28571


#### From a NumPy structured array

In [56]:
# Structured array with three zero-filled records; every record has an
# 8-byte integer field "A" and an 8-byte float field "B".
A = np.zeros(
    shape=3,
    dtype=np.dtype([('A', 'i8'), ('B', 'f8')]),
)
A

array([(0, 0.), (0, 0.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')])

In [57]:
# the field names of the structured array (A and B) become the DataFrame columns
pd.DataFrame(A)

Unnamed: 0,A,B
0,0,0.0
1,0,0.0
2,0,0.0


## The Pandas Index Object

In [None]:
# a built-in Python set keeps only unique items, so the repeated 2 collapses into one
set([1,2,2,5])

{1, 2, 5}

In [None]:
# Build an Index from a list of integers; unlike the set above,
# the repeated 5 is kept.
ind = pd.Index(data=[2, 3, 5, 5, 7, 11])
ind

Int64Index([2, 3, 5, 5, 7, 11], dtype='int64')

### Index as Immutable Array

In [63]:
# standard Python indexing retrieves a single value by position
ind[1]

3

In [64]:
# slicing works as on an array and returns an Index — here every second entry
ind[::2]

Int64Index([2, 5, 7], dtype='int64')

In [66]:
# reduce exception reports to a single "Type: message" line
%xmode minimal

Exception reporting mode: Minimal


In [67]:
# One difference between Index objects and NumPy arrays is that the indices are
# immutable: they cannot be modified via the normal means.
#
# The assignment below is expected to fail. The TypeError is caught and printed
# as "Type: message" so that the failure is still shown, but "Run All" is not
# interrupted at this cell and the cells after it still execute.
try:
    ind[0] = 9
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: Index does not support mutable operations

In [71]:
# switch exception reporting to Verbose from this point on
# NOTE(review): IPython's usual default is "Context" — confirm Verbose is intended rather than restoring the default
%xmode verbose

Exception reporting mode: Verbose


### Index as Ordered Set

In [73]:
# Two overlapping Index objects (3, 5 and 7 occur in both)
# used to try out the set operations below.
indA = pd.Index(data=[1, 3, 5, 7, 9])
indB = pd.Index(data=[2, 3, 5, 7, 11])

In [74]:
# intersection: the values present in both indA and indB
indA.intersection(indB)

Int64Index([3, 5, 7], dtype='int64')

In [75]:
# union: every value found in either index, each listed once
indA.union(indB)

Int64Index([1, 2, 3, 5, 7, 9, 11], dtype='int64')

In [76]:
# symmetric difference: the values that belong to exactly one of the two indexes
indA.symmetric_difference(indB)

Int64Index([1, 2, 9, 11], dtype='int64')

In [78]:
# swapping the operands gives the same result as indA.symmetric_difference(indB)
indB.symmetric_difference(indA)

Int64Index([1, 2, 9, 11], dtype='int64')

In [77]:
# difference: the values of indA that do not appear in indB
indA.difference(indB)

Int64Index([1, 9], dtype='int64')

In [79]:
# difference depends on operand order: this keeps the values of indB missing from indA
indB.difference(indA)

Int64Index([2, 11], dtype='int64')