In [1]:
import numpy as np
import pandas as pd

# Series

In [2]:
obj = pd.Series([4, 7, -5, 3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [3]:
obj.values

array([ 4,  7, -5,  3])

In [4]:
obj.index

RangeIndex(start=0, stop=4, step=1)

In [5]:
number_series = pd.Series(np.arange(1,10,2))
number_series

0    1
1    3
2    5
3    7
4    9
dtype: int64

In [6]:
# Passing a dictionary to get a series.
pd.Series({"1":1,"2":2,"3":3,"4":4,"5":5})

1    1
2    2
3    3
4    4
5    5
dtype: int64

In [7]:
# Providing index for a series.
number_info = {"1":1,"2":2,"3":3,"4":4,"5":5}
pd.Series(np.arange(1,6),number_info.keys())

1    1
2    2
3    3
4    4
5    5
dtype: int64

In [8]:
obj2 = pd.Series(index=[4, 7, -5, 3], data=['d', 'b', 'a', 'c'])
obj2

 4    d
 7    b
-5    a
 3    c
dtype: object

In [9]:
obj2 = pd.Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])
obj2

d    4
b    7
a   -5
c    3
dtype: int64

In [10]:
# Checking Index.
obj2.index

Index(['d', 'b', 'a', 'c'], dtype='object')

In [11]:
obj2.values

array([ 4,  7, -5,  3])

In [12]:
obj2.index

Index(['d', 'b', 'a', 'c'], dtype='object')

In [15]:
pd.read_html("http://www.fdic.gov/bank/individual/failed/banklist.html")

[                               Bank Name        City  ST   CERT  \
 0       City National Bank of New Jersey      Newark  NJ  21111   
 1                          Resolute Bank      Maumee  OH  58317   
 2                  Louisa Community Bank      Louisa  KY  58112   
 3                   The Enloe State Bank      Cooper  TX  10716   
 4    Washington Federal Bank for Savings     Chicago  IL  30570   
 ..                                   ...         ...  ..    ...   
 554                   Superior Bank, FSB    Hinsdale  IL  32646   
 555                  Malta National Bank       Malta  OH   6629   
 556      First Alliance Bank & Trust Co.  Manchester  NH  34264   
 557    National State Bank of Metropolis  Metropolis  IL   3815   
 558                     Bank of Honolulu    Honolulu  HI  21029   
 
                    Acquiring Institution       Closing Date       Updated Date  
 0                        Industrial Bank   November 1, 2019  December 19, 2019  
 1                

In [16]:
len(pd.read_html("http://www.fdic.gov/bank/individual/failed/banklist.html"))

1

In [17]:
pd.DataFrame(pd.read_html("http://www.fdic.gov/bank/individual/failed/banklist.html")[0])

Unnamed: 0,Bank Name,City,ST,CERT,Acquiring Institution,Closing Date,Updated Date
0,City National Bank of New Jersey,Newark,NJ,21111,Industrial Bank,"November 1, 2019","December 19, 2019"
1,Resolute Bank,Maumee,OH,58317,Buckeye State Bank,"October 25, 2019","December 19, 2019"
2,Louisa Community Bank,Louisa,KY,58112,Kentucky Farmers Bank Corporation,"October 25, 2019","December 19, 2019"
3,The Enloe State Bank,Cooper,TX,10716,"Legend Bank, N. A.","May 31, 2019","December 19, 2019"
4,Washington Federal Bank for Savings,Chicago,IL,30570,Royal Savings Bank,"December 15, 2017","July 24, 2019"
...,...,...,...,...,...,...,...
554,"Superior Bank, FSB",Hinsdale,IL,32646,"Superior Federal, FSB","July 27, 2001","August 19, 2014"
555,Malta National Bank,Malta,OH,6629,North Valley Bank,"May 3, 2001","November 18, 2002"
556,First Alliance Bank & Trust Co.,Manchester,NH,34264,Southern New Hampshire Bank & Trust,"February 2, 2001","February 18, 2003"
557,National State Bank of Metropolis,Metropolis,IL,3815,Banterra Bank of Marion,"December 14, 2000","March 17, 2005"


## Resuming series.

In [18]:
obj2

d    4
b    7
a   -5
c    3
dtype: int64

In [19]:
obj2['d']

4

In [20]:
# Fancy Indexing.
obj2[['c', 'a', 'd']]

c    3
a   -5
d    4
dtype: int64

In [21]:
obj2[obj2<0]

a   -5
dtype: int64

In [22]:
# Multiplication operation on entire series.
obj2 * 2

d     8
b    14
a   -10
c     6
dtype: int64

In [23]:
np.exp(obj2)

d      54.598150
b    1096.633158
a       0.006738
c      20.085537
dtype: float64

In [24]:
# Checking membership in a series.
"b" in obj2

True

In [25]:
# Creating a series by passing a dictionary.
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj3 = pd.Series(sdata)
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [26]:
# Keeping key order as per dictionary.
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = pd.Series(sdata, index=states)
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [27]:
pk_states = ["Karachi", "Peshawar", "Lahore", "Islamabad", "Sailkot"]
obj5 = pd.Series(sdata, index=pk_states)
obj5

Karachi     NaN
Peshawar    NaN
Lahore      NaN
Islamabad   NaN
Sailkot     NaN
dtype: float64

In [28]:
pk_sdata = {'Karachi': 35000, 'Peshawar': 71000, 'Lahore': 16000, 'Islamabad': 5000}
pk_states = ["Karachi", "Peshawar", "Lahore", "Islamabad", "Sailkot"]
obj6 = pd.Series(pk_sdata, pk_states)
obj6

Karachi      35000.0
Peshawar     71000.0
Lahore       16000.0
Islamabad     5000.0
Sailkot          NaN
dtype: float64

In [29]:
pk_sdata = {'Karachi': 35000, 'Peshawar': 71000, 'Lahore': 16000, 'Faislabad': 5000}
pk_states = ["Karachi", "Peshawar", "Lahore", "Islamabad", "Sailkot"]
obj7 = pd.Series(pk_sdata, pk_states)
obj7

Karachi      35000.0
Peshawar     71000.0
Lahore       16000.0
Islamabad        NaN
Sailkot          NaN
dtype: float64

In [30]:
# Checking missing values.
obj7.isnull()

Karachi      False
Peshawar     False
Lahore       False
Islamabad     True
Sailkot       True
dtype: bool

In [31]:
# Checking non-missing values.
obj7.notnull()

Karachi       True
Peshawar      True
Lahore        True
Islamabad    False
Sailkot      False
dtype: bool

### Performing Arthematic Operations with Series.

In [32]:
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [33]:
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [34]:
obj3 + obj4

California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64

In [35]:
obj3 * obj4

California             NaN
Ohio          1.225000e+09
Oregon        2.560000e+08
Texas         5.041000e+09
Utah                   NaN
dtype: float64

In [36]:
obj3 / obj4

California    NaN
Ohio          1.0
Oregon        1.0
Texas         1.0
Utah          NaN
dtype: float64

In [37]:
obj3 - obj4

California    NaN
Ohio          0.0
Oregon        0.0
Texas         0.0
Utah          NaN
dtype: float64

### Name attribute.

In [38]:
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [39]:
obj4.name = 'population'

In [40]:
obj4.index.name = "state"

In [41]:
obj4

state
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
Name: population, dtype: float64

In [42]:
## Altering Index of Series.
obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']
obj

Bob      4
Steve    7
Jeff    -5
Ryan     3
dtype: int64

In [43]:
# ValueError: Length mismatch: Expected axis has 4 elements, new values have 5 elements
#
# obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan', "Bruce"]
# obj

In [44]:
series1  = pd.Series(np.arange(1,11))
series1

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64

In [45]:
series2 = series1.copy()
series2

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64

In [46]:
series3 = series1
series3[1:4] = 1

In [47]:
series3 + series3
series1

0     1
1     1
2     1
3     1
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64

In [48]:
series2 # Data is intact because we copied it explicity from series1.

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64

# DataFrames

In [49]:
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
'year': [2000, 2001, 2002, 2001, 2002, 2003],
'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}
frame = pd.DataFrame(data)

In [50]:
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [51]:
# For large DataFrames, the head method selects only the first five rows:
frame.head()

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9


In [52]:
frame.head(n=2)

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7


In [53]:
frame.head(n=8) #No exception.

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


### Accessing columns as attribute.

In [62]:
#Accessing columns.
frame.pop("pop")

0    1.5
1    1.7
2    3.6
3    2.4
4    2.9
5    3.2
Name: pop, dtype: float64

### Accessing columns dictionary way.

In [58]:
frame["state"]

0      Ohio
1      Ohio
2      Ohio
3    Nevada
4    Nevada
5    Nevada
Name: state, dtype: object

In [59]:
frame["pop"]

0    1.5
1    1.7
2    3.6
3    2.4
4    2.9
5    3.2
Name: pop, dtype: float64

In [60]:
frame.year

0    2000
1    2001
2    2002
3    2001
4    2002
5    2003
Name: year, dtype: int64

In [64]:
# AttributeError: 'DataFrame' object has no attribute 'year2'
# frame.year2

In [65]:
frame2 = frame.copy()
frame2

Unnamed: 0,state,year
0,Ohio,2000
1,Ohio,2001
2,Ohio,2002
3,Nevada,2001
4,Nevada,2002
5,Nevada,2003


In [66]:
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
'year': [2000, 2001, 2002, 2001, 2002, 2003],
'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}
frame2 = pd.DataFrame(data)

In [67]:
frame2

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [73]:
#Adding a new column and assigning it a scalar value.
frame2["debt"] = 16.5
frame2

Unnamed: 0,state,year,pop,debt
0,Ohio,2000,1.5,16.5
1,Ohio,2001,1.7,16.5
2,Ohio,2002,3.6,16.5
3,Nevada,2001,2.4,16.5
4,Nevada,2002,2.9,16.5
5,Nevada,2003,3.2,16.5


In [76]:
frame2["balance"] = 20000
frame2

Unnamed: 0,state,year,pop,debt,balance
0,Ohio,2000,1.5,16.5,20000
1,Ohio,2001,1.7,16.5,20000
2,Ohio,2002,3.6,16.5,20000
3,Nevada,2001,2.4,16.5,20000
4,Nevada,2002,2.9,16.5,20000
5,Nevada,2003,3.2,16.5,20000


In [80]:
type(frame2['debt'][0])

numpy.float64

In [None]:
frame2['debt'] = np.arange(6.)
frame2