# Pandas Data Structures

* Series
* Dataframe
* Panel

In [2]:
import pandas as pd
import numpy as np

## Series 

### Series Creation

In [10]:
np.random.seed(100)
data = np.random.rand(7)
ser = pd.Series(data)
ser

0    0.543405
1    0.278369
2    0.424518
3    0.844776
4    0.004719
5    0.121569
6    0.670749
dtype: float64

#### Create a Series Structure of first 5 months of the year with a specified index of month names:

In [20]:
import calendar as cal
monthName = [cal.month_name[i] for i in np.arange(1,13)]
months = pd.Series(np.arange(1,13),index = monthName)
months

January       1
February      2
March         3
April         4
May           5
June          6
July          7
August        8
September     9
October      10
November     11
December     12
dtype: int32

#### Index of Series


In [24]:
months.index

Index(['January', 'February', 'March', 'April', 'May', 'June', 'July',
       'August', 'September', 'October', 'November', 'December'],
      dtype='object')

#### Series using Python Dictionary

In [67]:
currDict  = {'US' :'dollar','UK' : 'pound','Germany': 'euro',
             'Mexico' : 'peso','Nigeria':'Naira','China':'yuan','Japan':'yen'}
currDict

{'US': 'dollar',
 'UK': 'pound',
 'Germany': 'euro',
 'Mexico': 'peso',
 'Nigeria': 'Naira',
 'China': 'yuan',
 'Japan': 'yen'}

In [68]:
currSeries =pd.Series(currDict)
currSeries

US         dollar
UK          pound
Germany      euro
Mexico       peso
Nigeria     Naira
China        yuan
Japan         yen
dtype: object

### Operations on Series

In [33]:
currDict['China']

'yuan'

#### Assignment Operation

In [36]:
currDict['China']='Yuan'
currDict

{'US': 'dollar',
 'UK': 'pound',
 'Germany': 'euro',
 'Mexico': 'peso',
 'Nigeria': 'Naira',
 'China': 'Yuan',
 'Japan': 'yen'}

In [41]:
currDict.get('UK')

'pound'

#### Slicing 

In [53]:
currSeries[:2]

US    dollar
UK     pound
dtype: object

In [4]:
currVal  = {'US' :73,'UK' : 103,'Germany': 80,
             'Mexico' : 4,'Nigeria':0.003,'China':12,'Japan':0.3}
currVal = pd.Series(currVal)
currVal

US          73.000
UK         103.000
Germany     80.000
Mexico       4.000
Nigeria      0.003
China       12.000
Japan        0.300
dtype: float64

In [62]:
np.mean(currVal)

38.90042857142857

In [64]:
np.std(currVal)

41.24038694517493

In [66]:
currVal*currVal


US          5329.000000
UK         10609.000000
Germany     6400.000000
Mexico        16.000000
Nigeria        0.000009
China        144.000000
Japan          0.090000
dtype: float64

In [5]:
np.sqrt(currVal)

US          8.544004
UK         10.148892
Germany     8.944272
Mexico      2.000000
Nigeria     0.054772
China       3.464102
Japan       0.547723
dtype: float64

### Slicing in Series

In [12]:
currVal[1:]

UK         103.000
Germany     80.000
Mexico       4.000
Nigeria      0.003
China       12.000
Japan        0.300
dtype: float64

In [14]:
currVal[currVal>100]

UK    103.0
dtype: float64

In [18]:
currVal[1:]+currVal[:-2]

China          NaN
Germany    160.000
Japan          NaN
Mexico       8.000
Nigeria      0.006
UK         206.000
US             NaN
dtype: float64

## DateFrame 2-D labeled array

### Using Dictionaries of Series

In [24]:
stockSummaries = {'AMZN':pd.Series([346.15,0.59,459,0.52,589.8,158.88],index =[
    'Closing Price','EPS','Shares Outstanding(M)','Beta','P/E','Market Cap(B)']),
                 'GOOG': pd.Series([1133.43,36.05,335.83,0.87,31.44,380.64],
 index=['Closing price','EPS','Shares Outstanding(M)',
 'Beta','P/E','Market Cap(B)']),'FB': pd.Series([61.48,0.59,2450,104.93,150.92],
 index=['Closing price','EPS','Shares Outstanding(M)',
 'P/E', 'Market Cap(B)']),
'YHOO': pd.Series([34.90,1.27,1010,27.48,0.66,35.36],
 index=['Closing price','EPS','Shares Outstanding(M)',
 'P/E','Beta', 'Market Cap(B)']),
'TWTR':pd.Series([65.25,-0.3,555.2,36.23],
 index=['Closing price','EPS','Shares Outstanding(M)',
 'Market Cap(B)']),
'AAPL':pd.Series([501.53,40.32,892.45,12.44,447.59,0.84],
 index=['Closing price','EPS','Shares Outstanding(M)','P/E',
 'Market Cap(B)','Beta'])}
stockSummaries

{'AMZN': Closing Price            346.15
 EPS                        0.59
 Shares Outstanding(M)    459.00
 Beta                       0.52
 P/E                      589.80
 Market Cap(B)            158.88
 dtype: float64,
 'GOOG': Closing price            1133.43
 EPS                        36.05
 Shares Outstanding(M)     335.83
 Beta                        0.87
 P/E                        31.44
 Market Cap(B)             380.64
 dtype: float64,
 'FB': Closing price              61.48
 EPS                         0.59
 Shares Outstanding(M)    2450.00
 P/E                       104.93
 Market Cap(B)             150.92
 dtype: float64,
 'YHOO': Closing price              34.90
 EPS                         1.27
 Shares Outstanding(M)    1010.00
 P/E                        27.48
 Beta                        0.66
 Market Cap(B)              35.36
 dtype: float64,
 'TWTR': Closing price             65.25
 EPS                       -0.30
 Shares Outstanding(M)    555.20
 Market Cap(B)     

In [26]:
stockDF =pd.DataFrame(stockSummaries)
stockDF

Unnamed: 0,AMZN,GOOG,FB,YHOO,TWTR,AAPL
Beta,0.52,0.87,,0.66,,0.84
Closing Price,346.15,,,,,
Closing price,,1133.43,61.48,34.9,65.25,501.53
EPS,0.59,36.05,0.59,1.27,-0.3,40.32
Market Cap(B),158.88,380.64,150.92,35.36,36.23,447.59
P/E,589.8,31.44,104.93,27.48,,12.44
Shares Outstanding(M),459.0,335.83,2450.0,1010.0,555.2,892.45


In [41]:
stockDF =pd.DataFrame(stockSummaries,index =['Closing price','EPS','Shares Outstanding (M)',
                                             'P/E','Market Cap(B)','Beta'])
stockDF

Unnamed: 0,AMZN,GOOG,FB,YHOO,TWTR,AAPL
Closing price,,1133.43,61.48,34.9,65.25,501.53
EPS,0.59,36.05,0.59,1.27,-0.3,40.32
Shares Outstanding (M),,,,,,
P/E,589.8,31.44,104.93,27.48,,12.44
Market Cap(B),158.88,380.64,150.92,35.36,36.23,447.59
Beta,0.52,0.87,,0.66,,0.84


In [40]:
stockDF_1 =pd.DataFrame(stockSummaries,index =['Closing price','EPS','Shares Outstanding (M)',
                                             'P/E','Market Cap(B)','Beta'],columns =['FB','TWTR','SCNW'])
stockDF_1

Unnamed: 0,FB,TWTR,SCNW
Closing price,61.48,65.25,
EPS,0.59,-0.3,
Shares Outstanding (M),,,
P/E,104.93,,
Market Cap(B),150.92,36.23,
Beta,,,


In [43]:
stockDF.index

Index(['Closing price', 'EPS', 'Shares Outstanding (M)', 'P/E',
       'Market Cap(B)', 'Beta'],
      dtype='object')

In [44]:
stockDF.columns

Index(['AMZN', 'GOOG', 'FB', 'YHOO', 'TWTR', 'AAPL'], dtype='object')

### Using a Dictionary of ndarrays/lists

In [50]:
algos={'search':['DFS','BFS','Binary Search',
 'Linear','ShortestPath (Djikstra)'],
 'sorting': ['Quicksort','Mergesort', 'Heapsort',
 'Bubble Sort', 'Insertion Sort'],
 'machine learning':['RandomForest',
 'K Nearest Neighbor',
 'Logistic Regression',
 'K-Means Clustering',
 'Linear Regression']}
algoDF =pd.DataFrame(algos)
algoDF

Unnamed: 0,search,sorting,machine learning
0,DFS,Quicksort,RandomForest
1,BFS,Mergesort,K Nearest Neighbor
2,Binary Search,Heapsort,Logistic Regression
3,Linear,Bubble Sort,K-Means Clustering
4,ShortestPath (Djikstra),Insertion Sort,Linear Regression


#### Defining Index

In [53]:
pd.DataFrame(algos,index =['algo_1','algo_2','algo_3','algo_4','algo_5'])

Unnamed: 0,search,sorting,machine learning
algo_1,DFS,Quicksort,RandomForest
algo_2,BFS,Mergesort,K Nearest Neighbor
algo_3,Binary Search,Heapsort,Logistic Regression
algo_4,Linear,Bubble Sort,K-Means Clustering
algo_5,ShortestPath (Djikstra),Insertion Sort,Linear Regression


### From a Sturctured array

In [62]:
memberData = np.zeros((4,),dtype=[('Name','<U15'), ('Age','i4'), ('Weight','f2')])

memberData[:] = [('Sanjeev',37,162.4),('Yingluck',45,137.8),
 ('Emeka',28,153.2),
 ('Amy',67,101.3)]
memberDF =pd.DataFrame(memberData)
memberDF

Unnamed: 0,Name,Age,Weight
0,Sanjeev,37,162.375
1,Yingluck,45,137.75
2,Emeka,28,153.25
3,Amy,67,101.3125


In [64]:
pd.DataFrame(memberData,index =['a','b','c','d'])

Unnamed: 0,Name,Age,Weight
a,Sanjeev,37,162.375
b,Yingluck,45,137.75
c,Emeka,28,153.25
d,Amy,67,101.3125


In [72]:
currSeries.name ='currency'
pd.DataFrame(currSeries)

Unnamed: 0,currency
US,dollar
UK,pound
Germany,euro
Mexico,peso
Nigeria,Naira
China,yuan
Japan,yen


### Operations on DataFrame

In [78]:
memberDF

Unnamed: 0,Name,Age,Weight,Height
0,Sanjeev,37,162.375,60
1,Yingluck,45,137.75,60
2,Emeka,28,153.25,60
3,Amy,67,101.3125,60


#### Assignment Operation

In [88]:
memberDF['Height']=60
memberDF

Unnamed: 0,Name,Age,isSenior,Weight,Height
0,Sanjeev,37,False,162.375,60
1,Yingluck,45,False,137.75,60
2,Emeka,28,False,153.25,60
3,Amy,67,True,101.3125,60


#### Deletion Operation

In [96]:
del memberDF['isSenior'];
memberDF

Unnamed: 0,Name,Age,Weight
0,Sanjeev,37,162.375
1,Yingluck,45,137.75
2,Emeka,28,153.25
3,Amy,67,101.3125


#### Column Inserted at the end; to insert a column at a specific location

In [97]:
memberDF.insert(2,'isSenior',memberDF['Age']>60)
memberDF

Unnamed: 0,Name,Age,isSenior,Weight
0,Sanjeev,37,False,162.375
1,Yingluck,45,False,137.75
2,Emeka,28,False,153.25
3,Amy,67,True,101.3125


#### Alignment

In [105]:
ore1DF=pd.DataFrame(np.array([[20,35,25,20],
 [11,28,32,29]]),
columns=['iron','magnesium',
 'copper','silver'])
ore2DF=pd.DataFrame(np.array([[14,34,26,26],
 [33,19,25,23]]),
 columns=['iron','magnesium',
 'gold','silver'])
ore1DF

Unnamed: 0,iron,magnesium,copper,silver
0,20,35,25,20
1,11,28,32,29


In [107]:
ore2DF

Unnamed: 0,iron,magnesium,gold,silver
0,14,34,26,26
1,33,19,25,23


In [109]:
ore2DF + ore1DF

Unnamed: 0,copper,gold,iron,magnesium,silver
0,,,34,69,46
1,,,44,47,52


#### Other mathematical operations

In [112]:
np.sqrt(ore2DF),np.sqrt(ore1DF)

(       iron  magnesium     gold    silver
 0  3.741657   5.830952  5.09902  5.099020
 1  5.744563   4.358899  5.00000  4.795832,
        iron  magnesium    copper    silver
 0  4.472136   5.916080  5.000000  4.472136
 1  3.316625   5.291503  5.656854  5.385165)