In [2]:
import numpy as np
import pandas as pd

###Pandas Series Object 
- flexible numpy arrays
- Faster time in process
- Contains Index and Values

In [3]:
data = pd.Series([10,11,12,13,14,15])

In [4]:
data

0    10
1    11
2    12
3    13
4    14
5    15
dtype: int64

In [5]:
data[0]

10

In [6]:
data[5]

15

In [7]:
type(data.values)

numpy.ndarray

In [8]:
type(data)

pandas.core.series.Series

##Here we see:
- that the values(10,11,12,...) are stored in numpy
- the type of data (entire series) is Panda Series

##WE can get the range in pandas by summoning it


In [9]:
data.index

RangeIndex(start=0, stop=6, step=1)

##Slicing

In [10]:
data[1:3]

1    11
2    12
dtype: int64

##Can attach index seperately

In [11]:
new_data = pd.Series([11,22,33,44,55],index=['a','b','c','d','e'])

In [12]:
new_data['a']

11

In [13]:
new_data['e']

55

##DIfference with Dictionary:
In dictonary you cannot assign the same key name twice to two values whereas in pandas you can assign the same index name in the series to two different series elements

In [14]:
new_data = pd.Series([11,22,33,44,55],index=['a','b','c','a','a'])

In [15]:
new_data

a    11
b    22
c    33
a    44
a    55
dtype: int64

##How to convert a dictionary into Pandas

In [16]:
population_dict = {'California': 38332521,
                   'Texas': 26448193,
                   'New York': 19651127,
                   'Florida': 19552860,
                   'Illinois': 12882135}

In [32]:
population=pd.Series(population_dict,index=['Texas',2,3,4,5])

In [33]:
population

Texas    26448193.0
2               NaN
3               NaN
4               NaN
5               NaN
dtype: float64

In [19]:
population['New York']

19651127

In [20]:
pd.Series(15,index=[100,200,300])

100    15
200    15
300    15
dtype: int64

## PANDAS DATA FRAME OBJECT

In [21]:
city = pd.Series(['Mumbai', 'Chennai', 'Hyderabad', 'Bangalore', 'Gurgaon'], index=[1,2,3,4,5])

In [22]:
city

1       Mumbai
2      Chennai
3    Hyderabad
4    Bangalore
5      Gurgaon
dtype: object

In [23]:
rating = pd.Series([3,4,1,5,2], index=[1,2,3,4,5])

In [24]:
rating

1    3
2    4
3    1
4    5
5    2
dtype: int64

In [25]:
my_pref = pd.DataFrame({'Cities':city, 'Pais\'s rating':rating})

In [26]:
my_pref

Unnamed: 0,Cities,Pais's rating
1,Mumbai,3
2,Chennai,4
3,Hyderabad,1
4,Bangalore,5
5,Gurgaon,2


In [27]:
#Can pull out the index of the DataFrame directly through this command
my_pref.index

Int64Index([1, 2, 3, 4, 5], dtype='int64')

In [28]:
my_pref.columns

Index([u'Cities', u'Pais's rating'], dtype='object')

In [29]:
pd.DataFrame( {'cities':city, 'population':population},columns=['cities','population'])

Unnamed: 0,cities,population
1,Mumbai,
2,Chennai,
3,Hyderabad,
4,Bangalore,
5,Gurgaon,
California,,38332521.0
Florida,,19552860.0
Illinois,,12882135.0
New York,,19651127.0
Texas,,26448193.0


In [75]:
city

1       Mumbai
2      Chennai
3    Hyderabad
4    Bangalore
5      Gurgaon
dtype: object

In [16]:
my_pref = pd.DataFrame({'city':city, 'population':rating})

In [17]:
my_pref

Unnamed: 0,city,population
1,Mumbai,3
2,Chennai,4
3,Hyderabad,1
4,Bangalore,5
5,Gurgaon,2


In [18]:
#Here's how you can create a DataFrame using only one variable and using two [[]] brackets

In [20]:
my_pref[['city']]

Unnamed: 0,city
1,Mumbai
2,Chennai
3,Hyderabad
4,Bangalore
5,Gurgaon


In [22]:
mat = np.random.randint(100,size=(3,2))

In [23]:
mat

array([[ 5, 16],
       [13, 72],
       [ 4, 63]])

In [27]:
#This is how you add column and index names
d = pd.DataFrame(mat,columns=['maths','science'], index=['a','b','c'])

In [28]:
d

Unnamed: 0,maths,science
a,5,16
b,13,72
c,4,63


In [35]:
#This is how you add index to an empty data frame
index = pd.Index(['ab','cd','ef','gh'])

In [36]:
index

Index([u'ab', u'cd', u'ef', u'gh'], dtype='object')

In [37]:
pd.DataFrame(city,index=index)

Unnamed: 0,0
ab,
cd,
ef,
gh,


In [38]:
city

1       Mumbai
2      Chennai
3    Hyderabad
4    Bangalore
5      Gurgaon
dtype: object

In [41]:
#As you can see you can add a defined index for any data frame
pd.DataFrame(city,index=index)

Unnamed: 0,0
ab,
cd,
ef,
gh,


In [44]:
data = pd.Series([0.25,0.50,0.75,1], index=['a','b','c','d'])

In [45]:
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [46]:
data['a']

0.25

In [48]:
data.keys()

Index([u'a', u'b', u'c', u'd'], dtype='object')

In [49]:
data.index

Index([u'a', u'b', u'c', u'd'], dtype='object')

In [53]:
data.item

<bound method Series.item of a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64>

In [54]:
my_pref

Unnamed: 0,city,population
1,Mumbai,3
2,Chennai,4
3,Hyderabad,1
4,Bangalore,5
5,Gurgaon,2


In [58]:
#Can use slicing to filter data
my_pref[1:3]

Unnamed: 0,city,population
2,Chennai,4
3,Hyderabad,1


In [60]:
num = np.random.random((3,4))

In [61]:
num

array([[ 0.39661658,  0.64782166,  0.88367174,  0.39296185],
       [ 0.00676925,  0.15780079,  0.93546388,  0.90259547],
       [ 0.12674501,  0.57730994,  0.95959692,  0.52764924]])

In [62]:
d = pd.DataFrame(num)

In [63]:
d

Unnamed: 0,0,1,2,3
0,0.396617,0.647822,0.883672,0.392962
1,0.006769,0.157801,0.935464,0.902595
2,0.126745,0.57731,0.959597,0.527649


In [66]:
d[(d<0.5) & (d>0.3)]

Unnamed: 0,0,1,2,3
0,0.396617,,,0.392962
1,,,,
2,,,,


In [74]:
my_pref = pd.DataFrame({'a':[1,2,3], 'b': [5,6,7]}, index=['x','y','z'])

In [75]:
my_pref

Unnamed: 0,a,b
x,1,5
y,2,6
z,3,7


In [76]:
my_pref[my_pref['a']>2]

Unnamed: 0,a,b
z,3,7


In [77]:
# loc - works on labels in the index
# iloc - works on position on the index
# ix - usually behave like loc but falls back to iloc if labels are not present

In [78]:
df = pd.DataFrame({'a':[1,2,3],'b':[4,5,6]}, index = ['x','y','z'])

In [79]:
df

Unnamed: 0,a,b
x,1,4
y,2,5
z,3,6


In [80]:
df.loc('x':'z')

SyntaxError: invalid syntax (<ipython-input-80-2cd216bf2088>, line 1)

In [81]:

s = pd.Series(np.nan, index=[49,48,47,46,45, 1, 2, 3, 4, 5])


In [82]:
s.loc[:3]

49   NaN
48   NaN
47   NaN
46   NaN
45   NaN
1    NaN
2    NaN
3    NaN
dtype: float64

In [91]:
s.ix[1]

nan