In [47]:
import pandas as pd
import numpy as np

# Pandas data structures: the Series, DataFrame, and Index.

# The Pandas Series Object

In [2]:
#A Pandas Series is a one-dimensional array of indexed data. It can be created from a
#list or array as follows:

In [3]:
# Build a Series from a plain list; pandas supplies the default 0..n-1 index.
s = pd.Series(data=[1, 4, 2, 8, 10])

In [4]:
s

0     1
1     4
2     2
3     8
4    10
dtype: int64

In [5]:
#the Series wraps both a sequence of values and a
#sequence of indices, which we can access with the values and index attributes.

In [6]:
s.values

array([ 1,  4,  2,  8, 10])

In [7]:
s.index

RangeIndex(start=0, stop=5, step=1)

In [8]:
s[1]

4

In [9]:
# Integer slices on a Series select by position; .iloc makes that explicit.
s.iloc[1:4]

1    4
2    2
3    8
dtype: int64

In [10]:
# A Series with explicit string labels instead of the default integer index.
data = pd.Series(
    data=[1, 5, 3, 2],
    index=['a', 'b', 'c', 'd'],
)

In [11]:
data

a    1
b    5
c    3
d    2
dtype: int64

In [12]:
data.values

array([1, 5, 3, 2])

In [13]:
data.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [14]:
data['c']

3

# Series as specialized dictionary

In [15]:
# A dict maps directly onto a Series: keys become the index, values the data.
# Mixed value types (str and int) give the Series an object dtype.
ser = pd.Series({
    'name': 'Test',
    'age': 20,
    'gender': 'male',
    'mobile': 12345555,
})

In [16]:
ser

name          Test
age             20
gender        male
mobile    12345555
dtype: object

In [17]:
ser.values

array(['Test', 20, 'male', 12345555], dtype=object)

In [18]:
ser.index

Index(['name', 'age', 'gender', 'mobile'], dtype='object')

In [19]:
ser['mobile']

12345555

In [20]:
ser['name':'gender']

name      Test
age         20
gender    male
dtype: object

In [21]:
#data can be a scalar, which is repeated to fill the specified index

In [22]:
# A scalar is broadcast to every label in the supplied index.
data = pd.Series(data=5, index=[100, 200, 300])

In [23]:
data

100    5
200    5
300    5
dtype: int64

In [24]:
#When the data is a dictionary, an explicit index selects (and orders) just the matching keys;
#dictionary keys that are not listed in the index are dropped

In [25]:
# Only the dict keys named in `index` are kept, in the order given there.
data = pd.Series(
    data={2: 'a', 1: 'b', 4: 'd'},
    index=[1, 2],
)

In [26]:
data

1    b
2    a
dtype: object

# The Pandas DataFrame Object

# DataFrame from Series

In [27]:
# Population figures keyed by city name.
population = pd.Series({
    'Mumbai': 1200000,
    'Chennai': 1300000,
    'Indore': 1100000,
    'Bangalore': 2000000,
})

In [28]:
# Area figures keyed by the same city names as `population`.
area = pd.Series({
    'Mumbai': 15000,
    'Chennai': 13000,
    'Indore': 29000,
    'Bangalore': 34000,
})

In [29]:
population

Mumbai       1200000
Chennai      1300000
Indore       1100000
Bangalore    2000000
dtype: int64

In [30]:
area

Mumbai       15000
Chennai      13000
Indore       29000
Bangalore    34000
dtype: int64

In [31]:
# Combine the two Series into one table; each dict key becomes a column name
# and the rows are aligned on the shared city index.
df = pd.DataFrame({
    'Population': population,
    'Area': area,
})

In [32]:
df

Unnamed: 0,Population,Area
Mumbai,1200000,15000
Chennai,1300000,13000
Indore,1100000,29000
Bangalore,2000000,34000


In [33]:
df.index

Index(['Mumbai', 'Chennai', 'Indore', 'Bangalore'], dtype='object')

In [35]:
df.columns

Index(['Population', 'Area'], dtype='object')

In [37]:
df['Area']

Mumbai       15000
Chennai      13000
Indore       29000
Bangalore    34000
Name: Area, dtype: int64

# Constructing DataFrame objects

In [38]:
#From a single Series object

In [39]:
pd.DataFrame(population, columns=['population'])

Unnamed: 0,population
Mumbai,1200000
Chennai,1300000
Indore,1100000
Bangalore,2000000


In [40]:
#From a list of dicts

In [41]:
pd.DataFrame([{'a':i,'b':3*i+i/2} for i in range(4)])

Unnamed: 0,a,b
0,0,0.0
1,1,3.5
2,2,7.0
3,3,10.5


In [42]:
#Even if some keys in the dictionary are missing, Pandas will fill them in with NaN

In [43]:
pd.DataFrame([{'a':1,'b':3},{'b':4,'d':7}])

Unnamed: 0,a,b,d
0,1.0,3,
1,,4,7.0


In [44]:
#From a dictionary of Series objects

In [45]:
pd.DataFrame({'Population':population,'Area':area})

Unnamed: 0,Population,Area
Mumbai,1200000,15000
Chennai,1300000,13000
Indore,1100000,29000
Bangalore,2000000,34000


In [46]:
#From a two-dimensional NumPy array.

In [48]:
# Wrap a 3x4 array of random integers drawn from [0, 10) in a DataFrame.
# No seed is set in the visible cells, so the values differ on every run.
pd.DataFrame(np.random.randint(0,10,size=(3,4)))

Unnamed: 0,0,1,2,3
0,2,4,3,7
1,1,9,1,8
2,4,9,2,8


In [49]:
#providing labels to columns and also providing indexes

In [50]:
# Same kind of random 3x4 integer array, this time with explicit row labels
# (index) and column labels (columns). No seed is set in the visible cells,
# so the values differ on every run.
pd.DataFrame(np.random.randint(0,10,size=(3,4)),index=['a','b','c'],columns=['aa','bb','cc','dd'])

Unnamed: 0,aa,bb,cc,dd
a,5,2,3,6
b,1,5,4,6
c,9,9,9,5


In [51]:
#From a NumPy structured array

In [52]:
# Structured array of three zeroed records with an int64 field 'A' and a
# float64 field 'B'.
A = np.zeros(
    shape=3,
    dtype=[('A', 'i8'), ('B', 'f8')],
)

In [53]:
A

array([(0, 0.), (0, 0.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')])

In [54]:
pd.DataFrame(A)

Unnamed: 0,A,B
0,0,0.0
1,0,0.0
2,0,0.0


# The Pandas Index Object

In [55]:
#The Index object is an interesting
#structure in itself, and it can be thought of either as an immutable array or as an
#ordered set.

In [56]:
# Build a standalone Index from a list of integers.
ind = pd.Index(data=[2, 3, 5, 7, 11])

In [57]:
ind

Int64Index([2, 3, 5, 7, 11], dtype='int64')

In [58]:
#The Index object in many ways operates like an array. For example, we can use
#standard Python indexing notation to retrieve values or slices.

In [59]:
ind[1]

3

In [60]:
ind[::2]

Int64Index([2, 5, 11], dtype='int64')

In [61]:
#Index objects also have many of the attributes familiar from NumPy arrays

In [62]:
print(ind.size, ind.shape, ind.ndim, ind.dtype)

5 (5,) 1 int64


In [64]:
#One difference between Index objects and NumPy arrays is that indices are immutable; that is, they cannot be modified via the normal means (for example, ind[1] = 0 raises a TypeError).


# Index as ordered set

In [65]:
#Pandas objects are designed to facilitate operations such as joins across datasets,
#which depend on many aspects of set arithmetic. The Index object follows many of
#the conventions used by Python’s built-in set data structure, so that unions,
#intersections, differences, and other combinations can be computed in a familiar way.

In [66]:
# Two overlapping integer indexes used to demonstrate set-style operations.
indA = pd.Index(data=[1, 3, 5, 7, 9])
indB = pd.Index(data=[2, 3, 5, 7, 11])

In [67]:
# Intersection: labels present in both indexes.
# The `&` operator form was deprecated as a set operation in pandas 1.x and
# performs an element-wise AND from pandas 2.0 onward, so the explicit method
# is used to get set semantics on every version.
indA.intersection(indB)

  indA & indB


Int64Index([3, 5, 7], dtype='int64')

In [68]:
# Union: labels present in either index (sorted where possible by default).
# The `|` operator form was deprecated as a set operation in pandas 1.x and
# performs an element-wise OR from pandas 2.0 onward, so the explicit method
# is used to get set semantics on every version.
indA.union(indB)

  indA | indB


Int64Index([1, 2, 3, 5, 7, 9, 11], dtype='int64')

In [69]:
# Symmetric difference: labels present in exactly one of the two indexes.
# The `^` operator form was deprecated as a set operation in pandas 1.x and
# performs an element-wise XOR from pandas 2.0 onward, so the explicit method
# is used to get set semantics on every version.
indA.symmetric_difference(indB)

  indA ^ indB


Int64Index([1, 2, 9, 11], dtype='int64')

In [70]:
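#Prefer the explicit set methods: indA.intersection(indB), indA.union(indB), indA.symmetric_difference(indB).
#The operator forms (&, |, ^) were deprecated as set operations in pandas 1.x and act element-wise from pandas 2.0 onward.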

# Thank You