In [1]:
import pandas

In [2]:
# Show the installed pandas version string.
pandas.__version__

'0.25.1'

In [3]:
import pandas as pd

In [4]:
import numpy as np

In [9]:
# A pandas Series is a one-dimensional array of indexed data.
# With no explicit index, the five letters get the default integer labels.
sr = pd.Series(list('abcde'))

In [12]:
# Display the Series: index labels in the left column, values in the right.
sr

0    a
1    b
2    c
3    d
4    e
dtype: object

In [10]:
#As the displayed Series shows, a Series wraps both a sequence of values and a
#sequence of indices, which we can access with the `values` and `index` attributes.
#`values` is simply a familiar NumPy array (dtype=object here, since it holds strings):
sr.values

array(['a', 'b', 'c', 'd', 'e'], dtype=object)

In [11]:
#The index is an array-like object of type pd.Index; because no index was
#given, it is the default RangeIndex running from 0 up to (not including) 5.
sr.index

RangeIndex(start=0, stop=5, step=1)

In [13]:
# Access a single element by its integer index, as with a NumPy array.
sr[1]

'b'

In [14]:
# Slicing returns a Series that keeps the original index labels (2, 3, 4).
sr[2:5]

2    c
3    d
4    e
dtype: object

In [16]:
# Supply an explicit index: labels need not be integers or sequential --
# here each value is labelled with a string.
sr = pd.Series(
    data=[1, 2, 3, 4, 5],
    index=list('abcde'),
)

In [17]:
# Elements are looked up by their string label.
sr['e']

5

In [18]:
#The Series-as-dictionary analogy becomes even clearer when we construct a
#Series object directly from a Python dictionary:

In [19]:
# Population figures keyed by name, listed in the order they should appear.
population_dict = dict([
    ('California', 38332521),
    ('Texas', 26448193),
    ('New York', 19651127),
    ('Florida', 19552860),
    ('Illinois', 12882135),
])


In [21]:
# Dict keys become the index labels; dict values become the data.
population = pd.Series(data=population_dict)

In [22]:
# Dictionary-style lookup: the label plays the role of the key.
population['Texas']

26448193

In [23]:
# By default the Series index is drawn from the dictionary keys in insertion order
# (pandas >= 0.23 on Python 3.6+; older versions sorted the keys instead).

In [25]:
# Unlike a dict, a Series supports slicing by label; the end label
# ('Florida') is included in the result.
population['California':'Florida']

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
dtype: int64

In [26]:
# The index can be set explicitly if a different result is preferred:

In [27]:
 # Only the keys named in `index` are kept, in the order given (3, then 2); key 1 is dropped.
 pd.Series({2:'a', 1:'b', 3:'c'}, index=[3, 2])

3    c
2    a
dtype: object

## The Pandas DataFrame Object

In [28]:
#a DataFrame is an analog of a two-dimensional array with both flexible row indices and flexible
#column names. Just as you might think of a two-dimensional array as an ordered
#sequence of aligned one-dimensional columns, you can think of a DataFrame as a
#sequence of aligned Series objects. Here, by “aligned” we mean that they share the
#same index.


In [37]:
# Areas keyed by name. 'Bombay' has no counterpart in population_dict.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
    'Bombay': 200000,
}

In [38]:
# Wrap the area mapping in a Series, labelled by the dict keys.
area = pd.Series(data=area_dict)

In [39]:
# Display the area Series (six entries, including 'Bombay').
area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Bombay        200000
dtype: int64

In [40]:
# Build a two-column DataFrame: each Series becomes one column, keyed by
# the name given in the dict.
states = pd.DataFrame(
    data={
        'population': population,
        'area': area,
    }
)

In [41]:
#The DataFrame has an index attribute that gives access to the row
#index labels. Here it holds every label found in either Series, so
#'Bombay' (present only in `area`) is included.
states.index

Index(['Bombay', 'California', 'Florida', 'Illinois', 'New York', 'Texas'], dtype='object')

In [42]:
#Additionally, the DataFrame has a columns attribute, which is an Index object holding
#the column labels (the keys of the dict passed to the constructor).
states.columns

Index(['population', 'area'], dtype='object')

In [43]:
# Dictionary-style column access returns that column as a Series named 'area'.
states['area']

Bombay        200000
California    423967
Florida       170312
Illinois      149995
New York      141297
Texas         695662
Name: area, dtype: int64

In [44]:
# 'Bombay' has no entry in `population`, so it shows up as NaN here and the
# column is float64 rather than int64.
states['population']

Bombay               NaN
California    38332521.0
Florida       19552860.0
Illinois      12882135.0
New York      19651127.0
Texas         26448193.0
Name: population, dtype: float64

## Constructing DataFrame objects

In [45]:
#From a single Series object.

In [46]:
#A DataFrame is a collection of Series objects, and a
#single-column DataFrame can be constructed from a single Series

In [48]:
# Confirm that `population` is a pandas Series before wrapping it in a DataFrame.
type(population)

pandas.core.series.Series

In [47]:
# Promote the Series to a one-column DataFrame; the Series labels stay as
# the row index and the single column is called 'sample_population'.
population.to_frame(name='sample_population')

Unnamed: 0,sample_population
California,38332521
Texas,26448193
New York,19651127
Florida,19552860
Illinois,12882135


In [54]:
# From a list of dicts: each dict supplies one row, and any key a row
# lacks is filled with NaN.
pd.DataFrame(
    [
        {'a': 1, 'b': 2},
        {'b': 3, 'c': 4},
        {'c': 12},
    ]
)

Unnamed: 0,a,b,c
0,1.0,2.0,
1,,3.0,4.0
2,,,12.0


In [52]:
# From a two-dimensional NumPy array.

# Draw the values from a seeded generator so the table is identical on every
# Restart & Run All; column names and row labels are given explicitly.
random_frame = pd.DataFrame(
    np.random.RandomState(0).rand(3, 2),
    columns=['a', 'b'],
    index=[1, 2, 3],
)
random_frame

Unnamed: 0,a,b
1,0.866434,0.881964
2,0.520183,0.176948
3,0.872236,0.110042
