In [1]:
import pandas as pd
import numpy as np

In [3]:
# Display the version string of the imported pandas package.
pd.__version__

'1.2.4'

In [4]:
"""
Pandas Object : Series
"""

# Build a Series from a plain sequence of floats. No index is supplied,
# so pandas assigns the default integer labels.
ser = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
)
print(ser)

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64


In [5]:
# A Series exposes its contents through two main attributes:
#   .values -> the stored data
#   .index  -> the labels attached to the data
arr = ser.values
ind = ser.index

# One value per line: the data, the index object, then the raw index labels.
print(arr, ind, ind.values, sep='\n')

[0.25 0.5  0.75 1.  ]
RangeIndex(start=0, stop=4, step=1)
[0 1 2 3]


In [6]:
# Label based indexing: attach explicit string labels to the data,
# then look a single element up by its label.
ser = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
    index=list('abcd'),
)
print(ser)
print(ser['a'])

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64
0.25


In [7]:
"""
Create a series from a dictionary
"""

# The dictionary keys become the index labels and the values become the data.
population_dict = {
    "California": 38332521,
    "Texas": 26448193,
    "New York": 19651127,
    "Florida": 19552860,
    "Illinois": 12882135,
}
population = pd.Series(population_dict)
print(population)

# Slice by label: both endpoints are part of the result.
print(population.loc['California':'New York'])

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64
California    38332521
Texas         26448193
New York      19651127
dtype: int64


In [8]:
"""
Pandas object: DataFrame
"""

# Second per-state Series, keyed by the same state names as `population_dict`
# in the dictionary-based Series example; it is later combined into a DataFrame.
area_dict = {
    "California": 423967,
    "Texas": 695662,
    "New York": 141297,
    "Florida": 170312,
    "Illinois": 149995,
}
area = pd.Series(area_dict)
print(area)

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64


In [9]:
# Construct a DataFrame containing the 'population' and 'area' Series.
# Each keyword becomes a column name; the Series supply the row labels.
states = pd.DataFrame(dict(population=population, area=area))

# Show the frame followed by a separator line.
print(states, '--------', sep='\n')

# Row labels, then column labels.
print(states.index, '\n')
print(states.columns)

            population    area
California    38332521  423967
Texas         26448193  695662
New York      19651127  141297
Florida       19552860  170312
Illinois      12882135  149995
--------
Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object') 

Index(['population', 'area'], dtype='object')


In [10]:
# Select a single column by name and print the result.
print(states['area'])


California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64


In [11]:
# Build a DataFrame from a 2-D NumPy array with explicit column and row labels.
# The generator is seeded so the "random" values are identical on every run
# (Restart Kernel -> Run All reproduces the same array and frame).
rng = np.random.default_rng(0)
arr = rng.random((3, 2))
print(arr, '\n')

df = pd.DataFrame(arr, columns=['foo', 'bar'], index=['a', 'b', 'c'])

print(df)

[[0.95503316 0.32011568]
 [0.33122207 0.49872354]
 [0.7880096  0.75455584]] 

        foo       bar
a  0.955033  0.320116
b  0.331222  0.498724
c  0.788010  0.754556


In [12]:
"""
Series object manipulation : dictionary-style
"""

ser = pd.Series([0.25, 0.5, 0.75, 1.0], index=['a', 'b', 'c', 'd'])

print(ser, '\n')

# Lookup by label.
print(ser['b'])

# Lookup by position. Use .iloc explicitly: an integer key passed to plain
# [] on a string-labelled Series relies on a positional fallback that newer
# pandas releases deprecate.
print(ser.iloc[0])

# `in` tests membership against the index labels, not the stored values,
# so 'a' is found while the value 0.25 is not.
print('a' in ser)
print(0.25 in ser)

# keys() and .index both give the index labels.
print(ser.keys())
print(ser.index)


a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64 

0.5
0.25
True
False
Index(['a', 'b', 'c', 'd'], dtype='object')
Index(['a', 'b', 'c', 'd'], dtype='object')


In [13]:
"""
Series object manipulation : array-style
"""

print(ser.keys())

# Assign through labels: 'e' is not among the keys printed above, so the
# Series grows by one entry; 'a' already exists, so its value is replaced.
ser.loc['e'] = 1.25
ser.loc['a'] = 0.125

print(ser)

Index(['a', 'b', 'c', 'd'], dtype='object')
a    0.125
b    0.500
c    0.750
d    1.000
e    1.250
dtype: float64


In [14]:
"""
DataFrame object manipulation
"""

# Add a derived column: population divided by area, computed per row.
states['density'] = states['population'].div(states['area'])
print(states)

            population    area     density
California    38332521  423967   90.413926
Texas         26448193  695662   38.018740
New York      19651127  141297  139.076746
Florida       19552860  170312  114.806121
Illinois      12882135  149995   85.883763


In [15]:
# Label-based row slice with .loc; the stop label ('New York') is included.
states.loc['California':'New York']


Unnamed: 0,population,area,density
California,38332521,423967,90.413926
Texas,26448193,695662,38.01874
New York,19651127,141297,139.076746


In [16]:
# Position-based row slice with .iloc; selects the first four rows (positions 0-3).
states.iloc[:4]

Unnamed: 0,population,area,density
California,38332521,423967,90.413926
Texas,26448193,695662,38.01874
New York,19651127,141297,139.076746
Florida,19552860,170312,114.806121


In [17]:
# Rows whose density exceeds 100, restricted to two columns.
print(
    states.loc[states['density'].gt(100), ['population', 'density']],
    '\n',
)

          population     density
New York    19651127  139.076746
Florida     19552860  114.806121 



In [18]:
# Masking and fancy indexing using the loc indexer:
#   rows    -> boolean mask (density above 100)
#   columns -> explicit list of column names
print(
    states.loc[
        states['density'] > 100,
        ['population', 'density'],
    ],
    '\n',
)

          population     density
New York    19651127  139.076746
Florida     19552860  114.806121 

