# Installing and Using Pandas

In [1]:
# Import pandas under its full name and display the installed version string.
import pandas
pandas.__version__

'1.1.3'

In [2]:
import pandas as pd

# Introducing Pandas Objects

In [3]:
import numpy as np
import pandas as pd

In [4]:
# A Series wraps a one-dimensional sequence of values; with no explicit
# index, the entries are labelled 0, 1, 2, ...
data = pd.Series(
    data=[0.25, 0.50, 0.75, 1.00],
)
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [5]:
# Show the index object that labels the entries of the Series.
data.index

RangeIndex(start=0, stop=4, step=1)

In [6]:
# Array-style access: a single element first, then a slice of the Series.
print(data[1], data[1:3], sep='\n')

0.5
1    0.50
2    0.75
dtype: float64


### Series as generalized NumPy array

In [7]:
# The index does not have to be integers: here each value is labelled with
# an explicit string key.
data = pd.Series(
    data=[0.25, 0.50, 0.75, 1.00],
    index=list('abcd'),
)
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [10]:
# Inspect the pieces of a labelled Series: its index, its underlying values
# as a NumPy array, and a single element looked up by label.
# (A Series has no `.columns` attribute; that belongs to DataFrame.)
print(data.index)
print(data.to_numpy())  # `.to_numpy()` is the recommended replacement for `.values`
print(data['b'])

Index(['a', 'b', 'c', 'd'], dtype='object')
[0.25 0.5  0.75 1.  ]
0.5


In [11]:
# Index labels may be arbitrary integers; they need not be sorted or
# contiguous.
data = pd.Series(
    data=[0.25, 0.50, 0.75, 1.00],
    index=[2, 5, 3, 7],
)
data

2    0.25
5    0.50
3    0.75
7    1.00
dtype: float64

In [12]:
# Show the index built from the explicit integer labels given to the constructor.
print(data.index)

Int64Index([2, 5, 3, 7], dtype='int64')


### Series as specialized dictionary

In [14]:
# State populations keyed by state name (insertion order is preserved).
population_dict = dict([
    ('California', 38332521),
    ('Texas', 26448193),
    ('New York', 19651127),
    ('Florida', 19552860),
    ('Illinois', 12882135),
])

# A dict maps directly onto a Series: keys become the index, values the data.
population = pd.Series(data=population_dict)
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [16]:
# The dict keys became the index; the dict values became the data array.
print(population.index)
print(population.to_numpy())  # `.to_numpy()` is the recommended replacement for `.values`

Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')
[38332521 26448193 19651127 19552860 12882135]


In [20]:
# Label-based slicing is inclusive at both ends: 'California' and 'New York'
# are both returned. `.loc` makes the label-based intent explicit.
population.loc['California':'New York']

California    38332521
Texas         26448193
New York      19651127
dtype: int64

### Constructing Series Objects
* `pd.Series(data, index=index)`, where `index` is an optional argument.

In [21]:
# With no index given, the values are labelled 0, 1, 2, ...
pd.Series([2,4,6])

0    2
1    4
2    6
dtype: int64

In [22]:
# A scalar is repeated for every label in the supplied index.
pd.Series(5, index=[100,200,300])

100    5
200    5
300    5
dtype: int64

In [30]:
# With an integer index, plain `[]` is ambiguous: slices are positional while
# scalar lookups use the labels. Spell out the intent with `.iloc` / `.loc`.
data = pd.Series({2: 'a', 1: 'b', 3: 'c'})
print(data.iloc[0:1])  # positional slice -> first row (label 2)
print(data.loc[1])     # label lookup -> value stored under label 1

2    a
dtype: object
b


In [31]:
# An explicit index selects which dict keys are kept and in what order;
# key 1 is dropped here because it is not listed.
pd.Series({2:'a', 1:'b', 3:'c'}, index=[3, 2])

3    c
2    a
dtype: object

# The Pandas DataFrame Object

### DataFrame as a generalized NumPy array
* A `Series` is an analog of a one-dimensional array with flexible indices.
* A `DataFrame` is an analog of a two-dimensional array with both flexible row indices and flexible column names.

In [33]:
# State areas keyed by the same state names used for the populations.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
}

# Keys become the index of the Series, values become its data.
area = pd.Series(data=area_dict)
area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64

In [35]:
# Combine the two Series column-wise into one table; each keyword names a
# column, and rows are matched on the shared state index.
states = pd.DataFrame(dict(population=population, area=area))
states

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [37]:
# Summarise the DataFrame's structure: its type, row labels, column labels,
# and the underlying data as a two-dimensional NumPy array.
print(type(states))
print(states.index)
print(states.columns)
print(states.to_numpy())  # `.to_numpy()` is the recommended replacement for `.values`

<class 'pandas.core.frame.DataFrame'>
Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')
Index(['population', 'area'], dtype='object')
[[38332521   423967]
 [26448193   695662]
 [19651127   141297]
 [19552860   170312]
 [12882135   149995]]


### DataFrame as specialized dictionary

In [39]:
# Dictionary-style access: a column name maps to that column as a Series.
states['area']

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [40]:
# Same dictionary-style lookup, this time for the 'population' column.
states['population']

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
Name: population, dtype: int64

### Constructing DataFrame Objects

##### From a single Series object

In [41]:
# Promote the Series to a one-column DataFrame. `to_frame` names the column
# explicitly, so the result does not depend on the Series' own `name`
# (passing `columns=` to the DataFrame constructor only works for an
# unnamed Series or one already named 'population').
population.to_frame(name='population')

Unnamed: 0,population
California,38332521
Texas,26448193
New York,19651127
Florida,19552860
Illinois,12882135


##### From a list of dicts

In [42]:
# Each dict is one row of the table; its keys become the column names.
data = [dict(a=n, b=2 * n) for n in range(3)]
pd.DataFrame(data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


In [43]:
# The records have different keys: the columns are the union of all keys, and
# entries absent from a record are left as missing values (NaN).
pd.DataFrame([{'a':1, 'b': 2}, {'b':3, 'c':4}])

Unnamed: 0,a,b,c
0,1.0,2,
1,,3,4.0
