In [1]:
import numpy as np
import pandas as pd
pd.__version__

'0.22.0'

## Series objects

In [2]:
# A Series pairs a one-dimensional sequence of values with an index;
# when no index is given, a default integer index is used.
data = pd.Series(data=[0.25, 0.5, 0.75, 1.0])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [3]:
# .values exposes the underlying data as a NumPy ndarray
print(data.values)

[0.25 0.5  0.75 1.  ]


In [4]:
# The index is a separate array-like object; with no explicit index it is a RangeIndex
print(data.index)

RangeIndex(start=0, stop=4, step=1)


In [6]:
# Array-style slicing on the default integer index: the stop bound (3) is excluded
data[1:3]

1    0.50
2    0.75
dtype: float64

### Объект Series как обобщенный массив NumPy

In [8]:
# Unlike a NumPy array, a Series index is explicit and need not be integers:
# here each value is labelled with a string.
data = pd.Series(
    data=[0.25, 0.50, 0.75, 1.0],
    index=list('abcd'),
)
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [10]:
# Values are retrieved by their index label
print(data['b'])

0.5


In [12]:
# Index labels may also be arbitrary, non-contiguous integers.
data = pd.Series(
    index=[2, 3, 5, 7],
    data=[0.25, 0.50, 0.75, 1.0],
)

In [14]:
# With the explicit integer index [2, 3, 5, 7], data[3] looks up the *label* 3
# (value 0.5), not the element at position 3 (which would be 1.0).
data[3]

0.5

### Объект Series как специализированный словарь


In [16]:
# State populations keyed by state name.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135
}
# Dict keys become the index. The index is sorted explicitly so that the
# displayed order and label-range slices such as 'California':'Illinois'
# do not depend on how the installed pandas version orders dict keys.
population = pd.Series(population_dict).sort_index()
population

California    38332521
Florida       19552860
Illinois      12882135
New York      19651127
Texas         26448193
dtype: int64

In [17]:
# Values are accessible by key, as in a dictionary
print(population['Florida'])

19552860


In [19]:
# Slicing by a range of labels; unlike positional slicing, the end label is included
print(population['California':'Illinois'])

California    38332521
Florida       19552860
Illinois      12882135
dtype: int64


### Создание объектов Series

In [24]:
# General constructor form: pd.Series(data, index=index)
# `data` may be a list/array, a scalar, or a dict.
a = pd.Series([2, 4, 6])                                # list -> default integer index
b = pd.Series(5, index=[100, 200, 300])                 # scalar is repeated for every index label
c = pd.Series({2: 'a', 1: 'b', 3: 'c'})                 # dict keys become the index
d = pd.Series({2: 'a', 1: 'b', 3: 'c'}, index=[3, 2])   # explicit index keeps only those keys, in that order

# sep='\n' puts each label on its own line; print('a\n', a) would insert the
# default separator space before the first row and misalign it.
print('a', a, sep='\n')
print('=' * 10)
print('b', b, sep='\n')
print('=' * 10)
print('c', c, sep='\n')
print('=' * 10)
print('d', d, sep='\n')

a
 0    2
1    4
2    6
dtype: int64
b
 100    5
200    5
300    5
dtype: int64
c
 1    b
2    a
3    c
dtype: object
d
 3    c
2    a
dtype: object


## DataFrame objects

### Объект DataFrame как обобщенный массив NumPy

In [25]:
# State areas keyed by state name.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995
}
# Sort the index explicitly so the displayed order does not depend on how the
# installed pandas version orders dict keys.
area = pd.Series(area_dict).sort_index()
area

California    423967
Florida       170312
Illinois      149995
New York      141297
Texas         695662
dtype: int64

In [26]:
# Combine the two Series into one table with one column per Series.
# Column order is pinned explicitly so it does not depend on how the installed
# pandas version orders dict keys.
states = pd.DataFrame(
    {'population': population, 'area': area},
    columns=['area', 'population'],
)
states

Unnamed: 0,area,population
California,423967,38332521
Florida,170312,19552860
Illinois,149995,12882135
New York,141297,19651127
Texas,695662,26448193


In [27]:
# Row labels of the DataFrame
states.index

Index(['California', 'Florida', 'Illinois', 'New York', 'Texas'], dtype='object')

In [28]:
# Column labels are held in an Index object as well
states.columns

Index(['area', 'population'], dtype='object')

### Объект DataFrame как специализированный словарь 

In [29]:
# Dict-style access: a column name maps to the Series holding that column
states['area']

California    423967
Florida       170312
Illinois      149995
New York      141297
Texas         695662
Name: area, dtype: int64

### Создание объектов DataFrame 

In [30]:
# Build a single-column DataFrame from one Series; `columns` supplies the column name
pd.DataFrame(population, columns=['population'])

Unnamed: 0,population
California,38332521
Florida,19552860
Illinois,12882135
New York,19651127
Texas,26448193


In [31]:
# A list of dicts: each dict becomes one row, its keys become the columns.
data = [dict(a=i, b=2 * i) for i in range(3)]
pd.DataFrame(data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


In [32]:
# Keys missing from a row are filled with NaN in the resulting frame.
pd.DataFrame([
    {'a': 1, 'b': 2},
    {'b': 3, 'c': 4},
])

Unnamed: 0,a,b,c
0,1.0,2,
1,,3,4.0


In [33]:
# A DataFrame can be built from a dict of Series. Column order is pinned
# explicitly so it does not depend on how the installed pandas version
# orders dict keys.
pd.DataFrame(
    {'population': population, 'area': area},
    columns=['area', 'population'],
)

Unnamed: 0,area,population
California,423967,38332521
Florida,170312,19552860
Illinois,149995,12882135
New York,141297,19651127
Texas,695662,26448193


In [34]:
# Draw from a dedicated, seeded generator so the displayed numbers are
# reproducible on every re-run. RandomState is used to stay compatible with
# the NumPy releases contemporary with pandas 0.22.
rng = np.random.RandomState(0)
pd.DataFrame(rng.rand(3, 2), columns=['foo', 'bar'], index=['a', 'b', 'c'])

Unnamed: 0,foo,bar
a,0.112804,0.262554
b,0.756669,0.320882
c,0.150779,0.142333


In [35]:
# Zero-filled structured array with an 8-byte integer field 'A' and an
# 8-byte float field 'B'.
record_dtype = np.dtype([('A', 'i8'), ('B', 'f8')])
A = np.zeros(shape=3, dtype=record_dtype)
A

array([(0, 0.), (0, 0.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')])

In [36]:
# A NumPy structured array converts directly; its field names become the column names
pd.DataFrame(A)

Unnamed: 0,A,B
0,0,0.0
1,0,0.0
2,0,0.0
