In [3]:
import pandas as pd
import numpy as np

In [8]:
# Create a Series from a list of values; pandas supplies the default integer index.
p = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
p

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [18]:
# Create a DataFrame from a NumPy array, using a datetime index and labelled columns.
dates = pd.date_range("20220804", periods=6)

# The values come from a locally seeded Generator so that Restart & Run All
# always rebuilds the same frame (the global NumPy random state is not touched).
# NOTE: outputs saved from earlier, unseeded runs will differ until the
# notebook is re-executed.
df = pd.DataFrame(
    np.random.default_rng(0).standard_normal((6, 4)),
    index=dates,
    columns=list("ABCD"),
)
df

Unnamed: 0,A,B,C,D
2022-08-04,-0.8498,0.69536,0.094633,-0.662285
2022-08-05,-0.12111,2.591458,0.583557,-1.573333
2022-08-06,-1.584623,-0.275013,-0.509422,-0.487337
2022-08-07,-0.44726,-0.169682,0.064228,0.667549
2022-08-08,0.104632,-0.512073,1.004641,-1.287297
2022-08-09,0.653956,-1.970424,0.328952,-0.493907


In [11]:
# Build a DataFrame from a dict whose values can each be converted to a Series-like column.
# The scalar entries (A, B, F) are repeated for every row of the 4-element entries.
df_2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp("20130102"),
        C=pd.Series(1, index=list(range(4)), dtype="float32"),
        D=np.full(4, 3, dtype="int32"),
        E=pd.Categorical(["test", "train", "test", "train"]),
        F="foo",
    )
)
df_2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [13]:
# The columns of the resulting DataFrame have different dtypes
df_2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [22]:
# View the top of the frame: the first 2 rows
df.head(2)

Unnamed: 0,A,B,C,D
2022-08-04,-0.8498,0.69536,0.094633,-0.662285
2022-08-05,-0.12111,2.591458,0.583557,-1.573333


In [24]:
# View the bottom of the frame: the last 2 rows
df.tail(2)

Unnamed: 0,A,B,C,D
2022-08-08,0.104632,-0.512073,1.004641,-1.287297
2022-08-09,0.653956,-1.970424,0.328952,-0.493907


In [25]:
# Display the index (row labels) of the frame
df.index

DatetimeIndex(['2022-08-04', '2022-08-05', '2022-08-06', '2022-08-07',
               '2022-08-08', '2022-08-09'],
              dtype='datetime64[ns]', freq='D')

In [26]:
# Quick statistical summary per column: count, mean, std, min, quartiles and max
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.374034,0.059937,0.261098,-0.639435
std,0.780653,1.511743,0.514123,0.780376
min,-1.584623,-1.970424,-0.509422,-1.573333
25%,-0.749165,-0.452808,0.071829,-1.131044
50%,-0.284185,-0.222348,0.211793,-0.578096
75%,0.048196,0.479099,0.519906,-0.488979
max,0.653956,2.591458,1.004641,0.667549


In [28]:
# Transpose the data: the column labels become the rows and the dates become the columns
df.T

Unnamed: 0,2022-08-04,2022-08-05,2022-08-06,2022-08-07,2022-08-08,2022-08-09
A,-0.8498,-0.12111,-1.584623,-0.44726,0.104632,0.653956
B,0.69536,2.591458,-0.275013,-0.169682,-0.512073,-1.970424
C,0.094633,0.583557,-0.509422,0.064228,1.004641,0.328952
D,-0.662285,-1.573333,-0.487337,0.667549,-1.287297,-0.493907


In [30]:
# Sort along an axis: axis=1 orders the column labels, here descending (D, C, B, A)
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2022-08-04,-0.662285,0.094633,0.69536,-0.8498
2022-08-05,-1.573333,0.583557,2.591458,-0.12111
2022-08-06,-0.487337,-0.509422,-0.275013,-1.584623
2022-08-07,0.667549,0.064228,-0.169682,-0.44726
2022-08-08,-1.287297,1.004641,-0.512073,0.104632
2022-08-09,-0.493907,0.328952,-1.970424,0.653956


In [31]:
# Sort the rows by the values in column B (ascending)
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2022-08-09,0.653956,-1.970424,0.328952,-0.493907
2022-08-08,0.104632,-0.512073,1.004641,-1.287297
2022-08-06,-1.584623,-0.275013,-0.509422,-0.487337
2022-08-07,-0.44726,-0.169682,0.064228,0.667549
2022-08-04,-0.8498,0.69536,0.094633,-0.662285
2022-08-05,-0.12111,2.591458,0.583557,-1.573333


## Выборка данных

In [33]:
# Data access: selecting a single column by its label returns a Series
df['A']

2022-08-04   -0.849800
2022-08-05   -0.121110
2022-08-06   -1.584623
2022-08-07   -0.447260
2022-08-08    0.104632
2022-08-09    0.653956
Freq: D, Name: A, dtype: float64

In [45]:
# Slicing with [] selects rows; 0:2 returns the first two rows
df[0:2]

Unnamed: 0,A,B,C,D
2022-08-04,-0.8498,0.69536,0.094633,-0.662285
2022-08-05,-0.12111,2.591458,0.583557,-1.573333


In [53]:
# Select a single row by its label; the result is a Series indexed by the column names
df.loc[dates[1]]

A   -0.121110
B    2.591458
C    0.583557
D   -1.573333
Name: 2022-08-05 00:00:00, dtype: float64

In [61]:
# Select on both axes by label: all rows, columns A and B
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2022-08-04,-0.8498,0.69536
2022-08-05,-0.12111,2.591458
2022-08-06,-1.584623,-0.275013
2022-08-07,-0.44726,-0.169682
2022-08-08,0.104632,-0.512073
2022-08-09,0.653956,-1.970424


In [65]:
# Reduce the dimensionality of the returned object:
# one row label plus a list of columns yields a Series instead of a DataFrame
df.loc["20220804", ["A", "B"]]

A   -0.84980
B    0.69536
Name: 2022-08-04 00:00:00, dtype: float64