# Основные объекты в Pandas

In [43]:
import json
from io import StringIO

import pandas as pd
# pip install pandas

In [44]:
# Show the installed pandas version (the recorded output below is '1.2.3')
pd.__version__

'1.2.3'

## Series

In [45]:
# A Series built from a plain list gets the default integer index 0..n-1
s = pd.Series(data=["Брест", "Харьков", "Сочи", "Тула"])
s

0      Брест
1    Харьков
2       Сочи
3       Тула
dtype: object

In [46]:
# Same values, but with explicit string labels instead of the default index
s = pd.Series(
    data=["Брест", "Харьков", "Сочи", "Тула"],
    index=["Крепость", "Госпром", "Море", "Пряник"],
)
s

Крепость      Брест
Госпром     Харьков
Море           Сочи
Пряник         Тула
dtype: object

In [47]:
# .loc - label-based access: look up a single element by its index label
s.loc["Крепость"]

'Брест'

In [48]:
# Attribute-style access: a label that is a valid Python identifier can also be
# read as an attribute; this returns the same element as s.loc["Крепость"]
s.Крепость

'Брест'

In [49]:
# Passing a list of labels returns a Series containing just those elements
s.loc[["Крепость", "Госпром"]]

Крепость      Брест
Госпром     Харьков
dtype: object

In [50]:
# .iloc - position-based access (position 0 is the first element)
s.iloc[0]

'Брест'

In [51]:
# A list of positions selects those elements; their index labels are kept
s.iloc[[0,2]]

Крепость    Брест
Море         Сочи
dtype: object

In [52]:
# Positional slice: the end position is excluded, so this returns positions 0 and 1
s.iloc[0:2]

Крепость      Брест
Госпром     Харьков
dtype: object

## DataFrame

In [53]:
# (A) Build a DataFrame from a dict: each key becomes a column name and each
# list holds that column's values
df = pd.DataFrame(data=dict(col1=[1, 2], col2=[3, 4]))
df

Unnamed: 0,col1,col2
0,1,3
1,2,4


In [54]:
# (B) Build a DataFrame from a list of rows, naming both the columns and the
# row labels explicitly
df = pd.DataFrame(
    data=[[1, 2, 3], [10, 20, 30]],
    index=['line1', 'line2'],
    columns=['aaa', 'bbb', 'ccc'],
)
df

Unnamed: 0,aaa,bbb,ccc
line1,1,2,3
line2,10,20,30


# HW:
- читать документацию по Pandas
- создать свои примеры

In [55]:
# Serialize a table in the "split" layout (separate 'columns', 'index' and
# 'data' keys) and parse it back into a DataFrame.
json_buf = json.dumps({
    'columns': ['col_A', 'col_B', 'col_C'],
    'index': [0, 1],
    'data': [['a1', 'b1', 'c1'], ['a2', 'b2', 'c2']],
})
# The text is wrapped in StringIO because passing a literal JSON string to
# read_json is deprecated since pandas 2.1; a file-like object works in old
# and new versions alike.
df1 = pd.read_json(StringIO(json_buf), orient='split')
df1

Unnamed: 0,col_A,col_B,col_C
0,a1,b1,c1
1,a2,b2,c2


In [56]:
# The same table in the "records" layout: a list with one {column: value}
# dict per row.
json_buf2 = json.dumps([
    {'col_A': 'a1', 'col_B': 'b1', 'col_C': 'c1'},
    {'col_A': 'a2', 'col_B': 'b2', 'col_C': 'c2'},
])
# 'records' is the documented orient name ('record' is not a documented value).
# The text is wrapped in StringIO because passing a literal JSON string to
# read_json is deprecated since pandas 2.1.
df2 = pd.read_json(StringIO(json_buf2), orient='records')
df2

Unnamed: 0,col_A,col_B,col_C
0,a1,b1,c1
1,a2,b2,c2


In [57]:
from io import StringIO

In [58]:
# Parse CSV text held in memory; the empty trailing fields in the `percent`
# column come back as missing values
data = 'price,count,percent\n1,10,\n2,20,51\n3,30,'
df = pd.read_csv(filepath_or_buffer=StringIO(data))
df

Unnamed: 0,price,count,percent
0,1,10,
1,2,20,51.0
2,3,30,


In [59]:
# Add a row labelled 3 by assigning a dict through .loc (the label does not
# exist yet, so the frame is enlarged in place).
# NOTE: after this assignment `count` holds a None and is reported as object
# dtype by the df.info() cell further down.
df.loc[3] = {'price':4, 'count':None, 'percent':26.3}
df

Unnamed: 0,price,count,percent
0,1,10.0,
1,2,20.0,51.0
2,3,30.0,
3,4,,26.3


In [60]:
# Boolean mask of the same shape as df: True where a value is missing
df.isna()

Unnamed: 0,price,count,percent
0,False,False,True
1,False,False,False
2,False,False,True
3,False,True,False


In [61]:
# Print a summary of the frame: index type, per-column non-null counts and
# dtypes, and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   price    4 non-null      int64  
 1   count    3 non-null      object 
 2   percent  2 non-null      float64
dtypes: float64(1), int64(1), object(1)
memory usage: 128.0+ bytes


In [62]:
# Count the missing values in each column (True counts as 1 when summed)
df.isna().sum()

price      0
count      1
percent    2
dtype: int64

In [63]:
# Replace every missing value with 0; returns a new frame, df is not modified
df.fillna(value=0)

Unnamed: 0,price,count,percent
0,1,10,0.0
1,2,20,51.0
2,3,30,0.0
3,4,0,26.3


In [64]:
# Fill the gaps in each column with that column's mean (computed over the
# non-missing values); returns a new frame, df itself is not modified
df.fillna(df.mean())

Unnamed: 0,price,count,percent
0,1,10.0,38.65
1,2,20.0,51.0
2,3,30.0,38.65
3,4,20.0,26.3


In [65]:
# Drop every row that contains at least one missing value (the defaults,
# spelled out explicitly)
df.dropna(axis="index", how="any")

Unnamed: 0,price,count,percent
1,2,20,51.0


In [66]:
# Drop every column that contains at least one missing value
df.dropna(axis="columns")

Unnamed: 0,price
0,1
1,2
2,3
3,4


In [67]:
# Keep only the columns that have at least 3 non-missing values
df.dropna(axis="columns", thresh=3)

Unnamed: 0,price,count
0,1,10.0
1,2,20.0
2,3,30.0
3,4,


In [68]:
# Fresh Series and DataFrame used by the add/remove examples that follow
s = pd.Series(data=[1, 2, 3, 4, 5], index=list('ABCDE'))
d = {
    'color': ['red', 'green', 'blue'],
    'speed': [56, 24, 65],
    'volume': [80, 65, 50],
}
df = pd.DataFrame(data=d)
df

Unnamed: 0,color,speed,volume
0,red,56,80
1,green,24,65
2,blue,65,50


In [69]:
# Assigning to a label that does not exist yet adds a new element to s in place
s.loc['F'] = 6
s

A    1
B    2
C    3
D    4
E    5
F    6
dtype: int64

In [70]:
# Add a column: the list supplies one value per existing row
df['type'] = ['circle','square','triangle']
df

Unnamed: 0,color,speed,volume,type
0,red,56,80,circle
1,green,24,65,square
2,blue,65,50,triangle


In [71]:
# Assigning a scalar fills the new column with that value in every row
df['value'] = 7
df

Unnamed: 0,color,speed,volume,type,value
0,red,56,80,circle,7
1,green,24,65,square,7
2,blue,65,50,triangle,7


In [72]:
# Add one row to the frame. DataFrame.append was deprecated in pandas 1.4 and
# removed in 2.0, so the row is wrapped in a one-row frame and concatenated.
# The result is only displayed, not assigned, so df itself keeps its 3 rows.
new_row = pd.Series(
    ['yellow', 34, 10, 'rectangle', 7],
    index=['color', 'speed', 'volume', 'type', 'value'],
)
pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

Unnamed: 0,color,speed,volume,type,value
0,red,56,80,circle,7
1,green,24,65,square,7
2,blue,65,50,triangle,7
3,yellow,34,10,rectangle,7


In [73]:
# Show s again: it still contains the 'F' element added earlier
s

A    1
B    2
C    3
D    4
E    5
F    6
dtype: int64

In [74]:
# drop() returns a new Series without the listed labels; s is left untouched
s_new = s.drop(labels=['A', 'B'])
s_new

C    3
D    4
E    5
F    6
dtype: int64

In [75]:
# Show df again: it still has 3 rows, because the cell that added new_row
# returned a new frame and did not modify df
df

Unnamed: 0,color,speed,volume,type,value
0,red,56,80,circle,7
1,green,24,65,square,7
2,blue,65,50,triangle,7


In [76]:
# Remove the row labelled 0; drop() returns a new frame
df_new = df.drop(index=[0])
df_new

Unnamed: 0,color,speed,volume,type,value
1,green,24,65,square,7
2,blue,65,50,triangle,7


In [77]:
# Remove two columns by name; returns a new frame, df is not modified
df.drop(columns=['color', 'value'])

Unnamed: 0,speed,volume,type
0,56,80,circle
1,24,65,square
2,65,50,triangle


In [78]:
# Two frames with identical columns and disjoint row labels (0-2 and 3-5),
# used as inputs for the concat examples
dfr1 = pd.DataFrame(
    data={
        'a_type': ['a1', 'a2', 'a3'],
        'b_type': ['b1', 'b2', 'b3'],
        'c_type': ['c1', 'c2', 'c3'],
    },
    index=[0, 1, 2],
)
print(dfr1)
dfr2 = pd.DataFrame(
    data={
        'a_type': ['a4', 'a5', 'a6'],
        'b_type': ['b4', 'b5', 'b6'],
        'c_type': ['c4', 'c5', 'c6'],
    },
    index=[3, 4, 5],
)
print(dfr2)

  a_type b_type c_type
0     a1     b1     c1
1     a2     b2     c2
2     a3     b3     c3
  a_type b_type c_type
3     a4     b4     c4
4     a5     b5     c5
5     a6     b6     c6


In [79]:
# Stack the two frames vertically (rows of dfr2 after rows of dfr1).
# NOTE: this rebinds df1, which earlier held the read_json result.
df1 = pd.concat([dfr1, dfr2], axis="index")
df1

Unnamed: 0,a_type,b_type,c_type
0,a1,b1,c1
1,a2,b2,c2
2,a3,b3,c3
3,a4,b4,c4
4,a5,b5,c5
5,a6,b6,c6


In [80]:
# A frame with different columns but the same default row labels 0-2
dfr3 = pd.DataFrame(
    data={
        'd_type': ['d1', 'd2', 'd3'],
        'e_type': ['e1', 'e2', 'e3'],
    }
)
dfr3

Unnamed: 0,d_type,e_type
0,d1,e1
1,d2,e2
2,d3,e3


In [81]:
# Place the frames side by side, aligning rows on their index labels.
# NOTE: this rebinds df2, which earlier held the read_json result.
df2 = pd.concat([dfr1, dfr3], axis="columns")
df2

Unnamed: 0,a_type,b_type,c_type,d_type,e_type
0,a1,b1,c1,d1,e1
1,a2,b2,c2,d2,e2
2,a3,b3,c3,d3,e3


In [82]:
# Vertical concat with keys: each source frame's rows are tagged with an outer
# index level ('dfr1' / 'dfr2'), giving a two-level row index
df3 = pd.concat(objs=[dfr1, dfr2], keys=['dfr1', 'dfr2'])
df3

Unnamed: 0,Unnamed: 1,a_type,b_type,c_type
dfr1,0,a1,b1,c1
dfr1,1,a2,b2,c2
dfr1,2,a3,b3,c3
dfr2,3,a4,b4,c4
dfr2,4,a5,b5,c5
dfr2,5,a6,b6,c6


In [83]:
# Select all rows under the outer key 'dfr2'; the result is indexed by the
# inner labels only
df3.loc['dfr2']

Unnamed: 0,a_type,b_type,c_type
3,a4,b4,c4
4,a5,b5,c5
5,a6,b6,c6


In [84]:
# A frame whose row labels (1-3) only partly overlap with dfr1's (0-2)
dfr4 = pd.DataFrame(
    data={
        'd_type': ['d2', 'd3', 'd4'],
        'e_type': ['e2', 'e3', 'e4'],
    },
    index=[1, 2, 3],
)
dfr4

Unnamed: 0,d_type,e_type
1,d2,e2
2,d3,e3
3,d4,e4


In [85]:
# Outer join on the row labels: every label from either frame is kept and
# cells with no source value are left missing
df4 = pd.concat([dfr1, dfr4], join='outer', axis="columns")
df4

Unnamed: 0,a_type,b_type,c_type,d_type,e_type
0,a1,b1,c1,,
1,a2,b2,c2,d2,e2
2,a3,b3,c3,d3,e3
3,,,,d4,e4


In [86]:
# Inner join on the row labels: only labels present in both frames are kept
df5 = pd.concat([dfr1, dfr4], join='inner', axis="columns")
df5

Unnamed: 0,a_type,b_type,c_type,d_type,e_type
1,a2,b2,c2,d2,e2
2,a3,b3,c3,d3,e3


In [87]:
# Inputs for the merge examples; all three share the key column 'k'.
# dfr2 has the same keys as dfr1, while dfr3 has k0 instead of k3.
# NOTE: dfr1, dfr2 and dfr3 are rebound here, replacing the concat inputs.
dfr1 = pd.DataFrame(
    data={
        'k': ['k1', 'k2', 'k3'],
        'a_type': ['a1', 'a2', 'a3'],
        'b_type': ['b1', 'b2', 'b3'],
    }
)
dfr2 = pd.DataFrame(data={'k': ['k1', 'k2', 'k3'], 'c_type': ['c1', 'c2', 'c3']})
dfr3 = pd.DataFrame(data={'k': ['k0', 'k1', 'k2'], 'c_type': ['c1', 'c2', 'c3']})

In [88]:
# Join the two frames on the shared key column 'k' (default join type)
dfm1 = dfr1.merge(dfr2, on='k')
dfm1

Unnamed: 0,k,a_type,b_type,c_type
0,k1,a1,b1,c1
1,k2,a2,b2,c2
2,k3,a3,b3,c3


In [89]:
# Left join: every key of dfr1 is kept; k3 has no match in dfr3, so its
# c_type is left missing
dfm2 = dfr1.merge(dfr3, on='k', how='left')
dfm2

Unnamed: 0,k,a_type,b_type,c_type
0,k1,a1,b1,c2
1,k2,a2,b2,c3
2,k3,a3,b3,


In [None]:
# Right join: every key of dfr3 is kept; k0 has no match in dfr1, so its
# dfr1 columns are left missing
dfm3 = dfr1.merge(dfr3, on='k', how='right')
dfm3

In [None]:
# Outer join: keys from both frames are kept, with gaps where one side has
# no matching row
dfm4 = dfr1.merge(dfr3, on='k', how='outer')
dfm4

In [None]:
# Inner join: only keys present in both frames are kept (k1 and k2 here)
dfm5 = dfr1.merge(dfr3, on='k', how='inner')
dfm5