### Модификация таблиц с Pandas

In [119]:
import pandas as pd


Будем работать с датасетом по оттоку клиентов из банка https://www.kaggle.com/datasets/shubh0799/churn-modelling,<br> но датасет из себя будет представлять две таблицы:

1. Личные данные клиента

- CustomerId - Уникальный идентификатор клиента
- Surname - Фамилия клиента
- Geography - Из какой страны клиент
- Gender - Пол клиента
- Age - Возраст клиента
- EstimatedSalary - Предположительная зарплата клиента

2. Данные по поведению клиента в банке

- CustomerId - Уникальный идентификатор клиента
- CreditScore - Кредитный рейтинг клиента
- Tenure - Сколько лет человек является клиентом банка
- Balance - Баланс счета
- NumOfProducts - Количество открытых продуктов
- HasCrCard - Есть ли у клиента кредитная карта
- IsActiveMember - Является ли клиент активным участником
- Exited - Уйдет ли человек в отток

In [120]:
users = pd.read_csv('../Seminar_2/Churn_Modelling.csv', sep=",")
users.head()


Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [121]:
users.shape

(10000, 14)

### Создание новых признаков

In [122]:
users['new_feature'] = 0
users.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,new_feature
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1,0
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1,0
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,0


In [123]:
users['Age (days)'] = users['Age'] * 365
users.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,new_feature,Age (days)
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1,0,15330
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,0,14965
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1,0,15330
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0,0,14235
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,0,15695


In [124]:
for i, row in users.iloc[:2].iterrows():
    print(row)
    print('__' * 30)

RowNumber                  1
CustomerId          15634602
Surname             Hargrave
CreditScore              619
Geography             France
Gender                Female
Age                       42
Tenure                     2
Balance                  0.0
NumOfProducts              1
HasCrCard                  1
IsActiveMember             1
EstimatedSalary    101348.88
Exited                     1
new_feature                0
Age (days)             15330
Name: 0, dtype: object
____________________________________________________________
RowNumber                  2
CustomerId          15647311
Surname                 Hill
CreditScore              608
Geography              Spain
Gender                Female
Age                       41
Tenure                     1
Balance             83807.86
NumOfProducts              1
HasCrCard                  0
IsActiveMember             1
EstimatedSalary    112542.58
Exited                     0
new_feature                0
Age (days)       

In [125]:
# Row-by-row alternative to the vectorized `users['Age'] * 365`:
# walk the frame with iterrows() and convert each client's age to days.
age_days = [record['Age'] * 365 for _, record in users.iterrows()]

age_days[:10]

[15330, 14965, 15330, 14235, 15695, 16060, 18250, 10585, 16060, 9855]

In [126]:
users['Age (days) 2'] = age_days
users.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,new_feature,Age (days),Age (days) 2
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1,0,15330,15330
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,0,14965,14965
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1,0,15330,15330
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0,0,14235,14235
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,0,15695,15695


In [127]:
def age_to_days(x):
    """Convert an age given in years to days, counting 365 days per year."""
    days_per_year = 365
    return x * days_per_year

users['Age (days) 3'] = users['Age'].apply(age_to_days)
users.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,new_feature,Age (days),Age (days) 2,Age (days) 3
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1,0,15330,15330,15330
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,0,14965,14965,14965
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1,0,15330,15330,15330
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0,0,14235,14235,14235
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,0,15695,15695,15695


In [128]:
import time
from tqdm import tqdm
tqdm.pandas()


def age_to_days(x):
    """Convert an age in years to days (365 per year), pausing 1 ms per call.

    NOTE(review): this redefines the `age_to_days` declared in an earlier
    cell; the sleep presumably exists only to make the progress bar visible.
    """
    time.sleep(0.001)
    return x * 365

users['Age'].progress_apply(age_to_days)

ModuleNotFoundError: No module named 'tqdm'

### Удаление признаков

In [None]:
# DataFrame.drop returns a new DataFrame; the result is discarded here, so
# `users` itself is unchanged and head() below still shows 'new_feature'.
users.drop(columns='new_feature')
users.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,new_feature,Age (days),Age (days) 2,Age (days) 3
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1,0,15330,15330,15330
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,0,14965,14965,14965
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1,0,15330,15330,15330
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0,0,14235,14235,14235
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,0,15695,15695,15695


In [None]:
users = users.drop(columns='new_feature')
users.head() 

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Age (days),Age (days) 2,Age (days) 3
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1,15330,15330,15330
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,14965,14965,14965
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1,15330,15330,15330
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0,14235,14235,14235
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,15695,15695,15695


In [None]:
users['new_feature'] = 0

In [None]:
users.drop(columns='new_feature', inplace=True)
users.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Age (days),Age (days) 2,Age (days) 3
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1,15330,15330,15330
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,14965,14965,14965
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1,15330,15330,15330
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0,14235,14235,14235
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,15695,15695,15695


In [None]:
users.drop(columns=['Age (days)', 'Age (days) 2', 'Age (days) 3'], inplace=True)
users.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


### Изменение существующих признаков

.loc

In [None]:
users['target'] = 0
users.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,new_feature,Age (days),Age (days) 2,Age (days) 3,target
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1,0,15330,15330,15330,0
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,0,14965,14965,14965,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1,0,15330,15330,15330,0
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0,0,14235,14235,14235,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,0,15695,15695,15695,0


In [None]:
users.loc[users['Geography'] == 'France']

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,new_feature,Age (days),Age (days) 2,Age (days) 3,target
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1,0,15330,15330,15330,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1,0,15330,15330,15330,0
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0,0,14235,14235,14235,0
6,7,15592531,Bartlett,822,France,Male,50,7,0.00,2,1,1,10062.80,0,0,18250,18250,18250,0
8,9,15792365,He,501,France,Male,44,4,142051.07,2,0,1,74940.50,0,0,16060,16060,16060,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9994,9995,15719294,Wood,800,France,Female,29,2,0.00,2,0,0,167773.55,0,0,10585,10585,10585,0
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0,0,14235,14235,14235,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0,0,12775,12775,12775,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1,0,13140,13140,13140,0


In [None]:
users.loc[users['Geography'] == 'France', 'target']

0       0
2       0
3       0
6       0
8       0
       ..
9994    0
9995    0
9996    0
9997    0
9999    0
Name: target, Length: 5014, dtype: int64

In [None]:
# Deliberate anti-pattern: chained indexing assigns into a temporary copy,
# so `users` is not modified and pandas emits SettingWithCopyWarning.
# The supported form is users.loc[row_mask, 'target'] = 1.
users[users['Geography'] == 'France']['target'] = 1
users.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  users[users['Geography'] == 'France']['target'] = 1


Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,new_feature,Age (days),Age (days) 2,Age (days) 3,target
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1,0,15330,15330,15330,0
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,0,14965,14965,14965,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1,0,15330,15330,15330,0
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0,0,14235,14235,14235,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,0,15695,15695,15695,0


In [None]:
users.loc[users['Geography'] == 'France','target'] = 1
users.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,new_feature,Age (days),Age (days) 2,Age (days) 3,target
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1,0,15330,15330,15330,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,0,14965,14965,14965,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1,0,15330,15330,15330,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0,0,14235,14235,14235,1
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,0,15695,15695,15695,0


.replace

In [None]:
# Assign the result back instead of calling replace(..., inplace=True) on
# users['Gender']: an in-place method on a column selection is chained
# assignment, which newer pandas versions warn about and, with Copy-on-Write,
# no longer propagate to the parent DataFrame.
users['Gender'] = users['Gender'].replace({'Female': 'F', 'Male': 'M'})
users.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,new_feature,Age (days),Age (days) 2,Age (days) 3,target
0,1,15634602,Hargrave,619,France,F,42,2,0.0,1,1,1,101348.88,1,0,15330,15330,15330,1
1,2,15647311,Hill,608,Spain,F,41,1,83807.86,1,0,1,112542.58,0,0,14965,14965,14965,0
2,3,15619304,Onio,502,France,F,42,8,159660.8,3,1,0,113931.57,1,0,15330,15330,15330,1
3,4,15701354,Boni,699,France,F,39,1,0.0,2,0,0,93826.63,0,0,14235,14235,14235,1
4,5,15737888,Mitchell,850,Spain,F,43,2,125510.82,1,1,1,79084.1,0,0,15695,15695,15695,0


### Методы агрегации

Агрегация, или агрегирование — процесс объединения элементов в одно значение<br>
Первый способ - вызвать метод agg() у pd.Series и передать в него список желаемых агрегаций

In [None]:
users['Age'].agg(['min', 'max'])

min    18
max    92
Name: Age, dtype: int64

In [None]:
users.agg({
    'Age': ['min', 'max'],
    'EstimatedSalary': 'mean'
})

Unnamed: 0,Age,EstimatedSalary
min,18.0,
max,92.0,
mean,,100090.239881


In [None]:
users.agg(
    min_age=('Age', 'min'),
    max_age=('Age', 'max'),
    mean_salary=('EstimatedSalary', 'mean')
)

Unnamed: 0,Age,EstimatedSalary
min_age,18.0,
max_age,92.0,
mean_salary,,100090.239881


### Методы объединения

[file](https://gbcdn.mrgcdn.ru/uploads/asset/4239332/attachment/7d8be3cf1f6192275f44c62047e51fc7.html)

```bank = pd.read_csv('bank.csv', sep=';')```

```bank.shape```

```merged = users.merge(bank, left_on='CustomerId', right_on='CustomerId') ```

```users_id = users.set_index('CustomerId')```

```bank_id = bank.set_index('CustomerId')```

```bank_id.join(users_id).head()```

```bank_id.join(users_id).reset_index().head()```

### Параметр how

<img src ='./how.jpg'>

In [None]:
toy_df1 = pd.DataFrame({
    'col_1': [1, 2, 3],
    'col_2': [9, 9, 9]
})

toy_df2 = pd.DataFrame({
    'col_1': [3, 4],
    'col_3': [0, 0]
})

display(toy_df1, toy_df2)

Unnamed: 0,col_1,col_2
0,1,9
1,2,9
2,3,9


Unnamed: 0,col_1,col_3
0,3,0
1,4,0


In [None]:
toy_df1.merge(toy_df2, how='left')

Unnamed: 0,col_1,col_2,col_3
0,1,9,
1,2,9,
2,3,9,0.0


In [None]:
toy_df1.merge(toy_df2, how='right')

Unnamed: 0,col_1,col_2,col_3
0,3,9.0,0
1,4,,0


In [None]:
toy_df1.merge(toy_df2, how='inner')

Unnamed: 0,col_1,col_2,col_3
0,3,9,0


In [None]:
toy_df1.merge(toy_df2, how='outer')

Unnamed: 0,col_1,col_2,col_3
0,1,9.0,
1,2,9.0,
2,3,9.0,0.0
3,4,,0.0


left

```merged_left = bank.merge(users, on='CustomerId', how='left')```

```merged_left.isna().sum()```

```merged_left[merged_left['CreditScore'].isna()]```

```bank[bank['CustomerId'] == 15611325]```

inner

```merged_inner = bank.merge(users, on='CustomerId', how='inner')```

```merged_inner.isna().sum()```

outer

```merged_outer = bank.merge(users, on='CustomerId', how='outer')```

```merged_outer.isna().sum()```

### Методы группировок

<img src = './groupby.webp'>

In [None]:
toy_df = pd.DataFrame({
    'client_id': [1, 2, 2, 3, 1, 1],
    'item': ['chocolate', 'cheese', 'ham', 'candy', 'chair', 'book'],
    'price': [68, 280, 302, 39, 2099, 1089]
})

toy_df

Unnamed: 0,client_id,item,price
0,1,chocolate,68
1,2,cheese,280
2,2,ham,302
3,3,candy,39
4,1,chair,2099
5,1,book,1089


In [None]:
grouped = toy_df.groupby('client_id')
grouped

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000018CF77A1310>

In [None]:
grouped.groups

{1: [0, 4, 5], 2: [1, 2], 3: [3]}

In [None]:
grouped.sum()

Unnamed: 0_level_0,item,price
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,chocolatechairbook,3256
2,cheeseham,582
3,candy,39


In [None]:
grouped.agg({'price': ['sum', 'min', 'max']})

Unnamed: 0_level_0,price,price,price
Unnamed: 0_level_1,sum,min,max
client_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,3256,68,2099
2,582,280,302
3,39,39,39


In [None]:
users.groupby('Geography').agg({'Age': ['mean'], 'EstimatedSalary': ['min']})

Unnamed: 0_level_0,Age,EstimatedSalary
Unnamed: 0_level_1,mean,min
Geography,Unnamed: 1_level_2,Unnamed: 2_level_2
France,38.511767,90.07
Germany,39.771622,11.58
Spain,38.890997,417.41


pivot_table

In [None]:
toy_df

Unnamed: 0,client_id,item,price
0,1,chocolate,68
1,2,cheese,280
2,2,ham,302
3,3,candy,39
4,1,chair,2099
5,1,book,1089


In [None]:
toy_df.pivot_table(index='client_id',
                   values='price',
                   aggfunc='sum')

Unnamed: 0_level_0,price
client_id,Unnamed: 1_level_1
1,3256
2,582
3,39


In [None]:
users.pivot_table(index='Geography',
                  aggfunc={'Age': ['mean'], 'EstimatedSalary': 'min'})

Unnamed: 0_level_0,Age,EstimatedSalary
Unnamed: 0_level_1,mean,min
Geography,Unnamed: 1_level_2,Unnamed: 2_level_2
France,38.511767,90.07
Germany,39.771622,11.58
Spain,38.890997,417.41


In [None]:
users.pivot_table(index='Geography',
                  columns='Gender', 
                  values='EstimatedSalary',
                  aggfunc='mean',
                  margins=True,
                  margins_name='Total')

Gender,F,M,Total
Geography,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
France,99564.252755,100174.252495,99899.180814
Germany,102446.424124,99905.033959,101113.435102
Spain,100734.107475,98425.68768,99440.572281
Total,100601.541382,99664.576931,100090.239881


crosstab

In [None]:
pd.crosstab(index=users['Geography'],
            columns=users['Gender'])

Gender,F,M
Geography,Unnamed: 1_level_1,Unnamed: 2_level_1
France,2261,2753
Germany,1193,1316
Spain,1089,1388


In [None]:
pd.crosstab(index=users['Geography'],
            columns=users['Gender'],
            values=users['EstimatedSalary'],
            aggfunc='mean')

Gender,F,M
Geography,Unnamed: 1_level_1,Unnamed: 2_level_1
France,99564.252755,100174.252495
Germany,102446.424124,99905.033959
Spain,100734.107475,98425.68768


In [None]:
pd.crosstab(index=users['Geography'],
            columns=users['Gender'],
            normalize='all')

Gender,F,M
Geography,Unnamed: 1_level_1,Unnamed: 2_level_1
France,0.2261,0.2753
Germany,0.1193,0.1316
Spain,0.1089,0.1388


In [None]:
pd.crosstab(index=users['Geography'],
            columns=users['Gender'],
            normalize='index')

Gender,F,M
Geography,Unnamed: 1_level_1,Unnamed: 2_level_1
France,0.450937,0.549063
Germany,0.475488,0.524512
Spain,0.439645,0.560355


In [None]:
pd.crosstab(index=users['Geography'],
            columns=users['Gender'],
            normalize='columns')

Gender,F,M
Geography,Unnamed: 1_level_1,Unnamed: 2_level_1
France,0.497689,0.50449
Germany,0.262602,0.241158
Spain,0.239709,0.254352


### Встроенные визуализации

In [129]:
users['Age'].hist();

ImportError: matplotlib is required for plotting when the default backend "matplotlib" is selected.

In [None]:
# Number of clients per gender (non-null 'Age' values), labelled 'Gender'.
data = users.groupby('Gender')['Age'].count().rename('Gender')
data

Gender
F    4543
M    5457
Name: Gender, dtype: int64

In [None]:
data.plot.pie(y='Gender');

ImportError: matplotlib is required for plotting when the default backend "matplotlib" is selected.

In [None]:
users.iloc[:100].plot.scatter(x='Age', y='EstimatedSalary');

ImportError: matplotlib is required for plotting when the default backend "matplotlib" is selected.

In [None]:
# Number of clients per tenure value (non-null 'Balance' values),
# labelled 'num_clients'.
data = users.groupby('Tenure')['Balance'].count().rename('num_clients')
data

Tenure
0      413
1     1035
2     1048
3     1009
4      989
5     1012
6      967
7     1028
8     1025
9      984
10     490
Name: num_clients, dtype: int64

In [None]:
data.plot.bar(width=0.8);

ImportError: matplotlib is required for plotting when the default backend "matplotlib" is selected.