In [77]:
import pandas as pd
import numpy as np

# Demo inventory table, keyed by item number.
# Every column except tax_rate is given an explicit dtype so that missing
# entries are stored as that dtype's own missing-value marker.
stock = pd.DataFrame(
    data={
        'item_no': pd.Series(range(1, 11), dtype='Int64'),
        'cost_class': pd.Series(
            ['1st', '2nd', '3rd', '4th', '4th', '3rd', '2nd', None, '1st', '3rd'],
            dtype='string',
        ),
        'cost': pd.Series(
            [10.99, np.nan, 2.99, np.nan, 2.99, 2.45, 5.99, 5.99, 3.00, np.nan],
            dtype='float64',
        ),
        'stock_code': pd.Series(
            ['a', 'a', 'c', 'b', 'a', 'b', None, None, 'a', 'c'],
            dtype='string',
        ),
        'priority_code': pd.Series(
            [None, None, 'a', 'b', None, 'a', 'e', None, 'a', 'd'],
            dtype='string',
        ),
        'tax_rate': pd.Series([0, 0, 20, 20, 20, 0, 20, 20, 5, 20]),
    }
).set_index(keys='item_no')

In [78]:
# Broadcast one scalar into a brand-new 'year' column: every row gets 2020.
stock['year'] = 2020
stock

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1st,10.99,a,,0,2020
2,2nd,,a,,0,2020
3,3rd,2.99,c,a,20,2020
4,4th,,b,b,20,2020
5,4th,2.99,a,,20,2020
6,3rd,2.45,b,a,0,2020
7,2nd,5.99,,e,20,2020
8,,5.99,,,20,2020
9,1st,3.0,a,a,5,2020
10,3rd,,c,d,20,2020


In [79]:
# assign() returns a new frame carrying the extra columns. The result is only
# displayed here, not stored, so `stock` itself keeps its current columns.
stock.assign(**{'new_year': 2021, 'checked': True})

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year,new_year,checked
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,1st,10.99,a,,0,2020,2021,True
2,2nd,,a,,0,2020,2021,True
3,3rd,2.99,c,a,20,2020,2021,True
4,4th,,b,b,20,2020,2021,True
5,4th,2.99,a,,20,2020,2021,True
6,3rd,2.45,b,a,0,2020,2021,True
7,2nd,5.99,,e,20,2020,2021,True
8,,5.99,,,20,2020,2021,True
9,1st,3.0,a,a,5,2020,2021,True
10,3rd,,c,d,20,2020,2021,True


In [80]:
# Lookup table: cost class -> cost adjustment.
# The pd.NA key gives a missing cost class an explicit NaN adjustment.
adjust_lookup = dict([
    ('1st', 12.5),
    ('2nd', 5),
    ('3rd', 0),
    ('4th', -5),
    (pd.NA, np.nan),
])

adjust_lookup

{'1st': 12.5, '2nd': 5, '3rd': 0, '4th': -5, <NA>: nan}

In [81]:
# dict.get with an explicit fallback: a key that is not in the table would
# yield NaN instead of raising KeyError.
adjust_lookup.get('1st', float('nan'))

12.5

In [82]:
# Translate every cost class into its adjustment, one lookup per row.
# dict.get is used so a class missing from the table gives None, not KeyError.
list(map(adjust_lookup.get, stock.cost_class))

[12.5, 5, 0, -5, -5, 0, 5, nan, 12.5, 0]

In [83]:
# Store the looked-up adjustment for each row in a new 'cost_adjustment' column.
stock['cost_adjustment'] = list(map(adjust_lookup.get, stock['cost_class']))

stock

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year,cost_adjustment
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1st,10.99,a,,0,2020,12.5
2,2nd,,a,,0,2020,5.0
3,3rd,2.99,c,a,20,2020,0.0
4,4th,,b,b,20,2020,-5.0
5,4th,2.99,a,,20,2020,-5.0
6,3rd,2.45,b,a,0,2020,0.0
7,2nd,5.99,,e,20,2020,5.0
8,,5.99,,,20,2020,
9,1st,3.0,a,a,5,2020,12.5
10,3rd,,c,d,20,2020,0.0


In [84]:
# Tax-inclusive cost = cost + (tax_rate percent of cost).
# Rows whose cost is missing stay missing in the new column.
stock['stock_inc_tax'] = stock['cost'].add(
    stock['tax_rate'].mul(stock['cost']).div(100)
)
stock

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year,cost_adjustment,stock_inc_tax
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,1st,10.99,a,,0,2020,12.5,10.99
2,2nd,,a,,0,2020,5.0,
3,3rd,2.99,c,a,20,2020,0.0,3.588
4,4th,,b,b,20,2020,-5.0,
5,4th,2.99,a,,20,2020,-5.0,3.588
6,3rd,2.45,b,a,0,2020,0.0,2.45
7,2nd,5.99,,e,20,2020,5.0,7.188
8,,5.99,,,20,2020,,7.188
9,1st,3.0,a,a,5,2020,12.5,3.15
10,3rd,,c,d,20,2020,0.0,


In [85]:
# task: round the new column to 2 decimal places
stock.loc[:, 'stock_inc_tax'] = stock['stock_inc_tax'].round(2)

In [86]:
# Display the frame to check the rounded stock_inc_tax values.
stock

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year,cost_adjustment,stock_inc_tax
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,1st,10.99,a,,0,2020,12.5,10.99
2,2nd,,a,,0,2020,5.0,
3,3rd,2.99,c,a,20,2020,0.0,3.59
4,4th,,b,b,20,2020,-5.0,
5,4th,2.99,a,,20,2020,-5.0,3.59
6,3rd,2.45,b,a,0,2020,0.0,2.45
7,2nd,5.99,,e,20,2020,5.0,7.19
8,,5.99,,,20,2020,,7.19
9,1st,3.0,a,a,5,2020,12.5,3.15
10,3rd,,c,d,20,2020,0.0,


In [87]:
# Remove the tax-inclusive column again. drop() returns a new frame, so the
# result is bound back to `stock`.
stock = stock.drop(columns=['stock_inc_tax'])

In [88]:
# Display the frame to confirm stock_inc_tax is gone.
stock

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year,cost_adjustment
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1st,10.99,a,,0,2020,12.5
2,2nd,,a,,0,2020,5.0
3,3rd,2.99,c,a,20,2020,0.0
4,4th,,b,b,20,2020,-5.0
5,4th,2.99,a,,20,2020,-5.0
6,3rd,2.45,b,a,0,2020,0.0
7,2nd,5.99,,e,20,2020,5.0
8,,5.99,,,20,2020,
9,1st,3.0,a,a,5,2020,12.5
10,3rd,,c,d,20,2020,0.0


In [89]:
# Remove the rows whose cost_class is missing: pick out their index labels
# and drop those labels. The result is only displayed; `stock` is not rebound.
stock.drop(index=stock.index[stock['cost_class'].isna()])

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year,cost_adjustment
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1st,10.99,a,,0,2020,12.5
2,2nd,,a,,0,2020,5.0
3,3rd,2.99,c,a,20,2020,0.0
4,4th,,b,b,20,2020,-5.0
5,4th,2.99,a,,20,2020,-5.0
6,3rd,2.45,b,a,0,2020,0.0
7,2nd,5.99,,e,20,2020,5.0
9,1st,3.0,a,a,5,2020,12.5
10,3rd,,c,d,20,2020,0.0


In [90]:
# More direct alternative: dropna() restricted to the cost_class column.
# This time the result is kept by rebinding `stock`.
stock = stock.dropna(subset=['cost_class'], axis='index')
stock

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year,cost_adjustment
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1st,10.99,a,,0,2020,12.5
2,2nd,,a,,0,2020,5.0
3,3rd,2.99,c,a,20,2020,0.0
4,4th,,b,b,20,2020,-5.0
5,4th,2.99,a,,20,2020,-5.0
6,3rd,2.45,b,a,0,2020,0.0
7,2nd,5.99,,e,20,2020,5.0
9,1st,3.0,a,a,5,2020,12.5
10,3rd,,c,d,20,2020,0.0


In [91]:
# Fill missing costs with the median of the non-missing costs in `stock`,
# rounded to 2 decimals. Only the 'cost' column is filled.
cost_median = np.round(stock['cost'].median(), 2)
stock = stock.fillna(value={'cost': cost_median})
stock

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year,cost_adjustment
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1st,10.99,a,,0,2020,12.5
2,2nd,3.0,a,,0,2020,5.0
3,3rd,2.99,c,a,20,2020,0.0
4,4th,3.0,b,b,20,2020,-5.0
5,4th,2.99,a,,20,2020,-5.0
6,3rd,2.45,b,a,0,2020,0.0
7,2nd,5.99,,e,20,2020,5.0
9,1st,3.0,a,a,5,2020,12.5
10,3rd,3.0,c,d,20,2020,0.0


## chained indexing 

In [92]:
# Target rows: cost class '1st' AND stock code 'a' (to be reduced by 10%).
mask = stock['cost_class'].eq('1st') & stock['stock_code'].eq('a')

# Rows where the mask applies
stock.loc[mask, :]

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year,cost_adjustment
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1st,10.99,a,,0,2020,12.5
9,1st,3.0,a,a,5,2020,12.5


In [93]:
# Overwrite cost in these rows.
# NOTE: deliberate demonstration of chained indexing. The assignment goes
# through two separate indexing steps (stock[mask], then ['cost']), and pandas
# warns that the value is being set on a copy of a slice, so `stock` itself
# may not be updated. Prefer a single stock.loc[mask, 'cost'] = ... step.
stock[mask]['cost'] = (stock[mask]['cost']*0.9).round(decimals = 2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stock[mask]['cost'] = (stock[mask]['cost']*0.9).round(decimals = 2)


In [94]:
# This form happens to update `stock` (see the values displayed next), but it
# is still chained indexing: stock['cost'] is evaluated first and the masked
# assignment is applied to that intermediate Series, which is why pandas emits
# the same warning. Whether the write reaches `stock` depends on whether the
# intermediate object is a view or a copy (see the pandas indexing docs linked
# in the warning), so do not rely on it; use stock.loc[mask, 'cost'] = ...
stock['cost'][mask] = (stock[mask]['cost']*0.9).round(decimals = 2)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stock['cost'][mask] = (stock[mask]['cost']*0.9).round(decimals = 2)


In [95]:
# Display the masked rows to see whether their cost was actually changed.
stock[mask]

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year,cost_adjustment
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1st,9.89,a,,0,2020,12.5
9,1st,2.7,a,a,5,2020,12.5


In [96]:
# Display the full frame after the chained-indexing assignments.
stock

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year,cost_adjustment
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1st,9.89,a,,0,2020,12.5
2,2nd,3.0,a,,0,2020,5.0
3,3rd,2.99,c,a,20,2020,0.0
4,4th,3.0,b,b,20,2020,-5.0
5,4th,2.99,a,,20,2020,-5.0
6,3rd,2.45,b,a,0,2020,0.0
7,2nd,5.99,,e,20,2020,5.0
9,1st,2.7,a,a,5,2020,12.5
10,3rd,3.0,c,d,20,2020,0.0


In [97]:
# Back to the original costs: rebuild them as a Series labelled 1..10 and
# write it into the 'cost' column. Assignment aligns on the index labels, so
# labels that are no longer present in `stock` are simply not used.
original_stock_costs = pd.Series(
    data=[10.99, np.nan, 2.99, np.nan, 2.99, 2.45, 5.99, 5.99, 3.00, None],
    index=range(1, 11),
)
stock.loc[:, 'cost'] = original_stock_costs

stock

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year,cost_adjustment
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1st,10.99,a,,0,2020,12.5
2,2nd,,a,,0,2020,5.0
3,3rd,2.99,c,a,20,2020,0.0
4,4th,,b,b,20,2020,-5.0
5,4th,2.99,a,,20,2020,-5.0
6,3rd,2.45,b,a,0,2020,0.0
7,2nd,5.99,,e,20,2020,5.0
9,1st,3.0,a,a,5,2020,12.5
10,3rd,,c,d,20,2020,0.0


## single-step `.loc` indexing > chained indexing

In [57]:
# Display the rows currently selected by `mask`.
stock[mask]

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year,cost_adjustment
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1st,10.99,a,,0,2020,12.5
9,1st,3.0,a,a,5,2020,12.5


In [98]:
# Reduce the cost of items in cost class '1st' with stock code 'a' by 10%.
mask = stock['cost_class'].eq('1st') & stock['stock_code'].eq('a')
# One .loc call selects rows and column together, so the write goes to `stock`.
stock.loc[mask, 'cost'] = stock.loc[mask, 'cost'].mul(0.9).round(2)
stock

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year,cost_adjustment
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1st,9.89,a,,0,2020,12.5
2,2nd,,a,,0,2020,5.0
3,3rd,2.99,c,a,20,2020,0.0
4,4th,,b,b,20,2020,-5.0
5,4th,2.99,a,,20,2020,-5.0
6,3rd,2.45,b,a,0,2020,0.0
7,2nd,5.99,,e,20,2020,5.0
9,1st,2.7,a,a,5,2020,12.5
10,3rd,,c,d,20,2020,0.0


In [99]:
# task: add 5 to the cost of all items with stock code 'a'
mask = stock['stock_code'].eq('a')

stock.loc[mask, 'cost'] = stock.loc[mask, 'cost'].add(5)

In [100]:
# Display the frame to check the costs of the stock code 'a' rows.
stock

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year,cost_adjustment
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1st,14.89,a,,0,2020,12.5
2,2nd,,a,,0,2020,5.0
3,3rd,2.99,c,a,20,2020,0.0
4,4th,,b,b,20,2020,-5.0
5,4th,7.99,a,,20,2020,-5.0
6,3rd,2.45,b,a,0,2020,0.0
7,2nd,5.99,,e,20,2020,5.0
9,1st,7.7,a,a,5,2020,12.5
10,3rd,,c,d,20,2020,0.0


In [102]:
# Task: increase the cost of low-cost (<= 3) items by 2.00.
# Work on an independent copy so that `stock` keeps its current values.
stock_copy = stock.copy(deep=True)
stock

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year,cost_adjustment
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1st,14.89,a,,0,2020,12.5
2,2nd,,a,,0,2020,5.0
3,3rd,2.99,c,a,20,2020,0.0
4,4th,,b,b,20,2020,-5.0
5,4th,7.99,a,,20,2020,-5.0
6,3rd,2.45,b,a,0,2020,0.0
7,2nd,5.99,,e,20,2020,5.0
9,1st,7.7,a,a,5,2020,12.5
10,3rd,,c,d,20,2020,0.0


In [103]:
# Flag the low-cost items. The task is stated as "cost <= 3", so the
# comparison must be inclusive (a cost of exactly 3 counts as low cost).
# Rows with a missing cost compare as False and are therefore not flagged.
low_cost_mask = stock.cost <= 3
low_cost_mask

item_no
1     False
2     False
3      True
4     False
5     False
6      True
7     False
9     False
10    False
Name: cost, dtype: bool

In [105]:
# Add 2.00 to the cost of the flagged rows, on the copy only.
stock_copy.loc[low_cost_mask, 'cost'] += 2.00

In [106]:
# Display the copy to check the adjusted low-cost rows.
stock_copy

Unnamed: 0_level_0,cost_class,cost,stock_code,priority_code,tax_rate,year,cost_adjustment
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1st,14.89,a,,0,2020,12.5
2,2nd,,a,,0,2020,5.0
3,3rd,4.0,c,a,20,2020,0.0
4,4th,,b,b,20,2020,-5.0
5,4th,7.99,a,,20,2020,-5.0
6,3rd,4.0,b,a,0,2020,0.0
7,2nd,5.99,,e,20,2020,5.0
9,1st,7.7,a,a,5,2020,12.5
10,3rd,,c,d,20,2020,0.0


In [116]:
#task: copy cost and cost class to cost_copy df & add cost_zscore column using function

def z_score(series, ddof=1):
    """Standardise a numeric Series to z-scores: (value - mean) / std.

    Parameters
    ----------
    series : pd.Series
        Numeric values to standardise.
    ddof : int, default 1
        Delta degrees of freedom forwarded to ``Series.std``. The default of 1
        (sample standard deviation) is what ``Series.std()`` uses when called
        without arguments, so existing one-argument calls are unaffected.
        Pass 0 for the population standard deviation.

    Returns
    -------
    pd.Series
        Z-scores carrying the same index as ``series``. Missing inputs stay
        missing: ``mean`` and ``std`` skip NaN by default, and NaN minus a
        number is still NaN.

    Notes
    -----
    There is no guard against a zero or undefined standard deviation; in that
    case the division follows floating-point rules and yields NaN or inf
    instead of raising.
    """
    centred = series - series.mean()
    return centred / series.std(ddof=ddof)

# Take an independent two-column copy of `stock`, then add the standardised
# cost next to the raw cost.
cost_copy = stock[['cost_class', 'cost']].copy()
cost_copy = cost_copy.assign(cost_zscore=z_score(cost_copy['cost']))

cost_copy

Unnamed: 0_level_0,cost_class,cost,cost_zscore
item_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1st,14.89,1.750588
2,2nd,,
3,3rd,2.99,-0.890274
4,4th,,
5,4th,7.99,0.219332
6,3rd,2.45,-1.010111
7,2nd,5.99,-0.22451
9,1st,7.7,0.154975
10,3rd,,
