# Note
This notebook contains stuff that was done at an earlier point in time. Therefore some of the things that were done can also be found in other notebooks. The csv-files read at the beginning do not exist in that exact form anymore.

The main reason you see this notebook uploaded is that you can see the pain of manually categorizing the products in the merged dataframe. 
At some point a automized version gets uploaded.

In [1]:
import pandas as pd

In [2]:
orders = pd.read_csv('orders_clean.csv')
orderlines = pd.read_csv('orderlines_clean.csv')
products = pd.read_csv('products_cleaned.csv')
brands = pd.read_csv('brands_clean.csv')

In [3]:
pd.set_option('display.float_format', lambda x: '%.2f' % x)
pd.set_option('display.max_rows', 1000)

### Cleaning the data from rows, which sku does not appear in the products table

In [4]:
orderlines

Unnamed: 0,id,id_order,product_quantity,sku,unit_price,date,total_price
0,1119109,299539,1,OTT0133,18.99,2017-01-01 00:07:19,18.99
1,1119110,299540,1,LGE0043,399.00,2017-01-01 00:19:45,399.00
2,1119111,299541,1,PAR0071,474.05,2017-01-01 00:20:57,474.05
3,1119112,299542,1,WDT0315,68.39,2017-01-01 00:51:40,68.39
4,1119113,299543,1,JBL0104,23.74,2017-01-01 01:06:38,23.74
...,...,...,...,...,...,...,...
293413,1650199,527398,1,JBL0122,42.99,2018-03-14 13:57:25,42.99
293414,1650200,527399,1,PAC0653,141.58,2018-03-14 13:57:34,141.58
293415,1650201,527400,2,APP0698,9.99,2018-03-14 13:57:41,19.98
293416,1650202,527388,1,BEZ0204,19.99,2018-03-14 13:58:01,19.99


In the next step we want to exclude orders which contain products that do not appear in the products list.
Since orderlines also has a sku column, we can compare the orderlines and products df.
From the code  *orderlines.loc[~orderlines.sku.isin(products.sku)]* we see that 1407 rows contain sku's that do not belong to a product in the product table. So we should remove those from the orders and the orderlines table

Note that right now orderlines hat 293418 rows

In [5]:
orderlines['check_products'] = ~orderlines.sku.isin(products.sku)
orderlines.loc[orderlines.check_products]

Unnamed: 0,id,id_order,product_quantity,sku,unit_price,date,total_price,check_products
109,1119316,299638,1,SYN0127,223.24,2017-01-01 11:56:18,223.24,True
180,1119477,299706,1,EVU0007,28.49,2017-01-01 13:57:16,28.49,True
186,1119494,299712,1,APP0608,279.99,2017-01-01 14:10:47,279.99,True
240,1119584,299766,1,APP0607,279.99,2017-01-01 16:09:02,279.99,True
271,1119637,299799,1,APP0608,279.99,2017-01-01 17:09:18,279.99,True
...,...,...,...,...,...,...,...,...
262950,1601131,491926,1,REP0088,479.65,2018-02-05 13:17:51,479.65,True
262951,1601133,491926,1,REP0088,14.31,2018-02-05 13:20:30,14.31,True
262953,1601136,491926,1,REP0137,54.22,2018-02-05 13:22:01,54.22,True
272613,1617050,510703,1,PIE0081,16.52,2018-02-16 17:15:43,16.52,True


Now we should remove every id_order, that contains a product that is not in the list.
Note that every row with such an id_order should be removed, not only the ones with a non-existing sku.
To do this we can create a new table orderlines_id_remove, which is grouped by the id_order and aggregates the False-counts of check_products. If the count is >0, we remove that id_order.
To make life a little easier, I change the check_products column for this: It says True if the sku is NOT in the product list.
Now we can sum up easier.

In [6]:
orderlines_id_remove = orderlines.copy().groupby('id_order').agg({'check_products':'sum'})
orderlines_id_remove.loc[orderlines_id_remove.check_products > 0]

Unnamed: 0_level_0,check_products
id_order,Unnamed: 1_level_1
258985,1
268659,1
285098,1
299638,1
299706,1
...,...
492329,1
492571,1
493528,1
510703,1


We have found 1384 id_orders that can be removed. We keep only those id_orders, where orderlines_id_remove.check_products == 0.
We also reset the index, so that we don't get a key error in the next step.

In [7]:
orderlines_id_remove = orderlines_id_remove.loc[orderlines_id_remove.check_products == 0].reset_index()
orderlines_id_remove

Unnamed: 0,id_order,check_products
0,241319,0
1,241423,0
2,242832,0
3,243330,0
4,243784,0
...,...,...
203124,527397,0
203125,527398,0
203126,527399,0
203127,527400,0


Now we only keep those rows from orderlines, that have an id_order, that appears in orderlines_id_remove

In [8]:
orderlines = orderlines.loc[orderlines.id_order.isin(orderlines_id_remove.id_order)]
orderlines

Unnamed: 0,id,id_order,product_quantity,sku,unit_price,date,total_price,check_products
0,1119109,299539,1,OTT0133,18.99,2017-01-01 00:07:19,18.99,False
1,1119110,299540,1,LGE0043,399.00,2017-01-01 00:19:45,399.00,False
2,1119111,299541,1,PAR0071,474.05,2017-01-01 00:20:57,474.05,False
3,1119112,299542,1,WDT0315,68.39,2017-01-01 00:51:40,68.39,False
4,1119113,299543,1,JBL0104,23.74,2017-01-01 01:06:38,23.74,False
...,...,...,...,...,...,...,...,...
293413,1650199,527398,1,JBL0122,42.99,2018-03-14 13:57:25,42.99,False
293414,1650200,527399,1,PAC0653,141.58,2018-03-14 13:57:34,141.58,False
293415,1650201,527400,2,APP0698,9.99,2018-03-14 13:57:41,19.98,False
293416,1650202,527388,1,BEZ0204,19.99,2018-03-14 13:58:01,19.99,False


comparing row-counts, we see that about 3000 rows were removed.

Since id_orders matches orders.order_id, we can also remove these rows from orders

In [9]:
orders

Unnamed: 0,order_id,created_date,total_paid,state
0,241319,2017-01-02 13:35:40,44.99,Cancelled
1,241423,2017-11-06 13:10:02,136.15,Completed
2,242832,2017-12-31 17:40:03,15.76,Completed
3,243330,2017-02-16 10:59:38,84.98,Completed
4,243784,2017-11-24 13:35:19,157.86,Cancelled
...,...,...,...,...
204508,527397,2018-03-14 13:56:38,42.99,Place Order
204509,527398,2018-03-14 13:57:25,42.99,Shopping Basket
204510,527399,2018-03-14 13:57:34,141.58,Shopping Basket
204511,527400,2018-03-14 13:57:41,19.98,Shopping Basket


In [10]:
orders = orders.loc[orders.order_id.isin(orderlines.id_order)]
orders

Unnamed: 0,order_id,created_date,total_paid,state
0,241319,2017-01-02 13:35:40,44.99,Cancelled
1,241423,2017-11-06 13:10:02,136.15,Completed
2,242832,2017-12-31 17:40:03,15.76,Completed
3,243330,2017-02-16 10:59:38,84.98,Completed
4,243784,2017-11-24 13:35:19,157.86,Cancelled
...,...,...,...,...
204508,527397,2018-03-14 13:56:38,42.99,Place Order
204509,527398,2018-03-14 13:57:25,42.99,Shopping Basket
204510,527399,2018-03-14 13:57:34,141.58,Shopping Basket
204511,527400,2018-03-14 13:57:41,19.98,Shopping Basket


about 1400 rows got removed from the orders df.
We don't need the column 'check_products' anymore, so it can be dropped.

In the next step we want to compare the prices of the table orders, orderlines and products. We already looked at orders and orderlines in the other notebook, so we focus on orderlines and products here.
Idea here is to left merge orderlines with products on sku. Since we got rid of every sku that's not in the products table, no problems should arise.

In [11]:
orders.state.value_counts()

Shopping Basket    107136
Completed           46139
Place Order         28506
Pending             14238
Cancelled            7110
Name: state, dtype: int64

In [12]:
orderlines_products = pd.merge(orderlines, products, how = 'left', on='sku')
orderlines_products

Unnamed: 0,id,id_order,product_quantity,sku,unit_price,date,total_price,check_products,name,desc,price,promo_price,in_stock,type,dots_promo,no_price,price_promo_difference
0,1119109,299539,1,OTT0133,18.99,2017-01-01 00:07:19,18.99,False,Otterbox iPhone Case Symmetry 2.0 SE / 5s / 5 ...,resistant cover and thin beveled edges for iPh...,34.99,19.99,0,11865403,1,False,-164.91
1,1119110,299540,1,LGE0043,399.00,2017-01-01 00:19:45,399.00,False,"27UD58-B LG Monitor 27 ""4K UHD DisplayPort",Monitor for gamers and multimedia professional...,429.00,399.00,0,1296,2,False,-3561.00
2,1119111,299541,1,PAR0071,474.05,2017-01-01 00:20:57,474.05,False,Parrot Bebop 2 White + Command FLYPAD and FPV ...,cuadricóptero wireless remote control with 25 ...,699.00,569.00,0,11905404,2,False,-4990.99
3,1119112,299542,1,WDT0315,68.39,2017-01-01 00:51:40,68.39,False,"Blue WD 2TB Hard Drive 35 ""Mac and PC",Internal Hard Drive Western Digital 2TB 3.5-in...,79.00,63.99,0,12655397,1,False,-560.95
4,1119113,299543,1,JBL0104,23.74,2017-01-01 01:06:38,23.74,False,Gray Bluetooth Speaker JBL GO,Compact Bluetooth Handsfree for iPhone iPad an...,29.90,27.99,1,5398,1,False,1.91
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
290264,1650199,527398,1,JBL0122,42.99,2018-03-14 13:57:25,42.99,False,JBL T450 BT Bluetooth Headset Black,Wireless headphones with folding design with 1...,49.95,42.99,1,5384,1,False,-379.95
290265,1650200,527399,1,PAC0653,141.58,2018-03-14 13:57:34,141.58,False,Samsung SSD 850 expansion kit EVO 250GB + Data...,SSD upgrade kit 2008-2010 250 GB MacBook and M...,215.98,141.58,1,1433,2,False,-1199.87
290266,1650201,527400,2,APP0698,9.99,2018-03-14 13:57:41,19.98,False,Apple Lightning Cable Connector to USB 1m Whit...,Apple Lightning USB Cable 1 meter to charge an...,25.00,9.99,1,1230,1,False,-74.90
290267,1650202,527388,1,BEZ0204,19.99,2018-03-14 13:58:01,19.99,False,"Be.ez LArobe Case Mix Macbook 12 ""Green",Macbook thin sheath 12 inches.,29.99,19.99,0,13835403,1,False,-169.91


Since I modified some of the prices earlier, I will drop the price_promo_difference for now. Also type, dots_promo and no_price will not be needed

In [13]:
orderlines_products.drop(['type', 'no_price', 'dots_promo', 'price_promo_difference'], axis=1, inplace=True)

It would be interesting to see, if orders make it further in the shopping process when the products have a discount, but we can investigate this question some other time.

For now let's focus on the orders that are completed, since these are the ones that make the actual money. We need the orders table for that.



### Merging all the tables into one



In [14]:
orders_products = orderlines_products.merge(orders, how='left', left_on='id_order', right_on='order_id')
orders_products

Unnamed: 0,id,id_order,product_quantity,sku,unit_price,date,total_price,check_products,name,desc,price,promo_price,in_stock,order_id,created_date,total_paid,state
0,1119109,299539,1,OTT0133,18.99,2017-01-01 00:07:19,18.99,False,Otterbox iPhone Case Symmetry 2.0 SE / 5s / 5 ...,resistant cover and thin beveled edges for iPh...,34.99,19.99,0,299539,2017-01-01 00:07:19,18.99,Shopping Basket
1,1119110,299540,1,LGE0043,399.00,2017-01-01 00:19:45,399.00,False,"27UD58-B LG Monitor 27 ""4K UHD DisplayPort",Monitor for gamers and multimedia professional...,429.00,399.00,0,299540,2017-01-01 00:19:45,399.00,Shopping Basket
2,1119111,299541,1,PAR0071,474.05,2017-01-01 00:20:57,474.05,False,Parrot Bebop 2 White + Command FLYPAD and FPV ...,cuadricóptero wireless remote control with 25 ...,699.00,569.00,0,299541,2017-01-01 00:20:57,474.05,Shopping Basket
3,1119112,299542,1,WDT0315,68.39,2017-01-01 00:51:40,68.39,False,"Blue WD 2TB Hard Drive 35 ""Mac and PC",Internal Hard Drive Western Digital 2TB 3.5-in...,79.00,63.99,0,299542,2017-01-01 00:51:40,68.39,Shopping Basket
4,1119113,299543,1,JBL0104,23.74,2017-01-01 01:06:38,23.74,False,Gray Bluetooth Speaker JBL GO,Compact Bluetooth Handsfree for iPhone iPad an...,29.90,27.99,1,299543,2017-01-01 01:06:38,23.74,Shopping Basket
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
290264,1650199,527398,1,JBL0122,42.99,2018-03-14 13:57:25,42.99,False,JBL T450 BT Bluetooth Headset Black,Wireless headphones with folding design with 1...,49.95,42.99,1,527398,2018-03-14 13:57:25,42.99,Shopping Basket
290265,1650200,527399,1,PAC0653,141.58,2018-03-14 13:57:34,141.58,False,Samsung SSD 850 expansion kit EVO 250GB + Data...,SSD upgrade kit 2008-2010 250 GB MacBook and M...,215.98,141.58,1,527399,2018-03-14 13:57:34,141.58,Shopping Basket
290266,1650201,527400,2,APP0698,9.99,2018-03-14 13:57:41,19.98,False,Apple Lightning Cable Connector to USB 1m Whit...,Apple Lightning USB Cable 1 meter to charge an...,25.00,9.99,1,527400,2018-03-14 13:57:41,19.98,Shopping Basket
290267,1650202,527388,1,BEZ0204,19.99,2018-03-14 13:58:01,19.99,False,"Be.ez LArobe Case Mix Macbook 12 ""Green",Macbook thin sheath 12 inches.,29.99,19.99,0,527388,2018-03-14 13:51:59,34.98,Shopping Basket


since we are already merging stuff, let's get the brands here. Well join on short, after extracting the first three letter from the sku.

In [15]:
orders_products['short'] = orders_products.sku.str[:3]
total_orders = orders_products.merge(brands, how='left', on='short')
total_orders.drop(['short', 'check_products', 'id_order', 'id' ],axis=1, inplace=True)
cols = ['order_id', 'sku', 'product_quantity', 'unit_price', 'total_price', 'total_paid', 'price', 'promo_price', 'name', 'date', 'created_date', 'long', 'state', 'desc', 'in_stock']
total_orders = total_orders[cols]
total_orders.rename({'product_quantity':'qty', 'long':'brand'}, axis=1, inplace=True)
total_orders

Unnamed: 0,order_id,sku,qty,unit_price,total_price,total_paid,price,promo_price,name,date,created_date,brand,state,desc,in_stock
0,299539,OTT0133,1,18.99,18.99,18.99,34.99,19.99,Otterbox iPhone Case Symmetry 2.0 SE / 5s / 5 ...,2017-01-01 00:07:19,2017-01-01 00:07:19,Otterbox,Shopping Basket,resistant cover and thin beveled edges for iPh...,0
1,299540,LGE0043,1,399.00,399.00,399.00,429.00,399.00,"27UD58-B LG Monitor 27 ""4K UHD DisplayPort",2017-01-01 00:19:45,2017-01-01 00:19:45,LG,Shopping Basket,Monitor for gamers and multimedia professional...,0
2,299541,PAR0071,1,474.05,474.05,474.05,699.00,569.00,Parrot Bebop 2 White + Command FLYPAD and FPV ...,2017-01-01 00:20:57,2017-01-01 00:20:57,Parrot,Shopping Basket,cuadricóptero wireless remote control with 25 ...,0
3,299542,WDT0315,1,68.39,68.39,68.39,79.00,63.99,"Blue WD 2TB Hard Drive 35 ""Mac and PC",2017-01-01 00:51:40,2017-01-01 00:51:40,Western Digital,Shopping Basket,Internal Hard Drive Western Digital 2TB 3.5-in...,0
4,299543,JBL0104,1,23.74,23.74,23.74,29.90,27.99,Gray Bluetooth Speaker JBL GO,2017-01-01 01:06:38,2017-01-01 01:06:38,JBL,Shopping Basket,Compact Bluetooth Handsfree for iPhone iPad an...,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
290264,527398,JBL0122,1,42.99,42.99,42.99,49.95,42.99,JBL T450 BT Bluetooth Headset Black,2018-03-14 13:57:25,2018-03-14 13:57:25,JBL,Shopping Basket,Wireless headphones with folding design with 1...,1
290265,527399,PAC0653,1,141.58,141.58,141.58,215.98,141.58,Samsung SSD 850 expansion kit EVO 250GB + Data...,2018-03-14 13:57:34,2018-03-14 13:57:34,Pack,Shopping Basket,SSD upgrade kit 2008-2010 250 GB MacBook and M...,1
290266,527400,APP0698,2,9.99,19.98,19.98,25.00,9.99,Apple Lightning Cable Connector to USB 1m Whit...,2018-03-14 13:57:41,2018-03-14 13:57:41,Apple,Shopping Basket,Apple Lightning USB Cable 1 meter to charge an...,1
290267,527388,BEZ0204,1,19.99,19.99,34.98,29.99,19.99,"Be.ez LArobe Case Mix Macbook 12 ""Green",2018-03-14 13:58:01,2018-03-14 13:51:59,Be.ez,Shopping Basket,Macbook thin sheath 12 inches.,0


that's a nice table. now let's limit ourselves to the completed orders. if the dates for these orders do not differ by too much, we can delete one of those too. 

In [16]:
orders_comp = total_orders.loc[total_orders.state == 'Completed'].drop('state', axis =1)
orders_comp['date'] = pd.to_datetime(orders_comp['date'])
orders_comp['created_date'] = pd.to_datetime(orders_comp['created_date'])
orders_comp['date_diff'] = orders_comp['created_date'] -orders_comp['date']
orders_comp.sort_values('date_diff')

Unnamed: 0,order_id,sku,qty,unit_price,total_price,total_paid,price,promo_price,name,date,created_date,brand,desc,in_stock,date_diff
106445,363564,KAN0053,1,78.50,78.50,1109.08,99.99,94.99,Kanex GoPower external battery 15000mAh USB-C ...,2017-08-28 13:42:45,2017-06-09 19:26:17,Kanex,USB 15000mAh Portable Battery and USB-C connec...,0,-80 days +05:43:32
106444,363564,APP2136,1,1010.59,1010.59,1109.08,1289.00,1252.00,"Apple iPad Pro 12.9 ""Wi-Fi 512GB Silver",2017-08-28 13:42:16,2017-06-09 19:26:17,Apple,New iPad Pro 12.9 inch Wi-Fi 512GB,0,-80 days +05:44:01
247720,457422,EVU0016,1,16.99,16.99,21.98,29.99,19.99,Evutec Aergo Ballistic Nylon Case + Support iP...,2018-01-26 14:10:23,2017-12-07 23:48:36,Evutec,Cover with anti-impact nylon plastic and iPhon...,1,-50 days +09:38:13
136891,401847,PAC2143,1,2259.99,2259.99,2259.99,2839.00,2599.00,"Apple iMac 27 ""Core i5 3.8GHz Retina 5K | 16GB...",2017-10-31 14:40:42,2017-09-19 14:42:11,Pack,IMac desktop computer 27 inch Retina 5K RAM 16...,1,-42 days +00:01:29
60422,339808,SHE0044,1,6.99,6.99,10.98,9.99,2.99,SwitchEasy iPhone Case 0.35 8/7 White,2017-05-05 11:51:14,2017-03-27 19:56:01,SwitchEasy,Ultra Thin Case for iPhone 8/7 in white,0,-39 days +08:04:47
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38347,333665,BEL0198,1,19.99,19.99,66.97,34.99,19.99,Belkin Rockstar Road 4 USB Car Charger 7.2A Black,2017-03-09 16:15:27,2017-07-05 17:47:06,Belkin,Car Charger with 4 USB ports (1A and 2.4A) for...,1,118 days 01:31:39
59468,352100,LAC0198,1,89.79,89.79,93.78,109.99,90.00,LaCie Porsche Design Mobile Hard Drive Disk US...,2017-05-03 02:16:00,2017-10-29 11:46:37,LaCie,Aluminum External Hard Drive 2TB with USB3.0 c...,1,179 days 09:30:37
105186,392327,OWC0057-2,1,41.99,41.99,127.97,47.98,41.99,Mac OWC memory 4GB (2x2GB) SO-DIMM DDR2 800MHz,2017-08-24 12:11:52,2018-03-06 23:18:06,OWC,RAM 4GB (2x2GB) iMac (2008) and MacBook (2009).,0,194 days 11:06:14
74469,365428,SAM0074,1,150.50,150.50,429.18,179.99,134.99,Samsung 850 EVO SSD Disk 500GB,2017-06-14 00:14:30,2017-12-26 01:35:31,Samsung,SSD hard drive Mac and PC 25 inch 500GB SATA I...,1,195 days 01:21:01


After looking into that, there are some outliers, but most of the dates are pretty close and I could see no anomalies in the order...maybe date refers to putting sth in the basket and created_date is created after finishing the order. So I will keep created_date for now

In [17]:
orders_comp.drop(['date', 'date_diff'],axis=1, inplace=True)
orders_comp.rename({'created_date':'order_date'}, axis=1, inplace=True)
orders_comp

Unnamed: 0,order_id,sku,qty,unit_price,total_price,total_paid,price,promo_price,name,order_date,brand,desc,in_stock
6,299545,OWC0100,1,47.49,47.49,51.48,60.99,49.99,OWC In-line Digital Temperature Sensor Kit HDD...,2017-01-01 01:51:47,OWC,Kit temperature sensor for HDD iMac 21 inch an...,1
7,299546,IOT0014,1,18.99,18.99,18.99,22.95,16.99,iOttie Easy View 2 Car Black Support,2017-01-01 01:57:34,iOttie,IPhone car holder 7 plus / 7/6 Plus / 6 / 5s /...,0
8,295347,APP0700,1,72.19,72.19,72.19,89.00,64.99,Apple 85W MagSafe 2 charger MacBook Pro screen...,2017-01-01 02:02:38,Apple,Apple MagSafe 2 Charger for MacBook Pro 15-inc...,1
10,299549,PAC0929,1,2565.99,2565.99,2565.99,3209.00,2667.99,"Apple iMac 27 ""Core i5 3.2GHz Retina 5K | 32GB...",2017-01-02 10:00:20,Pack,IMac desktop computer 27 inch Retina 5K RAM 32...,0
17,299556,CRU0039-A,1,60.90,60.90,65.89,76.99,70.70,(Open) Crucial 240GB SSD 7mm BX200,2017-01-01 02:30:08,Crucial,SSD hard drive and high-speed performance with...,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
289885,525664,TUC0207,1,16.52,16.52,85.73,24.99,19.99,Tucano Elements Second Skin Macbook Sleeve 12 ...,2018-03-14 11:56:19,Tucano,velvety inner protective case for MacBook 12 i...,0
289902,527070,APP0698,2,9.99,19.98,24.97,25.00,9.99,Apple Lightning Cable Connector to USB 1m Whit...,2018-03-14 11:50:48,Apple,Apple Lightning USB Cable 1 meter to charge an...,1
289907,527074,APP0698,2,9.99,19.98,24.97,25.00,9.99,Apple Lightning Cable Connector to USB 1m Whit...,2018-03-14 11:51:42,Apple,Apple Lightning USB Cable 1 meter to charge an...,1
289930,527096,APP0698,3,9.99,29.97,34.96,25.00,9.99,Apple Lightning Cable Connector to USB 1m Whit...,2018-03-14 11:58:40,Apple,Apple Lightning USB Cable 1 meter to charge an...,1


# Fixing the products' price issue

Next we'll look at price_differences again. Maybe we find some extreme outliers between the unit price and the price. I know that some (probably many) of the prices had not been fixed by me when I looked at the products table. Let's see how bad it really is.

In [18]:
orders_comp['price_diff'] = orders_comp['price']-orders_comp['unit_price']
orders_comp.sort_values('price_diff', ascending = False).head(100)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,total_paid,price,promo_price,name,order_date,brand,desc,in_stock,price_diff
283427,523397,UBI0007,1,359.99,359.99,366.98,35998.95,3599.89,Ubiquiti Amplifi Wi-Fi Mesh Router + 2 Mesh Ac...,2018-03-08 17:48:21,,Wi-Fi high-density intelligent Mesh technology,0,35638.96
140495,423503,APP2494,1,180.99,180.99,200.98,21900.03,2190.0,Apple TV 4K 64GB,2017-11-08 17:12:10,Apple,Apple multimedia player with 4K resolution and...,1,21719.04
207410,470483,APP2494,1,180.99,180.99,200.98,21900.03,2190.0,Apple TV 4K 64GB,2017-12-23 21:40:25,Apple,Apple multimedia player with 4K resolution and...,1,21719.04
141562,424462,APP2494,1,208.05,208.05,212.04,21900.03,2190.0,Apple TV 4K 64GB,2017-11-10 12:09:11,Apple,Apple multimedia player with 4K resolution and...,1,21691.98
142446,425312,APP2494,1,208.05,208.05,215.04,21900.03,2190.0,Apple TV 4K 64GB,2017-11-11 16:00:03,Apple,Apple multimedia player with 4K resolution and...,1,21691.98
143368,329976,APP2494,1,208.05,208.05,541.08,21900.03,2190.0,Apple TV 4K 64GB,2017-11-12 23:02:37,Apple,Apple multimedia player with 4K resolution and...,1,21691.98
143600,424537,APP2494,1,219.0,219.0,1967.98,21900.03,2190.0,Apple TV 4K 64GB,2017-11-13 13:10:32,Apple,Apple multimedia player with 4K resolution and...,1,21681.03
272841,515481,APP2494,4,219.0,876.0,882.99,21900.03,2190.0,Apple TV 4K 64GB,2018-02-21 15:10:01,Apple,Apple multimedia player with 4K resolution and...,1,21681.03
206957,470058,APP2494,1,219.0,219.0,225.99,21900.03,2190.0,Apple TV 4K 64GB,2017-12-23 00:49:56,Apple,Apple multimedia player with 4K resolution and...,1,21681.03
248213,499587,APP2494,1,219.0,219.0,493.96,21900.03,2190.0,Apple TV 4K 64GB,2018-01-31 17:52:34,Apple,Apple multimedia player with 4K resolution and...,1,21681.03


The tail looks very good. There are people that paid more for the product than they should have to, but the data seems fine. Sometimes I noticed that the total_paid is a multiple of the unit_price, but the qty was 1. Maybe this was a mistake on the buyer's part. 

The head shows anomalies that I expected. Some products have a price 100-times higher than it should (not many though). Some have a price thats 10-times higher. But overall it's clear how to fix this. 
Looks like prices above 15000 have the '100-times'-issue. Let's fix it and look once again.

In [19]:
orders_comp.loc[orders_comp.price > 15000, 'price'] /= 100

In [20]:
orders_comp['price_diff'] = orders_comp['price']-orders_comp['unit_price']
orders_comp.sort_values('price', ascending = False).head(100)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,total_paid,price,promo_price,name,order_date,brand,desc,in_stock,price_diff
122174,407440,APP2133-A,1,1274.86,1274.86,1274.86,14490.0,12748.61,"Like new - Apple iPad Pro 12.9 ""Wi-Fi + Cellul...",2017-10-02 16:27:03,Apple,iPad Pro 12.9 inch Wi-Fi refurbished 512GB Silver,0,13215.14
70688,362010,PAC1670,1,1293.99,1293.99,1293.99,13932.38,13203.68,QNAP TS-453A | 16GB RAM | Seagate 16TB Iron Wolf,2017-06-03 19:07:39,Pack,NAS QNAP TS-453A with 16GB of RAM memory + 16T...,0,12638.39
221422,482555,APP2492,1,1329.0,1329.0,1329.0,13290.01,13290.01,Apple iPhone X 256GB Silver,2018-01-04 22:35:27,Apple,New Apple iPhone X 256GB Silver Free,1,11961.01
226775,486051,APP2491,1,1329.0,1329.0,1329.0,13290.01,13290.01,Apple iPhone X 256GB Space Gray,2018-01-08 23:45:05,Apple,New Apple iPhone X 256GB Free Space Gray,1,11961.01
260377,506704,APP2492,1,1329.0,1329.0,1335.99,13290.01,13290.01,Apple iPhone X 256GB Silver,2018-02-06 10:51:47,Apple,New Apple iPhone X 256GB Silver Free,1,11961.01
273719,515572,APP2491,1,1329.0,1329.0,1430.97,13290.01,13290.01,Apple iPhone X 256GB Space Gray,2018-02-22 20:09:27,Apple,New Apple iPhone X 256GB Free Space Gray,1,11961.01
220743,472802,APP2491,1,1329.0,1329.0,1329.0,13290.01,13290.01,Apple iPhone X 256GB Space Gray,2018-01-05 00:13:41,Apple,New Apple iPhone X 256GB Free Space Gray,1,11961.01
220689,482253,APP2491,1,1329.0,1329.0,1329.0,13290.01,13290.01,Apple iPhone X 256GB Space Gray,2018-01-04 18:03:41,Apple,New Apple iPhone X 256GB Free Space Gray,1,11961.01
220202,482065,APP2492,1,1329.0,1329.0,2547.98,13290.01,13290.01,Apple iPhone X 256GB Silver,2018-01-04 16:55:18,Apple,New Apple iPhone X 256GB Silver Free,1,11961.01
220190,482053,APP2492,1,1329.0,1329.0,1329.0,13290.01,13290.01,Apple iPhone X 256GB Silver,2018-01-04 16:34:36,Apple,New Apple iPhone X 256GB Silver Free,1,11961.01


prices above 8250 have the '10-times'-issue when looking at the orders sorted by price.
After that I'll look at the price difference again.

In [21]:
orders_comp.loc[orders_comp.price > 8250, 'price'] /= 10

In [22]:
orders_comp['price_diff'] = orders_comp['price']-orders_comp['unit_price']
orders_comp.loc[orders_comp.brand== 'Repair'].sort_values('price_diff', ascending=False)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,total_paid,price,promo_price,name,order_date,brand,desc,in_stock,price_diff
152602,434054,REP0362,1,62.99,62.99,72.98,6999.0,69.99,iPad mini battery repair,2017-11-23 10:57:57,Repair,Repair service including parts and labor for i...,0,6936.01
168514,446237,REP0365,1,62.99,62.99,72.98,6999.0,69.99,iPad mini charging connector repair,2017-11-27 17:03:30,Repair,Repair service including parts and labor for i...,0,6936.01
116071,402009,REP0348,1,69.99,69.99,79.98,6999.0,69.99,Wi-Fi antenna repair iPad 4,2017-09-19 21:36:49,Repair,Repair service including parts and labor for i...,0,6929.01
105852,392925,REP0319,1,69.99,69.99,79.98,6999.0,69.99,Connector Repair iPhone 6s load,2017-08-26 18:30:03,Repair,Repair service including parts and labor for i...,0,6929.01
219762,481658,REP0365,1,69.99,69.99,79.98,6999.0,69.99,iPad mini charging connector repair,2018-01-04 10:30:45,Repair,Repair service including parts and labor for i...,0,6929.01
54401,347403,REP0391,1,69.99,69.99,79.98,6999.0,69.99,iPad 3 battery repair,2017-04-19 13:52:51,Repair,Repair service including parts and labor for i...,0,6929.01
136730,420253,REP0341,1,69.99,69.99,79.98,6999.0,69.99,battery repair iPad 4,2017-10-31 11:06:32,Repair,Repair service including parts and labor for i...,0,6929.01
254639,503297,REP0327,1,69.99,69.99,79.98,6999.0,69.99,Connector Repair iPhone 6s Plus loading,2018-01-31 22:08:50,Repair,Repair service including parts and labor for i...,0,6929.01
267580,511304,REP0369,1,69.99,69.99,79.98,6999.0,69.99,Battery Repair iPad Mini 2,2018-02-14 10:34:14,Repair,Repair service including parts and labor for i...,0,6929.01
128992,407450,REP0188,1,199.99,199.99,209.98,2099.89,2099.89,Full Screen Repair iPad Mini 2,2017-10-02 16:33:07,Repair,Repair service including parts and labor for i...,0,1899.9


The 'Repair' brand is causing problems. I saw it when I first looked into the data. Now is the time to tackle the problem.
Prices above 2000 (or price_diff > 5000) --> divide by 100
price above 300 (or price_diff > 100) --> divide by 10

In [23]:
orders_comp.loc[(orders_comp.brand == 'Repair') & (orders_comp.price_diff > 5000), 'price'] /= 100
orders_comp.loc[(orders_comp.brand == 'Repair') & (orders_comp.price_diff > 100), 'price'] /= 10

In [24]:
orders_comp['price_diff'] = orders_comp['price']-orders_comp['unit_price']
orders_comp.sort_values('price_diff', ascending = False).head(100)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,total_paid,price,promo_price,name,order_date,brand,desc,in_stock,price_diff
26914,323756,PAC1513,1,644.09,644.09,644.09,8136.69,6609.9,Pack QNAP TS-251A NAS Server | 8GB RAM | WD 8T...,2017-02-13 20:07:34,Pack,NAS with 8GB RAM and two hard drives 4TB (2x4T...,0,7492.6
15239,313460,PAC1689,1,641.99,641.99,641.99,7756.49,6711.78,Pack QNAP TS-251A NAS Server | 8GB RAM | Seaga...,2017-01-22 23:17:03,Pack,NAS with 8GB RAM and 8TB (2x4TB) Seagate Iron ...,0,7114.5
244575,497033,TRA0044-A,1,494.04,494.04,514.03,7109.0,5977.88,Open - Transcend JetDrive PCIe SSD 820 M13-M15...,2018-01-25 21:21:19,Trascend,Kit 960GB SSD expansion refitted for Macbook P...,0,6614.96
79740,370047,SAN0131-A,1,56.93,56.93,61.92,6352.86,635.29,"Open - SanDisk 120GB SSD Plus 25 ""SATA 6Gb / s",2017-06-28 18:51:45,SanDisk,SSD 120GB 25 inches,0,6295.93
19666,317377,PAC1512,1,571.89,571.89,566.89,6736.69,6191.79,Pack QNAP TS-251A NAS Server | 8GB RAM | WD 6T...,2017-01-30 12:00:18,Pack,NAS with 8GB RAM and two hard disks 3TB (2x3TB...,0,6164.8
74673,365623,APP1709,1,669.0,669.0,669.0,6690.01,6690.01,Apple Watch Series 2 38mm Stainless Steel Case...,2017-06-14 16:23:53,Apple,Apple Watch 38 mm with GPS dual-core processor...,0,6021.01
8941,307945,APP0880,1,629.99,629.99,629.99,6628.14,6628.14,Apple iPad Air 2 Wi-Fi + Cellular 128GB Dorado,2017-01-16 10:50:11,Apple,New iPad Air 2 Wi-Fi + Cellular 128GB (MH1G2TY...,0,5998.15
6321,305574,APP0880,1,629.99,629.99,629.99,6628.14,6628.14,Apple iPad Air 2 Wi-Fi + Cellular 128GB Dorado,2017-01-09 12:27:19,Apple,New iPad Air 2 Wi-Fi + Cellular 128GB (MH1G2TY...,0,5998.15
25347,322345,APP0880,3,632.99,1898.97,1898.97,6628.14,6628.14,Apple iPad Air 2 Wi-Fi + Cellular 128GB Dorado,2017-02-10 17:30:20,Apple,New iPad Air 2 Wi-Fi + Cellular 128GB (MH1G2TY...,0,5995.15
8260,307323,APP0879,2,632.99,1265.98,1265.98,6628.11,6628.11,Apple iPad Air 2 Wi-Fi + Cellular 128GB Silver,2017-01-11 14:57:14,Apple,New iPad Air 2 Wi-Fi + Cellular 128GB (MGWM2TY...,0,5995.12


I saw some pattern for huge price differences (above 2000):
if unit_price > 100 --> divide by 10
if unit_price < 100 --> divide by 100

In [25]:
orders_comp.loc[(orders_comp.price_diff > 2000) & (orders_comp.unit_price > 100), 'price'] /= 10
orders_comp.loc[(orders_comp.price_diff > 2000) & (orders_comp.unit_price < 100), 'price'] /= 100

In [26]:
orders_comp['price_diff'] = orders_comp['price']-orders_comp['unit_price']
orders_comp.sort_values('price_diff', ascending = False).head(100)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,total_paid,price,promo_price,name,order_date,brand,desc,in_stock,price_diff
214889,477240,WAC0235,1,18.99,18.99,110.97,1999.04,199.9,Wacom Intuos 4 Case Transport size S,2017-12-30 13:18:28,Wacom,Carrying case for graphics tablet Intuos 4 S,0,1980.05
212673,475203,WAC0235,1,18.99,18.99,22.98,1999.04,199.9,Wacom Intuos 4 Case Transport size S,2017-12-28 17:40:45,Wacom,Carrying case for graphics tablet Intuos 4 S,0,1980.05
211243,473855,WAC0235,1,18.99,18.99,23.98,1999.04,199.9,Wacom Intuos 4 Case Transport size S,2017-12-27 19:42:20,Wacom,Carrying case for graphics tablet Intuos 4 S,0,1980.05
202667,466119,WAC0235,1,19.99,19.99,126.96,1999.04,199.9,Wacom Intuos 4 Case Transport size S,2017-12-18 20:55:16,Wacom,Carrying case for graphics tablet Intuos 4 S,0,1979.05
64804,356743,WAC0235,1,19.99,19.99,93.97,1999.04,199.9,Wacom Intuos 4 Case Transport size S,2017-05-18 00:02:55,Wacom,Carrying case for graphics tablet Intuos 4 S,0,1979.05
132155,416282,WAC0235,1,19.99,19.99,24.98,1999.04,199.9,Wacom Intuos 4 Case Transport size S,2017-10-23 15:11:15,Wacom,Carrying case for graphics tablet Intuos 4 S,0,1979.05
119967,402385,WAC0235,1,19.99,19.99,26.98,1999.04,199.9,Wacom Intuos 4 Case Transport size S,2017-09-27 16:13:58,Wacom,Carrying case for graphics tablet Intuos 4 S,0,1979.05
254068,502940,WAC0235,1,19.99,19.99,37.96,1999.04,199.9,Wacom Intuos 4 Case Transport size S,2018-01-31 15:59:56,Wacom,Carrying case for graphics tablet Intuos 4 S,0,1979.05
284035,523894,WDT0400,1,204.99,204.99,211.98,2049.95,2049.95,WD My Cloud Home 3TB USB 3.0,2018-03-09 16:48:28,Western Digital,1-bay NAS server for Mac and PC,0,1844.96
155830,251688,APP2493,1,152.95,152.95,505.76,1990.0,194.0,Apple TV 32GB 4K,2017-11-24 23:57:07,Apple,Apple multimedia player with 4K resolution and...,1,1837.05


made a mistake down the line. but I can fix these at the end, since the diff is now at -7000 for one.
For now I'll do the same as before, but my price_diff will be 1500

In [27]:
orders_comp.loc[(orders_comp.price_diff > 1500) & (orders_comp.unit_price > 100), 'price'] /= 10
orders_comp.loc[(orders_comp.price_diff > 1500) & (orders_comp.unit_price < 100), 'price'] /= 100

In [28]:
orders_comp['price_diff'] = orders_comp['price']-orders_comp['unit_price']
orders_comp.loc[(orders_comp.unit_price < 1000) & (~orders_comp.name.str.contains('Second hand')) &(orders_comp.price_diff < 900)].sort_values('price_diff', ascending = False).head(100)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,total_paid,price,promo_price,name,order_date,brand,desc,in_stock,price_diff
288441,525824,FIB0007,1,99.99,99.99,136.97,999.94,999.94,Fibaro Monoxide Detector Sensor HomeKit,2018-03-12 11:20:19,Fibaro,Fibaro Monoxide Detector Sensor HomeKit for iP...,1,899.95
283911,523793,FIB0007,1,99.99,99.99,104.98,999.94,999.94,Fibaro Monoxide Detector Sensor HomeKit,2018-03-09 13:19:25,Fibaro,Fibaro Monoxide Detector Sensor HomeKit for iP...,1,899.95
141631,424529,CRU0025-2,1,80.07,80.07,400.37,979.78,96.99,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2017-11-10 13:26:11,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,899.71
213520,475955,CRU0025-2,1,81.99,81.99,531.06,979.78,96.99,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2017-12-29 12:12:28,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,897.79
206428,469536,CRU0025-2,1,81.99,81.99,85.98,979.78,96.99,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2017-12-26 10:35:43,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,897.79
27130,323935,CRU0025-2,1,82.99,82.99,87.98,979.78,96.99,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2017-02-14 09:22:47,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,896.79
245468,497791,CRU0025-2,1,90.99,90.99,97.98,979.78,96.99,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2018-01-24 17:30:48,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,888.79
279809,520882,CRU0025-2,1,90.99,90.99,97.98,979.78,96.99,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2018-03-04 15:22:06,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,888.79
203311,466703,CRU0025-2,1,90.99,90.99,90.99,979.78,96.99,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2017-12-19 13:00:22,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,888.79
256991,504827,CRU0025-2,1,90.99,90.99,95.98,979.78,96.99,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2018-02-03 11:15:23,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,888.79


Now we are getting into dangerous territory! There are second hand notebooks and also notebooks with rediculous discounts. We have to be more specific now.

In [29]:
orders_comp.loc[(orders_comp.unit_price < 1000) & (~orders_comp.name.str.contains('Second hand')) &(orders_comp.price_diff > 800), 'price'] /= 10

In [30]:
orders_comp['price_diff'] = orders_comp['price']-orders_comp['unit_price']
orders_comp.loc[(orders_comp.unit_price < 100) &(orders_comp.price_diff < 170)].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 37387 entries, 6 to 289947
Data columns (total 14 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   order_id     37387 non-null  int64         
 1   sku          37387 non-null  object        
 2   qty          37387 non-null  int64         
 3   unit_price   37387 non-null  float64       
 4   total_price  37387 non-null  float64       
 5   total_paid   37387 non-null  float64       
 6   price        37387 non-null  float64       
 7   promo_price  37387 non-null  float64       
 8   name         37387 non-null  object        
 9   order_date   37387 non-null  datetime64[ns]
 10  brand        37367 non-null  object        
 11  desc         37385 non-null  object        
 12  in_stock     37387 non-null  int64         
 13  price_diff   37387 non-null  float64       
dtypes: datetime64[ns](1), float64(6), int64(3), object(4)
memory usage: 4.3+ MB


In [31]:
orders_comp.loc[(orders_comp.price_diff > 175) & (orders_comp.unit_price < 100),'price'] /= 10

In [32]:
orders_comp['price_diff'] = orders_comp['price']-orders_comp['unit_price']
orders_comp.loc[(orders_comp.unit_price < 100) &(orders_comp.price_diff <70)].sort_values('price_diff',ascending=False).head(100)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,total_paid,price,promo_price,name,order_date,brand,desc,in_stock,price_diff
269242,512773,MOP0112,1,29.99,29.99,67.94,99.95,29.99,Mophie Juice Pack Air Battery Case for iPhone ...,2018-02-16 11:38:53,Mophie,Mophie Battery Case for iPhone 2.420mAh 8 Plus...,0,69.96
269287,503011,MOP0113,1,29.99,29.99,44.98,99.95,29.99,Mophie Juice Pack Air Battery Case iPhone 8/7 ...,2018-02-16 12:29:15,Mophie,Mophie Juice Pack Air Case with battery for Ap...,1,69.96
269635,513152,MOP0110,1,29.99,29.99,34.98,99.95,29.99,Mophie Juice Pack Air Battery Case for iPhone ...,2018-02-17 00:53:30,Mophie,Mophie Battery Case for iPhone 2.420mAh 8 Plus...,1,69.96
269260,512804,MOP0112,1,29.99,29.99,41.97,99.95,29.99,Mophie Juice Pack Air Battery Case for iPhone ...,2018-02-16 12:24:21,Mophie,Mophie Battery Case for iPhone 2.420mAh 8 Plus...,0,69.96
268954,512519,MOP0111,1,29.99,29.99,41.97,99.95,29.99,Mophie Juice Pack Air Battery Case for iPhone ...,2018-02-16 09:22:36,Mophie,Mophie Battery Case for iPhone 2.420mAh 8 Plus...,1,69.96
269328,512861,MOP0113,1,29.99,29.99,42.97,99.95,29.99,Mophie Juice Pack Air Battery Case iPhone 8/7 ...,2018-02-16 12:51:23,Mophie,Mophie Juice Pack Air Case with battery for Ap...,1,69.96
269567,513087,MOP0112,1,29.99,29.99,33.98,99.95,29.99,Mophie Juice Pack Air Battery Case for iPhone ...,2018-02-16 21:00:45,Mophie,Mophie Battery Case for iPhone 2.420mAh 8 Plus...,0,69.96
270609,513721,MOP0113,1,29.99,29.99,34.98,99.95,29.99,Mophie Juice Pack Air Battery Case iPhone 8/7 ...,2018-02-18 13:09:31,Mophie,Mophie Juice Pack Air Case with battery for Ap...,1,69.96
270666,513773,MOP0113,1,29.99,29.99,36.98,99.95,29.99,Mophie Juice Pack Air Battery Case iPhone 8/7 ...,2018-02-18 15:52:59,Mophie,Mophie Juice Pack Air Case with battery for Ap...,1,69.96
269271,512815,MOP0112,1,29.99,29.99,42.97,99.95,29.99,Mophie Juice Pack Air Battery Case for iPhone ...,2018-02-16 12:08:15,Mophie,Mophie Battery Case for iPhone 2.420mAh 8 Plus...,0,69.96


In [33]:
orders_comp.loc[orders_comp.order_id == 377101, 'price'] /= 10
orders_comp.loc[orders_comp.order_id == 382405, 'price'] /= 10

In [34]:
orders_comp.loc[orders_comp.brand == 'Puro'].sort_values('price_diff', ascending=False)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,total_paid,price,promo_price,name,order_date,brand,desc,in_stock,price_diff
68573,360159,PUR0144-A,1,9.18,9.18,23.17,129.91,91.83,"Open - Pure Nude Ultraslim 03 ""7 Clear iPhone ...",2017-05-29 13:14:53,Puro,flexible transparent cover with 03mm thick iPh...,0,120.73
19796,317489,PUR0142,1,12.34,12.34,17.33,129.91,129.91,Pure UltraSlim Case 03 + Protector iPhone 5 / ...,2017-01-30 13:50:40,Puro,03mm thin cover with screen protector included...,0,117.57
68975,360510,PUR0142,1,12.99,12.99,16.98,129.91,129.91,Pure UltraSlim Case 03 + Protector iPhone 5 / ...,2017-05-30 10:46:50,Puro,03mm thin cover with screen protector included...,0,116.92
5959,305257,PUR0142,1,12.99,12.99,16.98,129.91,129.91,Pure UltraSlim Case 03 + Protector iPhone 5 / ...,2017-01-08 23:52:24,Puro,03mm thin cover with screen protector included...,0,116.92
62270,354551,PUR0142,1,12.99,12.99,16.98,129.91,129.91,Pure UltraSlim Case 03 + Protector iPhone 5 / ...,2017-05-10 16:50:09,Puro,03mm thin cover with screen protector included...,0,116.92
69952,361342,PUR0142,1,12.99,12.99,17.98,129.91,129.91,Pure UltraSlim Case 03 + Protector iPhone 5 / ...,2017-06-01 18:35:21,Puro,03mm thin cover with screen protector included...,0,116.92
59384,352024,PUR0142,1,12.99,12.99,27.98,129.91,129.91,Pure UltraSlim Case 03 + Protector iPhone 5 / ...,2017-05-02 21:59:43,Puro,03mm thin cover with screen protector included...,0,116.92
54495,347486,PUR0142,1,12.99,12.99,35.96,129.91,129.91,Pure UltraSlim Case 03 + Protector iPhone 5 / ...,2017-04-20 17:02:25,Puro,03mm thin cover with screen protector included...,0,116.92
53204,344906,PUR0142,1,12.99,12.99,212.97,129.91,129.91,Pure UltraSlim Case 03 + Protector iPhone 5 / ...,2017-04-16 08:02:18,Puro,03mm thin cover with screen protector included...,0,116.92
73707,364746,PUR0142,1,12.99,12.99,421.99,129.91,129.91,Pure UltraSlim Case 03 + Protector iPhone 5 / ...,2017-06-12 11:23:51,Puro,03mm thin cover with screen protector included...,0,116.92


In [35]:
orders_comp.loc[(orders_comp.brand == 'Puro') & (orders_comp.price_diff > 100), 'price'] /= 10

In [36]:
orders_comp.loc[orders_comp.order_id == 387335,'price'] /= 10

In [37]:
orders_comp['price_diff'] = orders_comp['price']-orders_comp['unit_price']
orders_comp.sort_values('price_diff').head(100)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,total_paid,price,promo_price,name,order_date,brand,desc,in_stock,price_diff
167615,445507,LAC0223,1,8287.8,8287.8,8287.8,979.9,7899.99,12big LaCie Hard Disk 120TB RAID Thunderbolt 3...,2017-11-26 16:07:32,LaCie,12 120TB hard drive bays with RAID 5 two ports...,0,-7307.9
20767,318345,LAC0217,1,3959.1,3959.1,3959.1,3399.0,2885.99,6big LaCie Hard Drive 36TB RAID Thunderbolt 3 ...,2017-01-31 17:51:43,LaCie,36TB hard drive bays with RAID 6 5 two ports T...,0,-560.1
271224,514165,APP2090,1,2610.01,2610.01,2715.99,2305.59,2167.0,"Apple iMac 27 ""Core i5 Retina 5K 35GHz | 8GB R...",2018-02-19 18:07:07,Apple,IMac desktop computer 27 inch 5K Retina 8GB RA...,0,-304.42
74907,365831,GTE0106,1,1649.99,1649.99,1649.99,1358.99,1109.99,G-Technology G-RAID Thunderbolt Hard Drive 20T...,2017-06-15 10:23:49,G-Technology,20TB RAID disk with 2 bays and removable disks...,0,-291.0
69113,360627,WAC0162,1,2289.99,2289.99,4238.97,2049.9,1782.0,Wacom Cintiq Interactive Pen Display 27QHD Mac...,2017-05-30 15:12:49,Wacom,professional graphics tablet with widescreen 1...,0,-240.09
6191,305450,GTE0081,3,1549.99,4649.97,5549.95,1358.99,1124.99,G-Technology G-RAID Thunderbolt 20TB Extraible...,2017-01-09 09:55:53,G-Technology,20TB RAID disk with 2 bays and removable disks...,0,-191.0
146039,428397,PAC1388,1,848.32,848.32,962.54,673.7,815.18,Pack QNAP TS-251 + | 2GB RAM | WD 16TB Network,2017-11-24 13:27:54,Pack,Nas QNAP TS-251 + 2GB RAM memory + 16TB (2x8TB...,1,-174.62
7895,306995,LAC0179,1,889.99,889.99,889.99,719.0,653.99,12TB LaCie 2big Thunderbolt 2,2017-01-11 17:23:29,LaCie,Dual RAID disk box 12TB Thunderbolt 2 for Mac ...,0,-170.99
4729,304102,LAC0179,1,889.99,889.99,889.99,719.0,653.99,12TB LaCie 2big Thunderbolt 2,2017-01-07 10:22:23,LaCie,Dual RAID disk box 12TB Thunderbolt 2 for Mac ...,0,-170.99
97876,385961,APP1988,1,1020.33,1020.33,1020.33,889.0,872.0,Apple iPhone 7 Plus (PRODUCT) Red 128GB,2017-08-03 19:08:27,Apple,New iPhone 7 plus special edition red free 128GB,0,-131.33


Now fix the mistakes I made along the way

In [38]:
orders_comp.loc[orders_comp.order_id == 445507, 'price'] *= 10

In [39]:
orders_comp['price_diff'] = orders_comp['price']-orders_comp['unit_price']
orders_comp.loc[orders_comp.brand == 'Crucial'].sort_values('price_diff').head(10)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,total_paid,price,promo_price,name,order_date,brand,desc,in_stock,price_diff
107008,393917,CRU0015-2,2,94.99,189.98,196.97,16.4,1629.89,Crucial memory Mac 16GB (2x8GB) SO-DIMM DDR3 1...,2017-08-29 15:20:11,Crucial,RAM 16GB (2x8GB) 135V MacBook Pro iMac (2012/2...,1,-78.59
107030,393937,CRU0015-2,1,94.99,94.99,875.16,16.4,1629.89,Crucial memory Mac 16GB (2x8GB) SO-DIMM DDR3 1...,2017-08-29 16:16:11,Crucial,RAM 16GB (2x8GB) 135V MacBook Pro iMac (2012/2...,1,-78.59
30862,327096,CRU0015-2,1,92.97,92.97,99.96,16.4,1629.89,Crucial memory Mac 16GB (2x8GB) SO-DIMM DDR3 1...,2017-02-21 12:38:51,Crucial,RAM 16GB (2x8GB) 135V MacBook Pro iMac (2012/2...,1,-76.57
3758,301102,CRU0025-2,1,136.87,136.87,136.87,97.98,96.99,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2017-01-02 22:50:11,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,-38.89
238096,493368,CRU0015-2,1,177.99,177.99,182.98,163.98,1629.89,Crucial memory Mac 16GB (2x8GB) SO-DIMM DDR3 1...,2018-01-18 08:25:13,Crucial,RAM 16GB (2x8GB) 135V MacBook Pro iMac (2012/2...,1,-14.01
229086,487400,CRU0015-2,2,177.99,355.98,362.97,163.98,1629.89,Crucial memory Mac 16GB (2x8GB) SO-DIMM DDR3 1...,2018-01-10 08:06:48,Crucial,RAM 16GB (2x8GB) 135V MacBook Pro iMac (2012/2...,1,-14.01
233680,490473,CRU0015-2,1,177.99,177.99,382.97,163.98,1629.89,Crucial memory Mac 16GB (2x8GB) SO-DIMM DDR3 1...,2018-01-14 12:48:11,Crucial,RAM 16GB (2x8GB) 135V MacBook Pro iMac (2012/2...,1,-14.01
231190,486510,CRU0061,1,149.99,149.99,153.98,139.67,129.99,Crucial MX500 500GB SSD 7mm,2018-01-11 18:19:05,Crucial,SSD 500GB SATA Hard Disk III (6 Gb / s) for Ma...,1,-10.32
231418,488989,CRU0061,1,149.99,149.99,153.98,139.67,129.99,Crucial MX500 500GB SSD 7mm,2018-01-11 23:47:42,Crucial,SSD 500GB SATA Hard Disk III (6 Gb / s) for Ma...,1,-10.32
237736,493096,CRU0061,1,149.99,149.99,153.98,139.67,129.99,Crucial MX500 500GB SSD 7mm,2018-01-17 17:57:31,Crucial,SSD 500GB SATA Hard Disk III (6 Gb / s) for Ma...,1,-10.32


In [40]:
orders_comp.loc[(orders_comp.brand == 'Crucial') &(orders_comp.price_diff < -50), 'price'] *= 10

In [41]:
orders_comp['price_diff'] = orders_comp['price']-orders_comp['unit_price']
orders_comp.loc[orders_comp.brand == 'Repair'].sort_values('price_diff').head(100)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,total_paid,price,promo_price,name,order_date,brand,desc,in_stock,price_diff
177059,446921,REP0396,1,189.99,189.99,199.98,119.99,119.99,Full screen repair iPhone 7 Plus,2017-11-29 07:08:38,Repair,Repair service including parts and labor for i...,0,-70.0
97186,385352,REP0396,1,189.99,189.99,199.98,119.99,119.99,Full screen repair iPhone 7 Plus,2017-08-02 12:07:21,Repair,Repair service including parts and labor for i...,0,-70.0
136730,420253,REP0341,1,69.99,69.99,79.98,7.0,69.99,battery repair iPad 4,2017-10-31 11:06:32,Repair,Repair service including parts and labor for i...,0,-62.99
219762,481658,REP0365,1,69.99,69.99,79.98,7.0,69.99,iPad mini charging connector repair,2018-01-04 10:30:45,Repair,Repair service including parts and labor for i...,0,-62.99
105852,392925,REP0319,1,69.99,69.99,79.98,7.0,69.99,Connector Repair iPhone 6s load,2017-08-26 18:30:03,Repair,Repair service including parts and labor for i...,0,-62.99
116071,402009,REP0348,1,69.99,69.99,79.98,7.0,69.99,Wi-Fi antenna repair iPad 4,2017-09-19 21:36:49,Repair,Repair service including parts and labor for i...,0,-62.99
54401,347403,REP0391,1,69.99,69.99,79.98,7.0,69.99,iPad 3 battery repair,2017-04-19 13:52:51,Repair,Repair service including parts and labor for i...,0,-62.99
267580,511304,REP0369,1,69.99,69.99,79.98,7.0,69.99,Battery Repair iPad Mini 2,2018-02-14 10:34:14,Repair,Repair service including parts and labor for i...,0,-62.99
254639,503297,REP0327,1,69.99,69.99,79.98,7.0,69.99,Connector Repair iPhone 6s Plus loading,2018-01-31 22:08:50,Repair,Repair service including parts and labor for i...,0,-62.99
152602,434054,REP0362,1,62.99,62.99,72.98,7.0,69.99,iPad mini battery repair,2017-11-23 10:57:57,Repair,Repair service including parts and labor for i...,0,-55.99


In [42]:
orders_comp.loc[(orders_comp.brand == 'Repair') & (orders_comp.promo_price == 69.99), 'price'] *= 10

In [43]:
orders_comp['price_diff'] = orders_comp['price']-orders_comp['unit_price']
orders_comp.loc[orders_comp.brand == 'Repair'].sort_values('price_diff').head(100)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,total_paid,price,promo_price,name,order_date,brand,desc,in_stock,price_diff
97186,385352,REP0396,1,189.99,189.99,199.98,119.99,119.99,Full screen repair iPhone 7 Plus,2017-08-02 12:07:21,Repair,Repair service including parts and labor for i...,0,-70.0
177059,446921,REP0396,1,189.99,189.99,199.98,119.99,119.99,Full screen repair iPhone 7 Plus,2017-11-29 07:08:38,Repair,Repair service including parts and labor for i...,0,-70.0
170910,448191,REP0396,1,170.99,170.99,180.98,119.99,119.99,Full screen repair iPhone 7 Plus,2017-11-27 13:42:52,Repair,Repair service including parts and labor for i...,0,-51.0
96584,384829,REP0323,1,159.99,159.99,199.97,109.99,109.99,Full screen repair iPhone 6s Plus,2017-08-01 11:25:05,Repair,Repair service including parts and labor for i...,0,-50.0
51745,345205,REP0323,1,159.99,159.99,169.98,109.99,109.99,Full screen repair iPhone 6s Plus,2017-04-19 22:47:36,Repair,Repair service including parts and labor for i...,0,-50.0
66538,358329,REP0313,1,139.99,139.99,149.98,99.99,99.99,Full screen repair iPhone 6s,2017-05-23 11:47:11,Repair,Repair service including parts and labor for i...,0,-40.0
78820,369237,REP0313,1,139.99,139.99,149.98,99.99,99.99,Full screen repair iPhone 6s,2017-06-26 17:20:07,Repair,Repair service including parts and labor for i...,0,-40.0
49267,343072,REP0313,1,139.99,139.99,149.98,99.99,99.99,Full screen repair iPhone 6s,2017-04-04 12:17:40,Repair,Repair service including parts and labor for i...,0,-40.0
45887,340110,REP0313,1,139.99,139.99,149.98,99.99,99.99,Full screen repair iPhone 6s,2017-03-28 16:50:27,Repair,Repair service including parts and labor for i...,0,-40.0
12281,310959,REP0238,1,119.99,119.99,129.98,89.99,89.99,Full Screen Repair iPhone 6,2017-01-17 13:21:26,Repair,Repair service including parts and labor for i...,0,-30.0


Now I'm done with fixing the prices. After looking at all this stuff, I think that the total_paid is often times out of place. I should remove that column. Also I didn't fix the promo price. Since I am interested at the actual discount I won't need the promo_price, I only have to compare unit_price and price.
I will keep the price_diff column. Also I will look at a discount in percent.

When I will create different categories, I want to create a Second hand category!

I'm not sure if I want to keep desc and in_stock. Might be interesting to see, if discounts are not made if sth is out of stock. Not sure if I will look into that though

I will probably keep the desc for the categorization

In [44]:
orders_comp.drop(['promo_price', 'total_paid'], axis = 1, inplace=True)

In [45]:
orders_comp['disc_perc'] = (1 - orders_comp.unit_price / orders_comp.price)*100

# Filling last nan-values

Before categorizing things, I noticed that not all sku's have a brand. I will add the brand names to the orders_comp table. I found the names mostly by googling the name of the products.

In [46]:
orders_comp.loc[orders_comp.brand.isna()]

Unnamed: 0,order_id,sku,qty,unit_price,total_price,price,name,order_date,brand,desc,in_stock,price_diff,disc_perc
1161,300660,par0072,1,199.0,199.0,349.0,Parrot AR Drone 2.0 Elite Edition cuadricópter...,2017-01-02 15:48:02,,Drone cuadricóptero HD video recording autonom...,1,150.0,42.98
1589,301094,LEA0007,1,84.99,84.99,89.99,Leap Motion Sensor controller gestures,2017-01-02 22:40:02,,Sensor motion controller for Mac.,0,5.0,5.56
5448,304785,MRL0008,1,45.99,45.99,59.99,Brown Marley Chant Mini Wireless Speaker,2017-01-08 14:29:15,,Bluetooth small with protective canvas for iPh...,0,14.0,23.34
6813,306015,CAR0007,1,19.99,19.99,29.99,Cardboard Safari Cardboard iPad TV Support,2017-01-09 19:53:46,,Cardboard support with retro TV design for iPad,0,10.0,33.34
8345,307405,LEA0007,1,84.99,84.99,89.99,Leap Motion Sensor controller gestures,2017-01-11 17:01:19,,Sensor motion controller for Mac.,0,5.0,5.56
22485,319829,LEA0001-A,1,72.72,72.72,89.99,Open - Leap Motion Sensor Controller gestures,2017-02-04 12:36:27,,Sensor motion controller for Mac,0,17.27,19.19
27113,323918,MOO0008,1,49.99,49.99,79.95,Now Moov Red Coach Activity Monitor,2017-02-14 08:40:24,,Activity monitor with a personal trainer for i...,0,29.96,37.47
33960,329857,LEP0014,1,16.14,16.14,59.99,Lepow USB 6000mAh External Battery Moonstone W...,2017-02-27 15:58:57,,USB external battery charging mode dual small ...,0,43.85,73.1
57853,350588,HYN0007,4,68.69,274.76,74.99,Mac memory DIMM DDR3 1066MHz 8GB FCM,2017-04-28 01:50:33,,Mac Pro 8GB RAM (2009/10).,0,6.3,8.4
81167,371248,MRL0007,1,129.99,129.99,169.0,Get Together Midnight Marley Wood Speaker,2017-07-01 15:40:26,,Wood wireless speaker power 20W iPhone iPad iPod.,0,39.01,23.08


In [47]:
orders_comp.loc[orders_comp.sku.str[:3] == 'COR', 'brand'] = 'Corning'
orders_comp.loc[orders_comp.sku.str[:3] == 'SMR', 'brand'] = 'SmartHalo' #insolvent btw
orders_comp.loc[orders_comp.sku.str[:3] == 'MRL', 'brand'] = 'House of Marley' #eco friendly music-boxes, keeping the legacy of bob marley's lov for music alive
orders_comp.loc[orders_comp.sku.str[:3] == 'LEP', 'brand'] = 'Lepow' #specialized in high-end mobile electronics, based in HK
orders_comp.loc[orders_comp.sku.str[:3] == 'MOO', 'brand'] = 'Moov' #music, audio service, but also fitness-watches and stuff
orders_comp.loc[orders_comp.sku.str[:3] == 'LEA', 'brand'] = 'Leap Motion' #controlling the notebook with motion, without touching, insolvent
orders_comp.loc[orders_comp.sku.str[:3] == 'CAR', 'brand'] = 'Cardboard Safari'#building animals out of wood, acryl, etc.
orders_comp.loc[orders_comp.sku.str[:3] == 'par', 'brand'] = 'Parrot' #french company, specializes on drones
orders_comp.loc[orders_comp.sku.str[:3] == 'WAH', 'brand'] = 'Wahoo Fitness'#american fitness-technology company
orders_comp.loc[orders_comp.sku.str[:3] == 'NET', 'brand'] = 'Netgear' #networking products
orders_comp.loc[orders_comp.sku.str[:3] == 'EXT', 'brand'] = 'Extreme Fliers'#micro-drones
orders_comp.loc[orders_comp.sku.str[:3] == 'TWI', 'brand'] = 'Twinkly'#italian smartphone controlled LED-lights producer
orders_comp.loc[orders_comp.sku.str[:3] == 'UBT', 'brand'] = 'Ubtech' #chinese company, humanoid robotics
orders_comp.loc[orders_comp.sku.str[:3] == 'LUM', 'brand'] = 'LuMee'#lighting company
orders_comp.loc[orders_comp.sku.str[:3] == 'UBI', 'brand'] = 'Ubiquity'#network components
orders_comp.loc[orders_comp.sku.str[:3] == 'HYN', 'brand'] = 'Hyundai Electronics'#second largest memory chipmaker, third largest semiconductor company

In [48]:
orders_comp.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 61322 entries, 6 to 289947
Data columns (total 13 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   order_id     61322 non-null  int64         
 1   sku          61322 non-null  object        
 2   qty          61322 non-null  int64         
 3   unit_price   61322 non-null  float64       
 4   total_price  61322 non-null  float64       
 5   price        61322 non-null  float64       
 6   name         61322 non-null  object        
 7   order_date   61322 non-null  datetime64[ns]
 8   brand        61322 non-null  object        
 9   desc         61318 non-null  object        
 10  in_stock     61322 non-null  int64         
 11  price_diff   61322 non-null  float64       
 12  disc_perc    61322 non-null  float64       
dtypes: datetime64[ns](1), float64(5), int64(3), object(4)
memory usage: 6.5+ MB


Looks almost good. Since I cannot find a fitting description for the 4 missing items, I just copy the name into it. By doing this I make sure that I can use string methods on the description column to find appropriate categories. Otherwise I'd get an error because of NaNs.

In [49]:
orders_comp.loc[orders_comp.desc.isna(), 'desc'] = orders_comp.loc[orders_comp.desc.isna(), 'name']

In [442]:
path = r'C:\Users\muell\Desktop\WBS\Project 2\coding_challenges\clean_csv'

SyntaxError: EOL while scanning string literal (Temp/ipykernel_12192/979721235.py, line 1)

In [443]:
orders_comp.to_csv(path + '\orders_comp.csv', index=False)

# Finding categories

Now let's move on to finding some categories for the products. To find those I'll combine the brand name with string methods (regex) on the name and description on the products. I'll group by brands to see which ones sold the most products. These will be the brands I focus on for now. Furthermore I'll probably want to set categories as well as subcategories.

In [324]:
orders_comp['category'] = 'other'
orders_comp['subcategory'] = 'other'

In [52]:
orders_comp.brand.value_counts()

Apple                  13827
OWC                     3607
Pack                    2550
Belkin                  2256
LaCie                   2091
Crucial                 2027
Satechi                 1891
Western Digital         1859
Wacom                   1812
NewerTech               1575
iFixit                  1516
SanDisk                 1126
Seagate                  872
Griffin                  841
Samsung                  797
LG                       770
ZaggKeys                 743
Tucano                   703
Otterbox                 657
Dell                     642
JBL                      587
Synology                 558
Lifeproof                539
Philips                  533
Matias                   529
Moshi                    495
FCM                      473
Startech                 468
Spek SeeThru             444
Logitech                 435
Bose                     414
Kingston                 397
Mophie                   397
Trascend                 395
Macally       

Ok, so Apple has by far the most products. Let's get an idea what Apple is selling here. For sorting out the categories, I will only use the cell below.

In [454]:
#(
#    orders_comp.loc[(orders_comp.brand == 'Apple')
#                    &(orders_comp.category=='other')].sort_values('price',ascending=False)
#)
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Apple Dock Connector'))].sort_values('price',ascending=False)
)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,price,name,order_date,brand,desc,in_stock,price_diff,disc_perc,category,subcategory
8768,307793,APP0234,1,35.0,35.0,35.0,Apple Dock Connector to VGA,2017-01-12 10:03:01,Apple,Dock Connector to VGA IOS.,0,0.0,0.0,adapters_cables,other
17181,315086,APP0234,1,35.0,35.0,35.0,Apple Dock Connector to VGA,2017-01-26 11:55:06,Apple,Dock Connector to VGA IOS.,0,0.0,0.0,adapters_cables,other
115230,401268,AP20125,1,5.99,5.99,35.0,Like new - Apple Dock Connector to VGA,2017-09-20 13:41:10,Apple,Dock Connector to VGA IOS,0,29.01,82.89,adapters_cables,other


In [474]:
# Prices for iphones start at about 400. All other iphone equipment is much cheaper. 
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    & (orders_comp.name.str.contains('iPhone'))
                    & (orders_comp.price > 300), 'category']
) = 'devices'
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    & (orders_comp.name.str.contains('iPhone'))
                    & (orders_comp.price > 300), 'subcategory']
) = 'apple_iphones'



In [475]:
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('iPhone'))
                   &(orders_comp.desc.str.contains('[Cc]ase')), 'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('iPhone'))
                   &(orders_comp.desc.str.contains('[Cc]ase')), 'subcategory']
) = 'apple_iphone_cases'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('iPhone'))
                    &(orders_comp.name.str.contains('Apple'))
                    &(orders_comp.name.str.contains('Case'))
                   &(orders_comp.category != 'iphones'), 'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('iPhone'))
                    &(orders_comp.name.str.contains('Apple'))
                    &(orders_comp.name.str.contains('Case'))
                   &(orders_comp.category != 'iphones'), 'subcategory']
) = 'apple_iphone_cases'

In [476]:
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('iPhone'))
                    &(orders_comp.desc.str.contains('[Cc]onnector'))
                    &(orders_comp.desc.str.contains('[Dd]ock')),'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('iPhone'))
                    &(orders_comp.desc.str.contains('[Cc]onnector'))
                    &(orders_comp.desc.str.contains('[Dd]ock')),'subcategory']
) = 'apple_iphone_charging'

In [477]:
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    & (orders_comp.desc.str.contains('iPhone'))
                    & (orders_comp.desc.str.contains('Battery')), 'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    & (orders_comp.desc.str.contains('iPhone'))
                    & (orders_comp.desc.str.contains('Battery')), 'subcategory']
) = 'apple_iphone_battery_cases'

In [478]:
# apple care products for extended warranty are easy to spot. I'll filter by the description.
# I just put them to service, which I'll divide into warranty and repair at least
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                   & (orders_comp.desc.str.contains('extended warranty')), 'category']
) = 'service'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                   & (orders_comp.desc.str.contains('extended warranty')), 'subcategory']
) = 'applecare_warranty'

In [479]:
# prices for macbooks are always above 500, regardless of new or used. 
# Might have to revisit, if the name is written somewhat else.
# Maybe I go for new and used as subcategories
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                   & (orders_comp.name.str.contains('Mac[bB]ook|iMac|Mac Pro|Mac mini'))
                    &(orders_comp.price > 500), 'category']
) = 'devices'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                   & (orders_comp.name.str.contains('Mac[bB]ook|iMac|Mac Pro|Mac mini'))
                    &(orders_comp.price > 500), 'subcategory']
) = 'apple_macbooks'

In [480]:
# MagSafe adapters. 20 of these entries are MagSafe 1 to MagSafe 2 converter
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('MagSafe')), 'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('MagSafe')), 'subcategory']
) = 'apple_macbook_charging'

In [481]:
# ipads are above 250, nothing with iPad in the name is that expensive
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                   & (orders_comp.name.str.contains('iPad'))
                    &(orders_comp.price > 250), 'category']
) = 'devices'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                   & (orders_comp.name.str.contains('iPad'))
                    &(orders_comp.price > 250), 'subcategory']
) = 'apple_ipads'

In [482]:
# Apple Watches are above 250 before there is sth else
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                   &(orders_comp.name.str.contains('[Ww]atch'))
                   &(orders_comp.price > 250), 'category']
) = 'devices'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                   &(orders_comp.name.str.contains('[Ww]atch'))
                   &(orders_comp.price > 250), 'subcategory']
) = 'applewatch'

# Here come the charger for apple watches
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                   &(orders_comp.name.str.contains('Apple [Ww]atch'))
                    &(orders_comp.name.str.contains('[Mm]agnetic')), 'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                   &(orders_comp.name.str.contains('Apple [Ww]atch'))
                    &(orders_comp.name.str.contains('[Mm]agnetic')), 'subcategory']
) = 'applewatch_charging'

# some straps that were missing
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Correa')), 'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Correa')), 'subcategory']
) = 'applewatch_straps'

In [483]:
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Strap')), 'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Strap')), 'subcategory']
) = 'applewatch_straps'

In [484]:
# AirPorts are a thing apparently
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                  &(orders_comp.name.str.contains('Air[Pp]ort')), 'category']
) = 'devices'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                  &(orders_comp.name.str.contains('Air[Pp]ort')), 'subcategory']
) = 'apple_airports'

In [485]:
# I only saw ipod touch, nano and shuffle. By specifying in the regex, I don't need a price range
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                   &(orders_comp.name.str.contains('iPod Touch|iPod Shuffle|iPod Nano')), 'category']
) = 'devices'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                   &(orders_comp.name.str.contains('iPod Touch|iPod Shuffle|iPod Nano')), 'subcategory']
) = 'apple_ipods'

In [486]:
# apple tv products cost at least above 70. Cheaper products are warranty products
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Apple TV'))
                    &(orders_comp.price > 75), 'category']
) = 'devices'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Apple TV'))
                    &(orders_comp.price > 75), 'subcategory']
) = 'appletv'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Siri|Remote Loop')), 'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Siri|Remote Loop')), 'subcategory']
) = 'appletv_remote'

In [487]:
# Since the smart keyboards /smart cover | Apple Pencils are designed for ipads, I will put them in that category. 
# Might have to change category name later
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Smart Keyboard|iPad Keyboard|Smart Cover|Pencil')), 'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Smart Keyboard|iPad Keyboard')), 'subcategory']
) = 'apple_ipad_keyboards'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Smart Cover')), 'subcategory']
) = 'apple_ipad_cases'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Pencil')), 'subcategory']
) = 'apple_ipad_pencils'

In [488]:
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('iPad'))
                    &(orders_comp.name.str.contains('Case')), 'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('iPad'))
                    &(orders_comp.name.str.contains('Case')), 'subcategory']
) = 'apple_ipad_cases'

In [489]:
# I put earpods and airpods in one category
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('AirPods|EarPods|Headphones')), 'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('AirPods')), 'subcategory']
) = 'apple_headphones_airpods'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('EarPods')), 'subcategory']
) = 'apple_headphones_earpods'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Headphones'))
                    &(~orders_comp.name.str.contains('Airpods'))
                    &(~orders_comp.name.str.contains('Earpods')), 'subcategory']
) = 'apple_headphones'

In [490]:
# Man I didn't know how much stuff Apple has. 
# There is a MacBook which contains 'Magic', therefore I add a price limit for magic products
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Magic'))
                    &(orders_comp.price < 200), 'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Magic'))
                    &(orders_comp.name.str.contains('[Mm]ouse'))
                    &(orders_comp.price < 200), 'subcategory']
) = 'apple_magic_mouse'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Magic'))
                    &(orders_comp.name.str.contains('[Tt]rackpad'))
                    &(orders_comp.price < 200), 'subcategory']
) = 'apple_magic_trackpad'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Magic'))
                    &(orders_comp.name.str.contains('[Kk]ey[Bb]oard'))
                    &(orders_comp.price < 200), 'subcategory']
) = 'apple_magic_keyboard'

In [491]:
# fortunately every entry containing '[aA]datper' is really an adapter it seems.
# I hope this doesn't include chargers ---> it doesn't :)
# 


(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('[Aa]dapter|Apple Dock Connector')), 'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('[Aa]dapter|Apple Dock Connector')), 'subcategory']
) = 'apple_adapters_cables'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.desc.str.contains('[Aa]dapter')), 'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.desc.str.contains('[Aa]dapter')), 'subcategory']
) = 'apple_adapters_cables'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('[Cc]able'))
                    &(~orders_comp.name.str.contains('[Mm]agnetic')), 'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('[Cc]able'))
                    &(~orders_comp.name.str.contains('[Mm]agnetic')), 'subcategory']
) = 'apple_adapters_cables'

In [492]:
# Now I start my accessories category. 
#I'll use it for stuff that can be used not for apple products and that are more like gadgets
(    
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('[Ss]uper[Dd]rive')), 'category']
) = 'accessories'

(    
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('[Ss]uper[Dd]rive')), 'subcategory']
) = 'apple_superdrive'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Keypad'))
                   &(~orders_comp.name.str.contains('Magic')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Keypad'))
                   &(~orders_comp.name.str.contains('Magic')), 'subcategory']
) = 'apple_keyboard'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Wireless'))
                    &(orders_comp.name.str.contains('Keyboard')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Wireless'))
                    &(orders_comp.name.str.contains('Keyboard')), 'subcategory']
) = 'apple_keyboard'

(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('[Mm]ouse'))
                   &(~orders_comp.name.str.contains('Magic')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('[Mm]ouse'))
                   &(~orders_comp.name.str.contains('Magic')), 'subcategory']
) = 'apple_mouse'

In [493]:
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Belt Loop')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Apple')
                    &(orders_comp.name.str.contains('Belt Loop')), 'subcategory']
) = 'other'

Apple done, now OWC

In [621]:
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                    &(orders_comp.category =='other')
                #   &(orders_comp.desc.str.contains('Kit temperature'))
              #      &(orders_comp.name.str.contains('[Kk]it'))
                #    &(orders_comp.name.str.contains('Case'))
                   ].sort_values('price', ascending=False)
)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,price,name,order_date,brand,desc,in_stock,price_diff,disc_perc,category,subcategory


In [682]:
# lot of superdrives

(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.name.str.contains('[Oo]ptical')), 'category']
) = 'spare_parts'
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.name.str.contains('[Oo]ptical')), 'subcategory']
) = 'superdrives'

In [616]:
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.desc.str.contains('RAM|Aluminium housing|Aluminum housing')), 'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.desc.str.contains('RAM|Aluminium housing|Aluminum housing')), 'subcategory']
) = 'computer_parts'

In [605]:
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.desc.str.contains('[Hh]ard [Dd]rive'))
                    &(~orders_comp.name.str.contains('[Oo]ptical'))
                    &(~orders_comp.name.str.contains('[Aa]dapter'))
                   , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.desc.str.contains('[Hh]ard [Dd]rive'))
                    &(~orders_comp.name.str.contains('[Oo]ptical'))
                    &(~orders_comp.name.str.contains('[Aa]dapter'))
                   , 'subcategory']
) = 'computer_parts'

(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.desc.str.contains('SSD'))
                    &(orders_comp.desc.str.contains('\d+GB|\d+TB'))
                   , 'category']
) ='accessories'

(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.desc.str.contains('SSD'))
                    &(orders_comp.desc.str.contains('\d+GB|\d+TB'))
                   , 'subcategory']
) ='computer_parts'

(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.name.str.contains('ThunderBay'))
                   ,'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.name.str.contains('ThunderBay'))
                   ,'subcategory']
) = 'computer_parts'

In [606]:
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.name.str.contains('Dock'))
                   ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.name.str.contains('Dock'))
                   ,'subcategory']
) = 'adapters_cables'

In [607]:
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.name.str.contains('DDR\d|memory|Thunderbolt|RAID'))
                   , 'category']
) ='accessories'

(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                    &(orders_comp.category =='other')
                   &(orders_comp.name.str.contains('DDR\d|memory|Thunderbolt|RAID'))
                    &(~orders_comp.name.str.contains('[Cc]able'))
                   , 'subcategory']
) = 'computer_parts'

(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                    &(orders_comp.category =='other')
                   &(orders_comp.name.str.contains('DDR\d|memory|Thunderbolt|RAID'))
                    &(orders_comp.name.str.contains('[Cc]able'))
                   , 'subcategory']
) = 'adapters_cables'

In [608]:
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                    &(~orders_comp.desc.str.contains('[Rr]eplacement|[Ss]ubstitution'))
                    &(orders_comp.name.str.contains('[Aa]dapter'))
                    , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                    &(~orders_comp.desc.str.contains('[Rr]eplacement|[Ss]ubstitution'))
                    &(orders_comp.name.str.contains('[Aa]dapter'))
                    , 'subcategory']
) = 'adapters_cables'

In [609]:
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.name.str.contains('SSD'))
                    &(orders_comp.name.str.contains('Case'))
                    &(orders_comp.price < 1000)
                   , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.name.str.contains('SSD'))
                    &(orders_comp.name.str.contains('Case'))
                    &(orders_comp.price < 1000)
                   , 'subcategory']
) = 'cases'

In [617]:
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.name.str.contains('[Ii]nstallation|shielding'))
                    &(orders_comp.name.str.contains('[Kk]it'))
                   , 'category']
) = 'tools'
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.name.str.contains('[Ii]nstallation'))
                    &(orders_comp.name.str.contains('[Kk]it'))
                   , 'subcategory']
) = 'installation_kit'
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.name.str.contains('shielding'))
                    &(orders_comp.name.str.contains('[Kk]it'))
                   , 'subcategory']
) = 'shielding_kit'

In [611]:
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.name.str.contains('raid', case=False)), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.name.str.contains('raid', case=False)), 'subcategory']
) = 'computer_parts'

In [612]:
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.desc.str.contains('repair', case=False))
                   , 'category']
) ='tools'
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.desc.str.contains('repair', case=False))
                   , 'subcategory']
) ='repair_kit'

In [613]:
(
    orders_comp.loc[(orders_comp.brand == 'OWC')

                   &(orders_comp.desc.str.contains('Box Portable|outer case|Outer carton|External Case|External SSD box')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'OWC')

                   &(orders_comp.desc.str.contains('Box Portable|outer case|Outer carton|External Case|External SSD box')), 'subcategory']
) = 'cases'


In [620]:
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.desc.str.contains('Kit temperature|[tT]emperature sensor|internal disk reading')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.desc.str.contains('Kit temperature|[Tt]emperature sensor|internal disk reading')), 'subcategory']
) = 'computer_parts'

In [619]:
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.desc.str.contains('PCI Express')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'OWC')
                   &(orders_comp.desc.str.contains('PCI Express')), 'subcategory']
) = 'adapters_cables'

In [693]:
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                    &(orders_comp.category=='other')
               #     &(orders_comp.desc.str.contains('[Ee]xternal [Hh]ard [Dd]rive'))
                 #   &(orders_comp.desc.str.contains('[Ee]xpansion|[Uu]pgrade'))
                   ].sort_values('price', ascending=False).info()
                  #  &(orders_comp.price <414)].sort_values('price', ascending=False).head(100)
                 #   &(orders_comp.name.str.contains('Apple i?Mac'))].sort_values('price', ascending=False).tail(100)
                  # &(orders_comp.desc.str.contains('PCI Express')), 'category']
)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 115 entries, 171802 to 505
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   order_id     115 non-null    int64         
 1   sku          115 non-null    object        
 2   qty          115 non-null    int64         
 3   unit_price   115 non-null    float64       
 4   total_price  115 non-null    float64       
 5   price        115 non-null    float64       
 6   name         115 non-null    object        
 7   order_date   115 non-null    datetime64[ns]
 8   brand        115 non-null    object        
 9   desc         115 non-null    object        
 10  in_stock     115 non-null    int64         
 11  price_diff   115 non-null    float64       
 12  disc_perc    115 non-null    float64       
 13  category     115 non-null    object        
 14  subcategory  115 non-null    object        
dtypes: datetime64[ns](1), float64(5), int64(3), object(6

In [658]:
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                   &(orders_comp.name.str.contains('DS\d+')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                   &(orders_comp.name.str.contains('DS\d+')), 'subcategory']
) = 'servers'

(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                    &(orders_comp.name.str.contains('Apple i?Mac|iPhone')),'category']
) = 'devices'
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                    &(orders_comp.name.str.contains('Apple i?Mac')),'subcategory']
) = 'apple_macbooks'

(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                    &(orders_comp.name.str.contains('iPhone')),'subcategory']
) = 'apple_iphones'

In [689]:
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                   &(orders_comp.name.str.contains('Cinema Display|ThunderBay')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                   &(orders_comp.name.str.contains('Cinema Display')), 'subcategory']
) = 'monitors'
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                   &(orders_comp.name.str.contains('Cinema Display')), 'subcategory']
) = 'computer_parts'

(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                   &(orders_comp.desc.str.contains('NAS|QNAP|WD My Cloud')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                   &(orders_comp.desc.str.contains('NAS|QNAP|WD My Cloud')), 'subcategory']
) = 'servers'
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                   &(orders_comp.desc.str.contains('RAID|SSD upgrade|[Ee]xternal [Hh]ard [Dd]rive')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                   &(orders_comp.desc.str.contains('RAID|SSD upgrade|[Ee]xternal [Hh]ard [Dd]rive')), 'subcategory']
) = 'computer_parts'

In [661]:
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
             #       &(orders_comp.category=='other')
                    &(orders_comp.desc.str.contains('SSD'))
                    &(orders_comp.desc.str.contains('[Ee]xpansion|[Uu]pgrade'))
                   , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
             #       &(orders_comp.category=='other')
                    &(orders_comp.desc.str.contains('SSD'))
                    &(orders_comp.desc.str.contains('[Ee]xpansion|[Uu]pgrade'))
                   , 'subcategory']
) = 'computer_parts'

In [665]:
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                    &(orders_comp.desc.str.contains('[Hh]eadset')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                    &(orders_comp.desc.str.contains('[Hh]eadset')), 'subcategory']
) = 'headsets'

In [691]:
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                    &(orders_comp.desc.str.contains('[Cc]able|[Cc]harger')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                    &(orders_comp.desc.str.contains('[Cc]able|[Cc]harger')), 'subcategory']
) = 'adapters_cables'

In [674]:
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                    &(orders_comp.name.str.contains('Crucial')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                    &(orders_comp.name.str.contains('Crucial')), 'subcategory']
) = 'computer_parts'

In [677]:
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                    &(orders_comp.name.str.contains('[Ff]ixit')), 'category']
) = 'tools'

(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                    &(orders_comp.name.str.contains('[Ff]ixit')), 'subcategory']
) = 'opening_tools'

In [683]:
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                    &(orders_comp.desc.str.contains('Replacement')), 'category']
) = 'spare_parts'
(
    orders_comp.loc[(orders_comp.brand == 'Pack')
                    &(orders_comp.desc.str.contains('Replacement')), 'subcategory']
) = 'superdrives'

Trying to work a little faster now. Will not categorize EVERY item anymore.

In [774]:
(
    orders_comp.loc[(orders_comp.brand == 'Belkin')
                     &(orders_comp.category =='other')
             #      &(orders_comp.desc.str.contains('External Battery'))
         #          &(orders_comp.name.str.contains('Screen Protector'))
                 #   &(orders_comp.desc.str.contains('[Cc]ase'))
                  ].sort_values('price', ascending=False).head(100)
)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,price,name,order_date,brand,desc,in_stock,price_diff,disc_perc,category,subcategory
250714,500847,BEL0180,1,119.99,119.99,149.99,Belkin NetCam WeMO WiFi HD night vision,2018-01-29 11:02:42,Belkin,HD security camera with night vision and contr...,0,30.0,20.0,other,cases
201091,464726,BEL0175,1,79.99,79.99,79.99,Belkin WeMO Led Lighting LED Bulbs Sept. 2 + W...,2017-12-17 12:16:18,Belkin,Set of 2 LED Smart Bulbs + Wemo Link Control f...,1,0.0,0.0,other,cases
262286,506488,BEL0367,1,55.99,55.99,59.99,Belkin Pro Messenger Active Messenger,2018-02-08 10:42:44,Belkin,Shoulder bag your daily required to carry your...,1,4.0,6.67,other,cases
205522,468658,BEL0367,1,55.99,55.99,59.99,Belkin Pro Messenger Active Messenger,2017-12-21 10:58:14,Belkin,Shoulder bag your daily required to carry your...,1,4.0,6.67,other,cases
253414,502477,BEL0367,1,55.99,55.99,59.99,Belkin Pro Messenger Active Messenger,2018-01-31 02:41:57,Belkin,Shoulder bag your daily required to carry your...,1,4.0,6.67,other,cases
164945,443814,BEL0070,1,25.49,25.49,49.99,Music Receiver Belkin iPhone music receiver,2017-11-25 18:31:05,Belkin,Music Receiver iPhone iPad and iPod Touch for ...,0,24.5,49.01,other,cases
226302,485701,BEL0070,1,29.99,29.99,49.99,Music Receiver Belkin iPhone music receiver,2018-01-08 18:11:04,Belkin,Music Receiver iPhone iPad and iPod Touch for ...,0,20.0,40.01,other,cases
201939,465469,BEL0070,1,29.99,29.99,49.99,Music Receiver Belkin iPhone music receiver,2017-12-18 10:40:16,Belkin,Music Receiver iPhone iPad and iPod Touch for ...,0,20.0,40.01,other,cases
58761,351467,BEL0070,1,29.99,29.99,49.99,Music Receiver Belkin iPhone music receiver,2017-05-01 10:36:20,Belkin,Music Receiver iPhone iPad and iPod Touch for ...,0,20.0,40.01,other,cases
29356,325760,BEL0070,1,25.49,25.49,49.99,Music Receiver Belkin iPhone music receiver,2017-02-17 18:52:39,Belkin,Music Receiver iPhone iPad and iPod Touch for ...,0,24.5,49.01,other,cases


In [768]:
(
    orders_comp.loc[(orders_comp.brand == 'Belkin')
                #    &(orders_comp.category =='other')
                   &(orders_comp.name.str.contains('PowerHouse'))
              #      &(orders_comp.name.str.contains('[Kk]it'))
                #    &(orders_comp.name.str.contains('Case'))
                   , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Belkin')
                #    &(orders_comp.category =='other')
                   &(orders_comp.name.str.contains('PowerHouse'))
              #      &(orders_comp.name.str.contains('[Kk]it'))
                #    &(orders_comp.name.str.contains('Case'))
                   , 'subcategory']
) = 'adapters_cables'

(
    orders_comp.loc[(orders_comp.brand == 'Belkin')
                   &(orders_comp.name.str.contains('Screen Protector|Bandolier|[Aa]dapter|[Cc]harger|[Cc]able|Thunderbolt|RockStar MIXIT|Wireless charging|Backpack|Insight Power Control')), 'category']
) = 'accessories'

(
    orders_comp.loc[(orders_comp.brand == 'Belkin')
                   &(orders_comp.name.str.contains('Screen Protector|Bandolier|Backpack')), 'subcategory']
) = 'cases'
(
    orders_comp.loc[(orders_comp.brand == 'Belkin')
                   &(orders_comp.name.str.contains('[Aa]dapter|[Cc]harger|[Cc]able|Thunderbolt|RockStar MIXIT|Wireless charging')), 'subcategory']
) = 'adapters_cables'

(
    orders_comp.loc[(orders_comp.brand == 'Belkin')
                   &(orders_comp.name.str.contains('Insight Power Control')), 'subcategory']
) = 'gadgets'


In [772]:
(
    orders_comp.loc[(orders_comp.brand == 'Belkin')
                   &(orders_comp.desc.str.contains('[Cc]ase|[eE]xternal [Bb]attery|Loading dock|Charging Dock|[Cc]over|armband|controller|Keypad')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Belkin')
                   &(orders_comp.desc.str.contains('Loading dock|Charging Dock')), 'subcategory']
) = 'adapters_cables'
(
    orders_comp.loc[(orders_comp.brand == 'Belkin')
                   &(orders_comp.desc.str.contains('[Cc]over|[Cc]ase|')), 'subcategory']
) = 'cases'
(
    orders_comp.loc[(orders_comp.brand == 'Belkin')
                   &(orders_comp.desc.str.contains('[eE]xternal [bB]attery|armband|controller|Keypad')), 'subcategory']
) = 'gadgets'

In [784]:
(
    orders_comp.loc[(orders_comp.brand == 'LaCie')
                     &(orders_comp.category =='other')
                 #  &(orders_comp.desc.str.contains('[Hh]ard [Dd]rive'))
         #          &(orders_comp.name.str.contains('Screen Protector'))
                 #   &(orders_comp.desc.str.contains('[Cc]ase'))
                  ].sort_values('price', ascending=False).head(100)
)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,price,name,order_date,brand,desc,in_stock,price_diff,disc_perc,category,subcategory
50004,343683,LAC0107,1,49.99,49.99,69.99,LaCie 32GB USB 3.0 PenDrive XtremKey,2017-04-05 17:31:04,LaCie,32GB USB 3.0 flash drive resistant to extreme ...,0,20.0,28.58,other,other


In [786]:
(
    orders_comp.loc[(orders_comp.brand == 'LaCie')
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'LaCie')
                   &(orders_comp.desc.str.contains('[Hh]ard [Dd]rive|Thunderbolt|RAID|Hard [Dd]isk'))
                  , 'subcategory']
) = 'computer_parts'
(
    orders_comp.loc[(orders_comp.brand == 'LaCie')
                   &(orders_comp.desc.str.contains('flash drive resistant'))
                  , 'subcategory']
) = 'flash_drive'

In [787]:
orders_comp.brand.value_counts()

Apple                  13827
OWC                     3607
Pack                    2550
Belkin                  2256
LaCie                   2091
Crucial                 2027
Satechi                 1891
Western Digital         1859
Wacom                   1812
NewerTech               1575
iFixit                  1516
SanDisk                 1126
Seagate                  872
Griffin                  841
Samsung                  797
LG                       770
ZaggKeys                 743
Tucano                   703
Otterbox                 657
Dell                     642
JBL                      587
Synology                 558
Lifeproof                539
Philips                  533
Matias                   529
Moshi                    495
FCM                      473
Startech                 468
Spek SeeThru             444
Logitech                 435
Bose                     414
Kingston                 397
Mophie                   397
Trascend                 395
Macally       

In [792]:
(
    orders_comp.loc[(orders_comp.brand == 'Crucial')
                     &(orders_comp.category =='other')
                   &(orders_comp.price<98)
         #          &(orders_comp.name.str.contains('Screen Protector'))
                 #   &(orders_comp.desc.str.contains('[Cc]ase'))
                  ].sort_values('price', ascending=False).head(500)
)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,price,name,order_date,brand,desc,in_stock,price_diff,disc_perc,category,subcategory
130771,415104,CRU0025-2,1,90.99,90.99,97.98,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2017-10-20 13:45:06,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,6.99,7.13,other,other
61374,353787,CRU0025-2,1,78.99,78.99,97.98,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2017-05-08 12:25:28,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,18.99,19.38,other,other
136397,420001,CRU0025-2,1,90.99,90.99,97.98,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2017-10-30 19:15:39,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,6.99,7.13,other,other
138632,421907,CRU0025-2,1,90.99,90.99,97.98,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2017-11-04 19:25:19,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,6.99,7.13,other,other
141631,424529,CRU0025-2,1,80.07,80.07,97.98,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2017-11-10 13:26:11,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,17.91,18.28,other,other
27130,323935,CRU0025-2,1,82.99,82.99,97.98,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2017-02-14 09:22:47,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,14.99,15.3,other,other
17007,314933,CRU0025-2,1,65.69,65.69,97.98,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2017-01-25 22:59:07,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,32.29,32.95,other,other
157213,438008,CRU0025-2,1,77.34,77.34,97.98,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2017-11-24 10:30:50,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,20.64,21.06,other,other
264726,509282,CRU0025-2,1,90.99,90.99,97.98,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2018-02-10 13:12:29,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,6.99,7.13,other,other
165130,443955,CRU0025-2,1,77.34,77.34,97.98,Crucial memory Mac 8GB (2x4GB) SO-DIMM DDR3 16...,2017-11-25 19:31:05,Crucial,8GB RAM (2x4GB) SO-DIMM 1600MHZ iMac and Macbo...,1,20.64,21.06,other,other


In [793]:
(
    orders_comp.loc[(orders_comp.brand == 'Crucial'), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Crucial'), 'subcategory']
) = 'computer_parts_memory'


In [845]:
(
    orders_comp.loc[(orders_comp.brand == 'Satechi')
                     &(orders_comp.subcategory =='other')
            #       &(orders_comp.price<79)
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  ].sort_values('price', ascending=False)
)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,price,name,order_date,brand,desc,in_stock,price_diff,disc_perc,category,subcategory


In [835]:
(
    orders_comp.loc[(orders_comp.brand == 'Satechi')
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Satechi')
                   &(orders_comp.name.str.contains('[Hh]ub'))
                  , 'subcategory']
) = 'hubs'
(
    orders_comp.loc[(orders_comp.brand == 'Satechi')
                   &(orders_comp.name.str.contains('Keypad'))
                  , 'subcategory']
) = 'keyboards'
(
    orders_comp.loc[(orders_comp.brand == 'Satechi')
                   &(orders_comp.name.str.contains('[Hh]eadset|[Hh]eadphones|[Ee]arphones'))
                  , 'subcategory']
) = 'headphones'
(
    orders_comp.loc[(orders_comp.brand == 'Satechi')
                   &(orders_comp.desc.str.contains('[Ss]peakers'))
                  , 'subcategory']
) = 'speakers'

(
    orders_comp.loc[(orders_comp.brand == 'Satechi')
                   &(orders_comp.name.str.contains('Mac'))
                    &(orders_comp.desc.str.contains('[Ss]tand')), 'subcategory']
) = 'macbook_stands'

(
    orders_comp.loc[(orders_comp.brand == 'Satechi')
                   &(orders_comp.name.str.contains('[Cc]able')), 'subcategory']
) = 'adapters_cables'

In [844]:

(
    orders_comp.loc[(orders_comp.brand == 'Satechi')
                   &(orders_comp.desc.str.contains('Outer carton|External box')), 'subcategory']
) = 'cases'

(
    orders_comp.loc[(orders_comp.brand == 'Satechi')
                   &(orders_comp.desc.str.contains('[Dd]ock|charging|Card reader')), 'subcategory']
) = 'adapters_cables'



In [827]:

(
    orders_comp.loc[(orders_comp.brand == 'Satechi')
                   &(orders_comp.desc.str.contains('[Ss]upport')), 'subcategory']
) = 'support'


(
    orders_comp.loc[(orders_comp.brand == 'Satechi')
                   &(orders_comp.name.str.contains('[Ss]upport')), 'subcategory']
) = 'support'

In [820]:

(
    orders_comp.loc[(orders_comp.brand == 'Satechi')
                   &(orders_comp.name.str.contains('[Aa]dapter|[Cc]harger')), 'subcategory']
) = 'adapters_cables'

In [840]:

(
    orders_comp.loc[(orders_comp.brand == 'Satechi')
                   &(orders_comp.desc.str.contains('Aluminum mat|mat aluminum')), 'subcategory']
) = 'mouse_mat'
(
    orders_comp.loc[(orders_comp.brand == 'Satechi')
                   &(orders_comp.desc.str.contains('briefcase')), 'subcategory']
) = 'macbook_cases'

In [842]:
(
    orders_comp.loc[(orders_comp.brand == 'Satechi')
                   &(orders_comp.name.str.contains('Power Meter|Multimedia')), 'subcategory']
) = 'gadgets'

In [846]:
orders_comp.brand.value_counts()

Apple                  13827
OWC                     3607
Pack                    2550
Belkin                  2256
LaCie                   2091
Crucial                 2027
Satechi                 1891
Western Digital         1859
Wacom                   1812
NewerTech               1575
iFixit                  1516
SanDisk                 1126
Seagate                  872
Griffin                  841
Samsung                  797
LG                       770
ZaggKeys                 743
Tucano                   703
Otterbox                 657
Dell                     642
JBL                      587
Synology                 558
Lifeproof                539
Philips                  533
Matias                   529
Moshi                    495
FCM                      473
Startech                 468
Spek SeeThru             444
Logitech                 435
Bose                     414
Kingston                 397
Mophie                   397
Trascend                 395
Macally       

In [851]:
(
    orders_comp.loc[(orders_comp.brand == 'Western Digital')
                     &(orders_comp.category =='other')
                    &(orders_comp.price<60)
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  ].sort_values('price', ascending=False).head(500)
)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,price,name,order_date,brand,desc,in_stock,price_diff,disc_perc,category,subcategory
93852,382405,WDT0135-A,2,11.84,23.68,12.9,Open - Western Digital Network PC and Mac 3TB ...,2017-07-26 17:04:47,Western Digital,Open WD hard drive in perfect condition design...,0,1.06,8.22,other,other


In [853]:
(
    orders_comp.loc[(orders_comp.brand == 'Western Digital'), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Western Digital'), 'subcategory']
) = 'computer_parts_memory'
(
    orders_comp.loc[(orders_comp.brand == 'Western Digital')
                    &(orders_comp.desc.str.contains('NAS'))
                    &(~orders_comp.desc.str.contains('[Hh]ard [Dd]rive')), 'subcategory']
) = 'servers'

In [876]:
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                     &(orders_comp.category =='other')
                 #   &(orders_comp.price<60)
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  ].sort_values('price', ascending=False).head(500)
)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,price,name,order_date,brand,desc,in_stock,price_diff,disc_perc,category,subcategory
15637,313775,WAC0170-A,1,78.93,78.93,159.9,Open - Wacom Bamboo Spark snap-fit ​​iPad Air 2,2017-01-23 13:24:25,Wacom,Bloc Smart notes.,0,80.97,50.64,other,other
16518,314517,WAC0170-A,1,78.93,78.93,159.9,Open - Wacom Bamboo Spark snap-fit ​​iPad Air 2,2017-01-25 00:39:46,Wacom,Bloc Smart notes.,0,80.97,50.64,other,other
23832,321012,WAC0176,1,79.99,79.99,159.9,Wacom Bamboo Spark snap-fit ​​iPad Air 2,2017-02-07 17:14:24,Wacom,Bloc Smart notes.,0,79.91,49.97,other,other
25531,322514,WAC0176,1,79.99,79.99,159.9,Wacom Bamboo Spark snap-fit ​​iPad Air 2,2017-02-11 08:51:19,Wacom,Bloc Smart notes.,0,79.91,49.97,other,other
19482,317195,WAC0175-A,1,35.3,35.3,59.9,Open - Wacom Bamboo Fineline 2 Pointer Gold,2017-01-30 09:40:13,Wacom,Intelligent digital thin pencil tip for iPad,0,24.6,41.07,other,other


In [862]:
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.desc.str.contains('[Gg]raphics? [Tt]ablet|Graphical touch tablet')), 'category']
) = 'devices'
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.desc.str.contains('[Gg]raphics? [Tt]ablet|Graphical touch tablet')), 'subcategory']
) = 'graphics_tablet'
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.name.str.contains('[Gg]raphics? [Tt]ablet|Graphical touch tablet')), 'category']
) = 'devices'
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.name.str.contains('[Gg]raphics? [Tt]ablet|Graphical touch tablet')), 'subcategory']
) = 'graphics_tablet'
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.desc.str.contains('[Gg]raphics? [Dd]isplay')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.desc.str.contains('[Gg]raphics? [Dd]isplay')), 'subcategory']
) = 'graphic_display_macbook'

In [867]:
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.desc.str.contains(' [Pp]en |[Pp]ointer|[Ss]tylus'))
                    &(~orders_comp.desc.str.contains('[Rr]eplacement'))
                    &(~orders_comp.name.str.contains('Folio|Slate')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.desc.str.contains(' [Pp]en |[Pp]ointer|[Ss]tylus'))
                    &(~orders_comp.desc.str.contains('[Rr]eplacement'))
                    &(~orders_comp.name.str.contains('Folio|Slate')), 'subcategory']
) = 'tablet_pencils'

In [870]:
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.desc.str.contains(' [Pp]en |[Pp]ointer|[Ss]tylus|tips'))
                    &(orders_comp.desc.str.contains('[Rr]eplacement'))
                    &(~orders_comp.name.str.contains('Folio|Slate')), 'category']
) = 'spare_parts'
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.desc.str.contains(' [Pp]en |[Pp]ointer|[Ss]tylus|tips'))
                    &(orders_comp.desc.str.contains('[Rr]eplacement'))
                    &(~orders_comp.name.str.contains('Folio|Slate')), 'subcategory']
) = 'tablet_pencils'

In [871]:
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.desc.str.contains('Bloc'))
                    &(orders_comp.name.str.contains('Slate')), 'category']
) = 'devices'
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.desc.str.contains('Bloc'))
                    &(orders_comp.name.str.contains('Slate')), 'subcategory']
) = 'notepads'

In [872]:
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.name.str.contains('Folio')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.name.str.contains('Folio')), 'subcategory']
) = 'tablet_cases'

In [874]:
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.desc.str.contains('[Ss]upport')), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.desc.str.contains('[Ss]upport')), 'subcategory']
) = 'tablet_support'

In [875]:
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.name.str.contains('Accessory Kit')), 'category']
) = 'tools'
(
    orders_comp.loc[(orders_comp.brand == 'Wacom')
                    &(orders_comp.name.str.contains('Accessory Kit')), 'subcategory']
) = 'tablet_pencil_tools'

In [892]:
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                     &(orders_comp.category =='other')
                  #  &(orders_comp.price<29)
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  ].sort_values('price', ascending=False).info()
)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 55 entries, 63324 to 566
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   order_id     55 non-null     int64         
 1   sku          55 non-null     object        
 2   qty          55 non-null     int64         
 3   unit_price   55 non-null     float64       
 4   total_price  55 non-null     float64       
 5   price        55 non-null     float64       
 6   name         55 non-null     object        
 7   order_date   55 non-null     datetime64[ns]
 8   brand        55 non-null     object        
 9   desc         55 non-null     object        
 10  in_stock     55 non-null     int64         
 11  price_diff   55 non-null     float64       
 12  disc_perc    55 non-null     float64       
 13  category     55 non-null     object        
 14  subcategory  55 non-null     object        
dtypes: datetime64[ns](1), float64(5), int64(3), object(6)


In [880]:
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                    &(orders_comp.desc.str.contains('[Dd]ock|[Aa]dapter|[Cc]able'))
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                    &(orders_comp.desc.str.contains('[Dd]ock|[Aa]dapter|[Cc]able'))
                  , 'subcategory']
) = 'adapters_cables'
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                    &(orders_comp.name.str.contains('[Dd]ock|[Aa]dapter|[Cc]able'))
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                    &(orders_comp.name.str.contains('[Dd]ock|[Aa]dapter|[Cc]able'))
                  , 'subcategory']
) = 'adapters_cables'

In [890]:
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                    &(orders_comp.desc.str.contains('storage box'))
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                    &(orders_comp.desc.str.contains('storage box'))
                  , 'subcategory']
) = 'cases'
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                   &(orders_comp.desc.str.contains('[Ii]Phone'))
                    &(orders_comp.desc.str.contains('[Cc]ase|casing'))
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                   &(orders_comp.desc.str.contains('[Ii]Phone'))
                    &(orders_comp.desc.str.contains('[Cc]ase|casing'))
                  , 'subcategory']
) = 'iphone_cases'
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                   &(orders_comp.name.str.contains('[Ii]Phone'))
                    &(orders_comp.name.str.contains('[Cc]ase'))
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                   &(orders_comp.name.str.contains('[Ii]Phone'))
                    &(orders_comp.name.str.contains('[Cc]ase'))
                  , 'subcategory']
) = 'iphone_cases'
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                   &(orders_comp.name.str.contains('[Ii]Pad'))
                    &(orders_comp.name.str.contains('[Cc]ase'))
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                   &(orders_comp.name.str.contains('[Ii]Pad'))
                    &(orders_comp.name.str.contains('[Cc]ase'))
                  , 'subcategory']
) = 'ipad_cases'
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                   &(orders_comp.desc.str.contains('[Pp]rotector'))
                    &(orders_comp.desc.str.contains('MacBook'))
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                   &(orders_comp.desc.str.contains('[Pp]rotector'))
                    &(orders_comp.desc.str.contains('MacBook'))
                  , 'subcategory']
) = 'macbook_cases'

In [882]:
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                   &(orders_comp.name.str.contains('Kit tools'))
                  , 'category']
) = 'tools'
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                   &(orders_comp.name.str.contains('Kit tools'))
                  , 'subcategory']
) = 'tools'

In [883]:
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                   &(orders_comp.desc.str.contains('MacBook'))
                    &(orders_comp.desc.str.contains('[Bb]attery'))
                  , 'category']
) = 'spare_parts'
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                   &(orders_comp.desc.str.contains('MacBook'))
                    &(orders_comp.desc.str.contains('[Bb]attery'))
                  , 'subcategory']
) = 'macbook_battery'

In [888]:
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                   &(orders_comp.desc.str.contains('[Hh]ousing|Raid'))
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                   &(orders_comp.desc.str.contains('[Hh]ousing|Raid'))
                  , 'subcategory']
) = 'computer_parts'

In [889]:
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                   &(orders_comp.desc.str.contains('Elegant'))
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'NewerTech')
                   &(orders_comp.desc.str.contains('Elegant'))
                  , 'subcategory']
) = 'macbook_support'

In [898]:
(
    orders_comp.loc[(orders_comp.brand == 'iFixit')
                     &(orders_comp.category =='other')
                   # &(orders_comp.price<39)
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  ].sort_values('price', ascending=False).head(500)
)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,price,name,order_date,brand,desc,in_stock,price_diff,disc_perc,category,subcategory
134355,418251,IFX0136,1,89.99,89.99,89.95,iFixit Universal Bit Kit Tools 128 2016,2017-10-26 19:29:10,iFixit,128 Kit tools (screwdrivers and screws) in oak...,0,-0.04,-0.04,other,other
203611,466951,IFX0136,1,89.99,89.99,89.95,iFixit Universal Bit Kit Tools 128 2016,2017-12-19 17:09:08,iFixit,128 Kit tools (screwdrivers and screws) in oak...,0,-0.04,-0.04,other,other
276753,518471,IFX0136,1,85.49,85.49,89.95,iFixit Universal Bit Kit Tools 128 2016,2018-02-27 22:15:35,iFixit,128 Kit tools (screwdrivers and screws) in oak...,0,4.46,4.96,other,other
1008,300518,IFX0136,1,89.99,89.99,89.95,iFixit Universal Bit Kit Tools 128 2016,2017-01-02 13:06:49,iFixit,128 Kit tools (screwdrivers and screws) in oak...,0,-0.04,-0.04,other,other
4022,303394,IFX0136,2,89.99,179.98,89.95,iFixit Universal Bit Kit Tools 128 2016,2017-01-06 18:52:07,iFixit,128 Kit tools (screwdrivers and screws) in oak...,0,-0.04,-0.04,other,other
256818,504676,IFX0136,1,85.49,85.49,89.95,iFixit Universal Bit Kit Tools 128 2016,2018-02-02 21:52:50,iFixit,128 Kit tools (screwdrivers and screws) in oak...,0,4.46,4.96,other,other
193411,459616,IFX0136,1,89.99,89.99,89.95,iFixit Universal Bit Kit Tools 128 2016,2017-12-11 11:29:53,iFixit,128 Kit tools (screwdrivers and screws) in oak...,0,-0.04,-0.04,other,other
56112,348964,IFX0038,1,79.99,79.99,79.99,iFixit Change Display Full Kit White iPhone 5 ...,2017-04-23 22:46:04,iFixit,Screen Replacement Kit tools for iPhone 5.,0,0.0,0.0,other,tools_apple_repair
61449,353770,IFX0026,1,74.9,74.9,74.95,iFixit Display Kit Complete change screen iPho...,2017-05-08 15:34:18,iFixit,Screen replacement kit including tools and scr...,1,0.05,0.07,other,tools_apple_repair
228147,486774,IFX0026,1,71.15,71.15,74.95,iFixit Display Kit Complete change screen iPho...,2018-01-09 16:58:56,iFixit,Screen replacement kit including tools and scr...,1,3.8,5.07,other,tools_apple_repair


In [899]:
(
    orders_comp.loc[(orders_comp.brand == 'iFixit')
                   &(orders_comp.name.str.contains('[Tt]oolkit|adhesive strips|Screwdriver|[Ss]uction [Cc]ups'))
                    &(~orders_comp.name.str.contains('[Bb]attery'))
                  , 'category']
) = 'tools'
(
    orders_comp.loc[(orders_comp.brand == 'iFixit')
                   &(orders_comp.name.str.contains('[Tt]oolkit|adhesive strips|Screwdriver|[Ss]uction [Cc]ups'))
                    &(~orders_comp.name.str.contains('[Bb]attery'))
                  , 'subcategory']
) = 'tools_apple_repair'

(
    orders_comp.loc[(orders_comp.brand == 'iFixit')
                    &(orders_comp.name.str.contains('[Bb]attery'))
                  , 'category']
) = 'spare_parts'
(
    orders_comp.loc[(orders_comp.brand == 'iFixit')
                    &(orders_comp.name.str.contains('[Bb]attery'))
                  , 'subcategory']
) = 'apple_battery'
(
    orders_comp.loc[(orders_comp.brand == 'iFixit')
                    &(orders_comp.desc.str.contains('[Bb]attery'))
                  , 'category']
) = 'spare_parts'
(
    orders_comp.loc[(orders_comp.brand == 'iFixit')
                    &(orders_comp.desc.str.contains('[Bb]attery'))
                  , 'subcategory']
) = 'apple_battery'


(
    orders_comp.loc[(orders_comp.brand == 'iFixit')
                   &(orders_comp.name.str.contains('[Kk]it of [Tt]ools|[Rr]eplacement'))
                    &(~orders_comp.name.str.contains('[Bb]attery'))
                  , 'category']
) = 'tools'
(
    orders_comp.loc[(orders_comp.brand == 'iFixit')
                   &(orders_comp.desc.str.contains('[Kk]it of [Tt]ools|[Rr]eplacement'))
                    &(~orders_comp.desc.str.contains('[Bb]attery'))
                  , 'subcategory']
) = 'tools_apple_repair'
(
    orders_comp.loc[(orders_comp.brand == 'iFixit')
                   &(orders_comp.name.str.contains('[Cc]able'))
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'iFixit')
                   &(orders_comp.name.str.contains('[Cc]able'))
                  , 'subcategory']
) = 'adapters_cables'

(
    orders_comp.loc[(orders_comp.brand == 'iFixit')
                   &(orders_comp.category =='other')
                  , 'category']
) = 'tools'
(
    orders_comp.loc[(orders_comp.brand == 'iFixit')
                   &(orders_comp.category =='other')
                  , 'subcategory']
) = 'tools_apple_repair'

In [906]:
(
    orders_comp.loc[(orders_comp.brand == 'Seagate')
                     &(orders_comp.category =='other')
                    &(orders_comp.price<112)
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  ].sort_values('price', ascending=False).head(500)
)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,price,name,order_date,brand,desc,in_stock,price_diff,disc_perc,category,subcategory
20348,317975,SEA0046-A,1,78.65,78.65,111.99,"Open - Seagate 1TB SSHD Hybrid Drive 2.5 ""hard...",2017-01-31 08:01:17,Seagate,Hybrid Hard Drive Mac and PC 1TB + 8GB SSD and...,0,33.34,29.77,other,other
95146,383534,SEA0108,1,81.99,81.99,109.99,Seagate 2TB NAS Hard Drive IronWolf SATA 3,2017-07-28 20:49:12,Seagate,NAS hard drive designed for systems with inter...,1,28.0,25.46,other,other
103408,390764,SEA0102-A,4,79.65,318.6,109.99,Open - Seagate 2TB Nas IronWolf Sata Hard Drive 3,2017-08-19 19:29:09,Seagate,NAS hard drive designed for systems with inter...,0,30.34,27.58,other,other
203144,466557,SEA0108,1,81.58,81.58,109.99,Seagate 2TB NAS Hard Drive IronWolf SATA 3,2017-12-20 17:32:05,Seagate,NAS hard drive designed for systems with inter...,1,28.41,25.83,other,other
241855,495725,SEA0108,1,81.58,81.58,109.99,Seagate 2TB NAS Hard Drive IronWolf SATA 3,2018-01-21 23:19:20,Seagate,NAS hard drive designed for systems with inter...,1,28.41,25.83,other,other
79113,369490,SEA0108,2,81.99,163.98,109.99,Seagate 2TB NAS Hard Drive IronWolf SATA 3,2017-06-27 12:17:44,Seagate,NAS hard drive designed for systems with inter...,1,28.0,25.46,other,other
169739,447274,SEA0108,1,76.34,76.34,109.99,Seagate 2TB NAS Hard Drive IronWolf SATA 3,2017-11-27 11:24:40,Seagate,NAS hard drive designed for systems with inter...,1,33.65,30.59,other,other
58392,351102,SEA0072,1,99.99,99.99,109.99,Seagate Backup Plus Slim External Hard Drive 2...,2017-04-29 18:02:08,Seagate,lightweight external hard drive and 25-inch ul...,0,10.0,9.09,other,other
45856,340081,SEA0072,1,109.99,109.99,109.99,Seagate Backup Plus Slim External Hard Drive 2...,2017-03-28 15:37:17,Seagate,lightweight external hard drive and 25-inch ul...,0,0.0,0.0,other,other
20885,318436,SEA0108,3,93.09,279.27,109.99,Seagate 2TB NAS Hard Drive IronWolf SATA 3,2017-01-31 20:06:30,Seagate,NAS hard drive designed for systems with inter...,1,16.9,15.37,other,other


In [902]:
(
    orders_comp.loc[(orders_comp.brand == 'SanDisk')
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'SanDisk')
                  , 'subcategory']
) = 'memory_apple_devices'

In [907]:
(
    orders_comp.loc[(orders_comp.brand == 'Seagate')
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Seagate')
                    &(orders_comp.desc.str.contains('NAS'))
                  , 'subcategory']
) = 'servers'
(
    orders_comp.loc[(orders_comp.brand == 'Seagate')
                    &(~orders_comp.desc.str.contains('NAS'))
                  , 'subcategory']
) = 'computer_parts_memory'

In [908]:
orders_comp.brand.value_counts()

Apple                  13827
OWC                     3607
Pack                    2550
Belkin                  2256
LaCie                   2091
Crucial                 2027
Satechi                 1891
Western Digital         1859
Wacom                   1812
NewerTech               1575
iFixit                  1516
SanDisk                 1126
Seagate                  872
Griffin                  841
Samsung                  797
LG                       770
ZaggKeys                 743
Tucano                   703
Otterbox                 657
Dell                     642
JBL                      587
Synology                 558
Lifeproof                539
Philips                  533
Matias                   529
Moshi                    495
FCM                      473
Startech                 468
Spek SeeThru             444
Logitech                 435
Bose                     414
Kingston                 397
Mophie                   397
Trascend                 395
Macally       

In [944]:
(
    orders_comp.loc[(orders_comp.brand == 'FCM')
                     &(orders_comp.category =='other')
                #    &(orders_comp.price<112)
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  ].sort_values('price', ascending=False)
)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,price,name,order_date,brand,desc,in_stock,price_diff,disc_perc,category,subcategory
60302,352851,FCM0008-4,1,456.99,456.99,888.0,Mac FCM memory 64GB (4x16GB) DIMM DDR3 1866MHz,2017-05-04 23:28:07,FCM,64GB RAM (4x16GB) Mac Pro 2013.,0,431.01,48.54,other,other
70202,361583,FCM0008-4,1,456.99,456.99,888.0,Mac FCM memory 64GB (4x16GB) DIMM DDR3 1866MHz,2017-06-02 09:59:28,FCM,64GB RAM (4x16GB) Mac Pro 2013.,0,431.01,48.54,other,other
12330,311003,FCM0008-4,1,450.99,450.99,888.0,Mac FCM memory 64GB (4x16GB) DIMM DDR3 1866MHz,2017-01-17 15:14:28,FCM,64GB RAM (4x16GB) Mac Pro 2013.,0,437.01,49.21,other,other
28897,325372,FCM0008-4,1,498.99,498.99,888.0,Mac FCM memory 64GB (4x16GB) DIMM DDR3 1866MHz,2017-02-16 18:49:21,FCM,64GB RAM (4x16GB) Mac Pro 2013.,0,389.01,43.81,other,other
56513,349320,FCM0008-4,1,456.99,456.99,888.0,Mac FCM memory 64GB (4x16GB) DIMM DDR3 1866MHz,2017-04-24 18:13:30,FCM,64GB RAM (4x16GB) Mac Pro 2013.,0,431.01,48.54,other,other
15845,313935,FCM0038-4,1,278.99,278.99,607.92,Mac memory FCM 32GB (4x8GB) SO-DIMM DDR3 1867MHz,2017-01-23 18:37:11,FCM,Mac memory FCM 32GB (4x8GB) DDR3 1867MHz SO-DI...,0,328.93,54.11,other,other
31306,327471,FCM0038-4,1,294.99,294.99,607.92,Mac memory FCM 32GB (4x8GB) SO-DIMM DDR3 1867MHz,2017-02-22 11:11:24,FCM,Mac memory FCM 32GB (4x8GB) DDR3 1867MHz SO-DI...,0,312.93,51.48,other,other
77113,367762,FCM0008-2,1,228.99,228.99,444.0,Mac FCM memory 32GB (2x16GB) DIMM DDR3 1866MHz,2017-06-21 11:40:11,FCM,32GB RAM (2x16GB) Mac Pro 2013.,0,215.01,48.43,other,other
70932,362229,FCM0008-2,1,188.42,188.42,444.0,Mac FCM memory 32GB (2x16GB) DIMM DDR3 1866MHz,2017-06-04 20:27:08,FCM,32GB RAM (2x16GB) Mac Pro 2013.,0,255.58,57.56,other,other
82746,372592,FCM0008-2,1,228.99,228.99,444.0,Mac FCM memory 32GB (2x16GB) DIMM DDR3 1866MHz,2017-07-05 17:48:08,FCM,32GB RAM (2x16GB) Mac Pro 2013.,0,215.01,48.43,other,other


In [945]:
(
    orders_comp.loc[(orders_comp.brand == 'Griffin|Matias|FCM')
                    &(orders_comp.subcategory == 'other')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Griffin')
                    &(orders_comp.desc.str.contains('[Dd]ock|[Cc]harg|[Aa]dapter|[Cc]able'))
                  ,'subcategory']
) = 'adapters_cables'
(
    orders_comp.loc[(orders_comp.brand == 'FCM')
                  ,'subcategory']
) = 'computer_parts_memory'
(
    orders_comp.loc[(orders_comp.brand == 'Matias')
                    &(orders_comp.desc.str.contains('[Dd]ock|[Cc]harg|[Aa]dapter|[Cc]able'))
                  ,'subcategory']
) = 'keyboards'
(
    orders_comp.loc[(orders_comp.brand == 'Griffin')
                    &(orders_comp.desc.str.contains('[Bb]attery'))
                  ,'subcategory']
) = 'external_battery'

(
    orders_comp.loc[(orders_comp.brand == 'Griffin')
                    &(orders_comp.desc.str.contains('[sS]upport'))
                  ,'subcategory']
) = 'support_apple_devices'
(
    orders_comp.loc[(orders_comp.brand == 'Griffin')
                    &(orders_comp.subcategory == 'other')
                  ,'subcategory']
) = 'cases_apple_devices'

In [938]:
(
    orders_comp.loc[(orders_comp.brand == 'Philips')
                     &(orders_comp.category =='other')
                #    &(orders_comp.price<112)
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  ].sort_values('price', ascending=False)
)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,price,name,order_date,brand,desc,in_stock,price_diff,disc_perc,category,subcategory
255262,503585,PHI0084,1,164.99,164.99,199.99,Philips Hue White and Color Ambiance Kit Incio...,2018-02-01 09:19:20,Philips,Philips Hue Starter Kit White and Color Ambian...,1,35.0,17.5,other,other
252098,501598,PHI0084,1,164.99,164.99,199.99,Philips Hue White and Color Ambiance Kit Incio...,2018-01-29 23:51:07,Philips,Philips Hue Starter Kit White and Color Ambian...,1,35.0,17.5,other,other
253340,502415,PHI0084,1,164.99,164.99,199.99,Philips Hue White and Color Ambiance Kit Incio...,2018-01-31 11:30:24,Philips,Philips Hue Starter Kit White and Color Ambian...,1,35.0,17.5,other,other
219589,481482,PHI0084,1,146.27,146.27,199.99,Philips Hue White and Color Ambiance Kit Incio...,2018-01-03 23:45:28,Philips,Philips Hue Starter Kit White and Color Ambian...,1,53.72,26.86,other,other
256975,504033,PHI0084,1,168.99,168.99,199.99,Philips Hue White and Color Ambiance Kit Incio...,2018-02-03 12:02:50,Philips,Philips Hue Starter Kit White and Color Ambian...,1,31.0,15.5,other,other
249359,500221,PHI0084,1,164.99,164.99,199.99,Philips Hue White and Color Ambiance Kit Incio...,2018-01-28 18:16:27,Philips,Philips Hue Starter Kit White and Color Ambian...,1,35.0,17.5,other,other
210386,473071,PHI0084,1,164.99,164.99,199.99,Philips Hue White and Color Ambiance Kit Incio...,2017-12-27 10:58:44,Philips,Philips Hue Starter Kit White and Color Ambian...,1,35.0,17.5,other,other
267035,511048,PHI0084,1,168.99,168.99,199.99,Philips Hue White and Color Ambiance Kit Incio...,2018-02-13 23:29:17,Philips,Philips Hue Starter Kit White and Color Ambian...,1,31.0,15.5,other,other
179089,452412,PHI0084,1,179.99,179.99,199.99,Philips Hue White and Color Ambiance Kit Incio...,2017-11-30 08:27:13,Philips,Philips Hue Starter Kit White and Color Ambian...,1,20.0,10.0,other,other
216531,478718,PHI0084,1,164.99,164.99,199.99,Philips Hue White and Color Ambiance Kit Incio...,2018-01-01 20:10:20,Philips,Philips Hue Starter Kit White and Color Ambian...,1,35.0,17.5,other,other


In [939]:
(
    orders_comp.loc[(orders_comp.brand == 'Philips')
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Philips')
                  , 'subcategory']
) = 'lighting'

In [912]:
(
    orders_comp.loc[(orders_comp.brand == 'Samsung')
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Samsung')
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                    &(orders_comp.desc.str.contains('[Mm]onitor'))
                  , 'subcategory']
) = 'monitors'
(
    orders_comp.loc[(orders_comp.brand == 'Samsung')
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                    &(orders_comp.subcategory == 'other')
                  , 'subcategory']
) = 'computer_parts_memory'

In [935]:
(
    orders_comp.loc[(orders_comp.brand == 'LG|Dell'), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'LG|Dell'), 'subcategory']
) = 'monitors'

In [924]:
(
    orders_comp.loc[(orders_comp.brand == 'QNAP'), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'QNAP'), 'subcategory']
) = 'servers'

In [916]:
(
    orders_comp.loc[(orders_comp.brand == 'ZaggKeys')
                  , 'category']
) ='accessories'
(
    orders_comp.loc[(orders_comp.brand == 'ZaggKeys')
                    &(orders_comp.desc.str.contains('[Hh]eadset'))
                  , 'subcategory']
) ='headphones'
(
    orders_comp.loc[(orders_comp.brand == 'ZaggKeys')
                    &(orders_comp.desc.str.contains('[Kk]eyboard'))
                  , 'subcategory']
) = 'keyboards'
(
    orders_comp.loc[(orders_comp.brand == 'ZaggKeys')
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                    &(orders_comp.subcategory == 'other')
                  , 'subcategory']
) = 'cases_apple_devices'

In [918]:
(
    orders_comp.loc[(orders_comp.brand == 'Synology')
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Synology')
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                    &(orders_comp.desc.str.contains('NAS'))
                  , 'subcategory']
) = 'servers'
(
    orders_comp.loc[(orders_comp.brand == 'Synology')
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                    &(orders_comp.desc.str.contains('[Rr]outer'))
                  , 'subcategory']
) = 'wifi'
(
    orders_comp.loc[(orders_comp.brand == 'Synology')
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                    &(orders_comp.desc.str.contains('DDR3 memory'))
                  , 'subcategory']
) = 'computer_parts_memory'
(
    orders_comp.loc[(orders_comp.brand == 'Synology')
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                    &(orders_comp.desc.str.contains('Anchor rails'))
                  , 'subcategory']
) = 'computer_parts'

In [943]:
(
    orders_comp.loc[(orders_comp.brand == 'Lifeproof|Moshi'), 'category']) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Lifeproof|Moshi')
                #    &(orders_comp.price<112)
                   &(orders_comp.name.str.contains('[Cc]ase'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  ,'subcategory']
) = 'cases'

In [932]:
(
    orders_comp.loc[(orders_comp.brand == 'Spek SeeThru|Otterbox'), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Spek SeeThru|Otterbox'), 'subcategory']
) = 'cases'

In [930]:

(
    orders_comp.loc[(orders_comp.brand == 'Tucano'), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Tucano'), 'subcategory']
) = 'cases'

In [925]:
orders_comp.loc[orders_comp.category =='other'].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 19775 entries, 7 to 289885
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   order_id     19775 non-null  int64         
 1   sku          19775 non-null  object        
 2   qty          19775 non-null  int64         
 3   unit_price   19775 non-null  float64       
 4   total_price  19775 non-null  float64       
 5   price        19775 non-null  float64       
 6   name         19775 non-null  object        
 7   order_date   19775 non-null  datetime64[ns]
 8   brand        19775 non-null  object        
 9   desc         19775 non-null  object        
 10  in_stock     19775 non-null  int64         
 11  price_diff   19775 non-null  float64       
 12  disc_perc    19775 non-null  float64       
 13  category     19775 non-null  object        
 14  subcategory  19775 non-null  object        
dtypes: datetime64[ns](1), float64(5), int64(3), object(6

In [928]:
orders_comp['order_date'].describe(datetime_is_numeric=True)

count                            61322
mean     2017-09-13 01:36:01.958089984
min                2017-01-01 01:51:47
25%      2017-06-14 13:23:46.249999872
50%         2017-10-29 14:17:31.500000
75%      2017-12-25 21:52:34.750000128
max                2018-03-14 12:03:52
Name: order_date, dtype: object

In [942]:
(
    orders_comp.loc[(orders_comp.brand == 'Moshi')
                     &(orders_comp.category =='other')
                #    &(orders_comp.price<112)
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  ].sort_values('price', ascending=False)
)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,price,name,order_date,brand,desc,in_stock,price_diff,disc_perc,category,subcategory
244581,497210,MOS0244,2,69.99,139.98,89.99,Moshi VersaKeyboard iPad Keyboard Folio (2017)...,2018-01-23 21:21:20,Moshi,Case ultralight folding keyboard for iPad with...,1,20.0,22.22,other,other
211350,473962,MOS0244,1,69.59,69.59,89.99,Moshi VersaKeyboard iPad Keyboard Folio (2017)...,2017-12-27 21:14:23,Moshi,Case ultralight folding keyboard for iPad with...,1,20.4,22.67,other,other
253434,502499,MOS0244,1,69.99,69.99,89.99,Moshi VersaKeyboard iPad Keyboard Folio (2017)...,2018-01-31 07:29:52,Moshi,Case ultralight folding keyboard for iPad with...,1,20.0,22.22,other,other
224453,484446,MOS0244,1,69.99,69.99,89.99,Moshi VersaKeyboard iPad Keyboard Folio (2017)...,2018-01-07 18:54:03,Moshi,Case ultralight folding keyboard for iPad with...,1,20.0,22.22,other,other
221883,482862,MOS0194,1,69.99,69.99,69.99,"Moshi Codex case MacBook Pro / Retina 13 ""Black",2018-01-05 14:43:08,Moshi,Waterproof case resistant hybrid polymeric mat...,0,0.0,0.0,other,other
5382,304723,MOS0180,1,34.99,34.99,55.0,Moshi iGlaze Case Luxe iPhone 6 / 6S Gray,2017-01-08 12:56:29,Moshi,acrólico transparent protective case for iPhon...,0,20.01,36.38,other,other
52434,344213,MOS0087,1,25.99,25.99,55.0,Moshi iGlaze iPhone case Kamaleon SE / 5s / 5 ...,2017-04-12 18:15:14,Moshi,IPhone Case SE / 5s / 5 with sleek aluminum su...,0,29.01,52.75,other,other
215590,477882,MOS0178,1,33.24,33.24,55.0,Moshi iGlaze Case Luxe iPhone 6 / 6S Rosa,2017-12-31 09:17:13,Moshi,acrólico transparent protective case for iPhon...,0,21.76,39.56,other,other
133519,417484,MOS0218,1,39.99,39.99,50.0,SenseCover Moshi Case Black iPhone 7/8 Plus,2017-10-25 13:40:49,Moshi,Cover with lid front opening and folding stand...,1,10.01,20.02,other,other
51695,345156,MOS0149,1,36.99,36.99,50.0,SenseCover Moshi Case iPhone 6 / 6S Plus Black,2017-04-10 18:12:39,Moshi,Cover with touch sensor to answer calls withou...,0,13.01,26.02,other,other


In [937]:
(
    orders_comp.loc[(orders_comp.brand == 'JBL')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'JBL')
                #    &(orders_comp.price<112)
                   &(orders_comp.name.str.contains('[Ss]peaker'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  ,'subcategory']
) = 'speakers'
(
    orders_comp.loc[(orders_comp.brand == 'JBL')
                &(orders_comp.subcategory == 'other')
                  ,'subcategory']
) = 'headphones'

In [947]:
path = r'C:\Users\muell\Desktop\WBS\Project 2\coding_challenges\clean_csv'

In [1066]:
orders_comp.to_csv(path + '\orders_final.csv', index=False)

In [1065]:
orders_comp.loc[orders_comp.subcategory == 'other'].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6927 entries, 23 to 289601
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   order_id     6927 non-null   int64         
 1   sku          6927 non-null   object        
 2   qty          6927 non-null   int64         
 3   unit_price   6927 non-null   float64       
 4   total_price  6927 non-null   float64       
 5   price        6927 non-null   float64       
 6   name         6927 non-null   object        
 7   order_date   6927 non-null   datetime64[ns]
 8   brand        6927 non-null   object        
 9   desc         6927 non-null   object        
 10  in_stock     6927 non-null   int64         
 11  price_diff   6927 non-null   float64       
 12  disc_perc    6927 non-null   float64       
 13  category     6927 non-null   object        
 14  subcategory  6927 non-null   object        
dtypes: datetime64[ns](1), float64(5), int64(3), object(6

In [952]:
orders_comp.brand.value_counts()

Apple                  13827
OWC                     3607
Pack                    2550
Belkin                  2256
LaCie                   2091
Crucial                 2027
Satechi                 1891
Western Digital         1859
Wacom                   1812
NewerTech               1575
iFixit                  1516
SanDisk                 1126
Seagate                  872
Griffin                  841
Samsung                  797
LG                       770
ZaggKeys                 743
Tucano                   703
Otterbox                 657
Dell                     642
JBL                      587
Synology                 558
Lifeproof                539
Philips                  533
Matias                   529
Moshi                    495
FCM                      473
Startech                 468
Spek SeeThru             444
Logitech                 435
Bose                     414
Kingston                 397
Mophie                   397
Trascend                 395
Macally       

In [1063]:
(
    orders_comp.loc[(orders_comp.brand == 'Repair')
                     &(orders_comp.subcategory =='other')
                #    &(orders_comp.price<112)
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  ].sort_values('price', ascending=False)
)

Unnamed: 0,order_id,sku,qty,unit_price,total_price,price,name,order_date,brand,desc,in_stock,price_diff,disc_perc,category,subcategory
228385,480784,REP0358,1,249.99,249.99,299.99,Full Screen Repair iPad Air 2,2018-01-09 21:17:38,Repair,Repair service including parts and labor for i...,0,50.0,16.67,other,other
128992,407450,REP0188,1,199.99,199.99,209.99,Full Screen Repair iPad Mini 2,2017-10-02 16:33:07,Repair,Repair service including parts and labor for i...,0,10.0,4.76,other,other
271534,506891,REP0350,1,199.99,199.99,199.99,Full Screen Repair iPad Air,2018-02-06 16:12:47,Repair,Repair service including parts and labor for i...,0,0.0,0.0,other,other
130940,415245,REP0194,1,129.0,129.0,179.99,Full screen repair iPad 3,2017-10-20 18:08:46,Repair,Repair service including parts and labor for i...,0,50.99,28.33,other,other
97186,385352,REP0396,1,189.99,189.99,119.99,Full screen repair iPhone 7 Plus,2017-08-02 12:07:21,Repair,Repair service including parts and labor for i...,0,-70.0,-58.34,other,other
278496,519867,REP0396,1,119.99,119.99,119.99,Full screen repair iPhone 7 Plus,2018-03-02 09:48:03,Repair,Repair service including parts and labor for i...,0,0.0,0.0,other,other
177059,446921,REP0396,1,189.99,189.99,119.99,Full screen repair iPhone 7 Plus,2017-11-29 07:08:38,Repair,Repair service including parts and labor for i...,0,-70.0,-58.34,other,other
170910,448191,REP0396,1,170.99,170.99,119.99,Full screen repair iPhone 7 Plus,2017-11-27 13:42:52,Repair,Repair service including parts and labor for i...,0,-51.0,-42.5,other,other
96584,384829,REP0323,1,159.99,159.99,109.99,Full screen repair iPhone 6s Plus,2017-08-01 11:25:05,Repair,Repair service including parts and labor for i...,0,-50.0,-45.46,other,other
272540,515253,REP0323,1,109.99,109.99,109.99,Full screen repair iPhone 6s Plus,2018-02-21 10:23:07,Repair,Repair service including parts and labor for i...,0,0.0,0.0,other,other


In [1064]:
(
    orders_comp.loc[(orders_comp.brand == 'Repair')
                  ,'category']
) = 'service'
(
    orders_comp.loc[(orders_comp.brand == 'Repair')
                    ,'subcategory']
) = 'repair'

In [1062]:
(
    orders_comp.loc[(orders_comp.brand == 'Service')
                  ,'category']
) = 'service'
(
    orders_comp.loc[(orders_comp.brand == 'Service')
                    ,'subcategory']
) = 'installation_repair'

In [1061]:
(
    orders_comp.loc[(orders_comp.brand == 'Puro')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Puro')
                    ,'subcategory']
) = 'iphone_cases'

In [1054]:
(
    orders_comp.loc[(orders_comp.brand == 'Twelve South')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Twelve South')
                    ,'subcategory']
) = 'cases'

In [1052]:
(
    orders_comp.loc[(orders_comp.brand == 'Minibatt')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Minibatt')
                    ,'subcategory']
) = 'charging'

In [1050]:
(
    orders_comp.loc[(orders_comp.brand == 'Jawbone')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Jawbone')
                    ,'subcategory']
) = 'monitors'

In [1048]:
(
    orders_comp.loc[(orders_comp.brand == 'LMP')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'LMP')
                    ,'subcategory']
) = 'hubs'
(
    orders_comp.loc[(orders_comp.brand == 'LMP')
                    &(orders_comp.desc.str.contains('keypad'))
                    ,'subcategory']
) = 'keyboards'

In [1046]:
(
    orders_comp.loc[(orders_comp.brand == 'Hoco Nike')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Hoco Nike')
                    ,'subcategory']
) = 'applewatch_straps'
(
    orders_comp.loc[(orders_comp.brand == 'Hoco Nike')
                    &(orders_comp.desc.str.contains('[Bb]attery'))
                    ,'subcategory']
) = 'external_battery'
(
    orders_comp.loc[(orders_comp.brand == 'Hoco Nike')
                    &(orders_comp.desc.str.contains('charging'))
                    ,'subcategory']
) = 'adapters_cables'

In [1044]:
(
    orders_comp.loc[(orders_comp.brand == 'Beats')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Beats')
                    ,'subcategory']
) = 'headphones'

In [1042]:
(
    orders_comp.loc[(orders_comp.brand == 'STM')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'STM')
                    ,'subcategory']
) = 'cases'

In [1040]:
(
    orders_comp.loc[(orders_comp.brand == 'BenQ')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'BenQ')
                    ,'subcategory']
) = 'monitors'

In [1038]:
(
    orders_comp.loc[(orders_comp.brand == 'Adonit')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Adonit')
                    ,'subcategory']
) = 'pencils'

In [1035]:
(
    orders_comp.loc[(orders_comp.brand == 'Sonos')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Sonos')
                    ,'subcategory']
) = 'speakers'

In [1033]:
(
    orders_comp.loc[(orders_comp.brand == 'Elgato')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Elgato')
                    ,'subcategory']
) = 'gadgets'
(
    orders_comp.loc[(orders_comp.brand == 'Elgato')
                    &(orders_comp.desc.str.contains('[Hh]ub'))
                    ,'subcategory']
) = 'hubs'
(
    orders_comp.loc[(orders_comp.brand == 'Elgato')
                    &(orders_comp.desc.str.contains('[Dd]ock'))
                    ,'subcategory']
) = 'adapters_cables'

In [1031]:
(
    orders_comp.loc[(orders_comp.brand == 'iOttie')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'iOttie')
                    ,'subcategory']
) = 'iphone_support'
(
    orders_comp.loc[(orders_comp.brand == 'iOttie')
                    &(orders_comp.desc.str.contains('charger'))
                    ,'subcategory']
) = 'iphone_charger'

In [1029]:
(
    orders_comp.loc[(orders_comp.brand == 'Moxie')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Moxie')
                    ,'subcategory']
) = 'iphone_cases'

In [1027]:
(
    orders_comp.loc[(orders_comp.brand == 'Thule')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Thule')
                    ,'subcategory']
) = 'macbook_cases'

In [1025]:
(
    orders_comp.loc[(orders_comp.brand == 'Plantronics')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Plantronics')
                    ,'subcategory']
) = 'headphones'

In [1023]:
(
    orders_comp.loc[(orders_comp.brand == 'Replacement')
                  ,'category']
) = 'spare-parts'
(
    orders_comp.loc[(orders_comp.brand == 'Replacement')
                    &(orders_comp.name.str.contains('screen'))
                    ,'subcategory']
) = 'iphone_screen'
(
    orders_comp.loc[(orders_comp.brand == 'Replacement')
                    &(orders_comp.name.str.contains('[Bb]attery'))
                    ,'subcategory']
) = 'iphone_battery'
(
    orders_comp.loc[(orders_comp.brand == 'Replacement')
                    &(orders_comp.name.str.contains('[Cc]able'))
                    ,'subcategory']
) = 'macbook_cable'

In [1021]:
(
    orders_comp.loc[(orders_comp.brand == 'G-Technology')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'G-Technology')
                    ,'subcategory']
) = 'memory'

In [1017]:
(
    orders_comp.loc[(orders_comp.brand == 'X-Doria')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'X-Doria')
                    &(orders_comp.name.str.contains('strap', case=False))
                    ,'subcategory']
) = 'applewatch_straps'
(
    orders_comp.loc[(orders_comp.brand == 'X-Doria')
                    &(orders_comp.name.str.contains('housing', case=False))
                    ,'subcategory']
) = 'applewatch_cases'
(
    orders_comp.loc[(orders_comp.brand == 'X-Doria')
                    &(orders_comp.name.str.contains('ipad', case=False))
                    ,'subcategory']
) = 'ipad_cases'
(
    orders_comp.loc[(orders_comp.brand == 'X-Doria')
                    &(orders_comp.name.str.contains('iphone', case=False))
                    ,'subcategory']
) = 'iphone_cases'

In [1015]:
(
    orders_comp.loc[(orders_comp.brand == 'Muvit')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Muvit')
                    ,'subcategory']
) = 'iphone_cases'

In [1013]:
(
    orders_comp.loc[(orders_comp.brand == 'Wowewa')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Wowewa')
                    &(orders_comp.desc.str.contains('[Bb]attery'))
                    ,'subcategory']
) = 'external_battery'
(
    orders_comp.loc[(orders_comp.brand == 'Wowewa')
                    &(orders_comp.desc.str.contains('[Pp]rotector'))
                    ,'subcategory']
) = 'iphone_cases'

In [1011]:
(
    orders_comp.loc[(orders_comp.brand == 'Kanex')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Kanex')
                    ,'subcategory']
) = 'adapters_cables'

In [1009]:
(
    orders_comp.loc[(orders_comp.brand == 'Sphero')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Sphero')
                    ,'subcategory']
) = 'gadgets'

In [1005]:
(
    orders_comp.loc[(orders_comp.brand == 'Hyper Pearl')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Hyper Pearl')
                    ,'subcategory']
) = 'gadgets'
(
    orders_comp.loc[(orders_comp.brand == 'Hyper Pearl')
                    &(orders_comp.desc.str.contains('[Hh]ub'))
                    ,'subcategory']
) = 'hubs'
(
    orders_comp.loc[(orders_comp.brand == 'Hyper Pearl')
                    &(orders_comp.name.str.contains('[Bb]attery'))
                    ,'subcategory']
) = 'external_battery'

In [1003]:
(
    orders_comp.loc[(orders_comp.brand == 'Netatmo')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Netatmo')
                    ,'subcategory']
) = 'gadgets'

In [1001]:
(
    orders_comp.loc[(orders_comp.brand == 'TrackR')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'TrackR')
                    ,'subcategory']
) = 'gadgets'

In [999]:
(
    orders_comp.loc[(orders_comp.brand == 'Toshiba')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Toshiba')
                    ,'subcategory']
) = 'memory'

In [997]:
(
    orders_comp.loc[(orders_comp.brand == 'Allocacoc')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Allocacoc')
                    ,'subcategory']
) = 'adapters_cables'

In [995]:
(
    orders_comp.loc[(orders_comp.brand == 'D-Link')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'D-Link')
                    ,'subcategory']
) = 'wifi'
(
    orders_comp.loc[(orders_comp.brand == 'D-Link')
                    &(orders_comp.desc.str.contains('[Cc]amera'))
                    ,'subcategory']
) = 'cameras'

In [994]:
(
    orders_comp.loc[(orders_comp.brand == 'TP-Link')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'TP-Link')
                    ,'subcategory']
) = 'wifi'
(
    orders_comp.loc[(orders_comp.brand == 'TP-Link')
                    &(orders_comp.desc.str.contains('[Cc]able'))
                    ,'subcategory']
) = 'adapters_cables'

In [990]:
(
    orders_comp.loc[(orders_comp.brand == 'Lexar')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Lexar')
                    ,'subcategory']
) = 'memory'

In [986]:
(
    orders_comp.loc[(orders_comp.brand == 'Macally')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Macally')
                   &(orders_comp.desc.str.contains('[Cc]able|[Aa]dapter'))
                    ,'subcategory']
) = 'adapters_cables'
(
    orders_comp.loc[(orders_comp.brand == 'Macally')
                   &(orders_comp.desc.str.contains('[Hh]ub'))
                    ,'subcategory']
) = 'hub'
(
    orders_comp.loc[(orders_comp.brand == 'Macally')
                   &(orders_comp.desc.str.contains('[Ll]amp'))
                    ,'subcategory']
) = 'lampcharge'
(
    orders_comp.loc[(orders_comp.brand == 'Macally')
                   &(orders_comp.desc.str.contains('outer box'))
                    ,'subcategory']
) = 'cases'

In [982]:
(
    orders_comp.loc[(orders_comp.brand == 'Trascend')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Trascend')
                  ,'subcategory']
) = 'computer_parts_memory'
(
    orders_comp.loc[(orders_comp.brand == 'Trascend')
                    &(orders_comp.desc.str.contains('DVD'))
                  ,'subcategory']
) = 'dvd'

In [979]:
(
    orders_comp.loc[(orders_comp.brand == 'Mophie')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Mophie')
                    &(orders_comp.desc.str.contains('[Bb]attery'))
                  ,'subcategory']
) = 'external_battery'
(
    orders_comp.loc[(orders_comp.brand == 'Mophie')
                    &(orders_comp.name.str.contains('[Cc]ase'))
                  ,'subcategory']
) = 'cases'
(
    orders_comp.loc[(orders_comp.brand == 'Mophie')
                    &(orders_comp.desc.str.contains('charging dock'))
                  ,'subcategory']
) = 'charging_iphone'

In [978]:
(
    orders_comp.loc[(orders_comp.brand == 'Kingston')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Kingston')
                  ,'subcategory']
) = 'computer_parts_memory'

In [964]:
(
    orders_comp.loc[(orders_comp.brand == 'Bose')
                  ,'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Bose')
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                    &(orders_comp.desc.str.contains('[Hh]eadphone|[Hh]eadset|[Ee]arphone'))
                  ,'subcategory']
) = 'headphones'
(
    orders_comp.loc[(orders_comp.brand == 'Bose')
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                    &(orders_comp.desc.str.contains('[Ss]peaker'))
                  ,'subcategory']
) = 'speakers'
(
    orders_comp.loc[(orders_comp.brand == 'Bose')
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                    &(orders_comp.desc.str.contains('[Mm]icrophone'))
                  ,'subcategory']
) = 'microphones'
(
    orders_comp.loc[(orders_comp.brand == 'Bose')
                 #  &(orders_comp.name.str.contains('[Cc]able'))
                    &(orders_comp.desc.str.contains('music system'))
                  ,'subcategory']
) = 'music_systems'

In [961]:
(
    orders_comp.loc[(orders_comp.brand == 'Logitech'), 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Logitech')
                #    &(orders_comp.price<112)
                   &(orders_comp.name.str.contains('[Cc]over|Folio'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  ,'subcategory']
) = 'cases'
(
    orders_comp.loc[(orders_comp.brand == 'Logitech')
                #    &(orders_comp.price<112)
                   &(orders_comp.desc.str.contains('[Kk]eyboard'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  ,'subcategory']
) = 'keyboards'
(
    orders_comp.loc[(orders_comp.brand == 'Logitech')
                #    &(orders_comp.price<112)
                   &(orders_comp.desc.str.contains('[Mm]ouse'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  ,'subcategory']
) = 'mouses'
(
    orders_comp.loc[(orders_comp.brand == 'Logitech')
                #    &(orders_comp.price<112)
                   &(orders_comp.desc.str.contains('[Ss]peaker'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  ,'subcategory']
) = 'speakers'
(
    orders_comp.loc[(orders_comp.brand == 'Logitech')
                #    &(orders_comp.price<112)
                   &(orders_comp.desc.str.contains('[Rr]emote [Cc]ontrol'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  ,'subcategory']
) = 'gadgets'

In [954]:
(
    orders_comp.loc[(orders_comp.brand == 'Startech')
                  , 'category']
) = 'accessories'
(
    orders_comp.loc[(orders_comp.brand == 'Startech')
                   &(orders_comp.desc.str.contains('[Cc]able|[Aa]dapter'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  , 'subcategory']
) = 'adapters_cables'
(
    orders_comp.loc[(orders_comp.brand == 'Startech')
                   &(orders_comp.desc.str.contains('[Hh]ub'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  , 'subcategory']
) = 'hubs'
(
    orders_comp.loc[(orders_comp.brand == 'Startech')
                   &(orders_comp.desc.str.contains('SuperDrive'))
                  #  &(orders_comp.desc.str.contains('[Dd]ock'))
                  , 'subcategory']
) = 'cases'


In [972]:
orders_comp.loc[orders_comp.category == 'iphones', 'subcategory'] = 'cases_iphone'
orders_comp.loc[orders_comp.category == 'iphones', 'category'] = 'accessories'

In [973]:
orders_comp.loc[orders_comp.category == 'iphones']

Unnamed: 0,order_id,sku,qty,unit_price,total_price,price,name,order_date,brand,desc,in_stock,price_diff,disc_perc,category,subcategory
