In [2]:
import pandas as pd
from sqlalchemy import create_engine, inspect
from sqlalchemy.engine import URL

import creds

In [23]:
# Connection settings come from the local `creds` module so that no secrets
# are written into the notebook itself.
host = creds.host
password = creds.password
port = creds.port
user = creds.user
database = creds.database
api_type = creds.api_type
# Build the URL from separate components instead of interpolating them into a
# string: a password containing characters such as "@", ":" or "/" would
# otherwise produce a malformed or mis-parsed connection URL.
connection_url = URL.create(
    drivername=f'postgresql+{api_type}',
    username=user,
    password=password,
    host=host,
    port=port,
    database=database,
)
# create_engine() does not open a connection by itself; the first query issued
# through the engine does.
engine = create_engine(connection_url)

In [24]:
# Load the whole 'products' table into a DataFrame through the SQLAlchemy engine.
df = pd.read_sql_table(table_name='products', con=engine)


In [25]:
### Cleaning up the price column in the dataframe.
# Drop every row whose price is the literal string "N/A". The explicit .copy()
# makes the filtered result an independent frame, so the column assignment
# below does not raise SettingWithCopyWarning.
df = df[df['price'] != 'N/A'].copy()
# Strip the '£' sign from both ends, remove thousands separators (a literal
# comma, hence regex=False), then convert the remaining text to float.
# NOTE: this cell is not re-runnable on its own output - once 'price' is
# numeric the .str accessor fails, so reload df before running it again.
df['price'] = (
    df['price']
    .str.strip('£')
    .str.replace(',', '', regex=False)
    .astype('float64')
)
df.head(100)

Unnamed: 0,id,product_name,category,product_description,price,location,page_id,create_time
1,243809c0-9cfc-4486-ad12-3b7a16605ba9,"Mirror wall art | in Wokingham, Berkshire | Gu...","Home & Garden / Dining, Living Room Furniture ...","Mirror wall art. Posted by Nisha in Dining, Li...",5.0,"Wokingham, Berkshire",1426704584,2022-02-26
2,1c58d3f9-8b93-47ea-9415-204fcc2a22e6,"Stainless Steel Food Steamer | in Inverness, H...",Home & Garden / Other Household Goods,Morphy Richard’s (model no 48755)Stainless ste...,20.0,"Inverness, Highland",1426704579,2022-02-26
3,860673f1-57f6-47ba-8d2f-13f9e05b8f9a,"Sun loungers | in Skegness, Lincolnshire | Gum...",Home & Garden / Garden & Patio / Outdoor Setti...,I have 2 of these - collection only as I don’t...,20.0,"Skegness, Lincolnshire",1426704576,2022-02-26
4,59948726-29be-4b35-ade5-bb2fd7331856,Coffee side table from Ammunition ammo box hai...,"Home & Garden / Dining, Living Room Furniture ...",Great reclaimed army ammunition box used as co...,115.0,"Radstock, Somerset",1426704575,2022-02-26
5,16dbc860-696e-4cda-93f6-4dd4926573fb,Modern Shannon Sofa for sale at low cost | in ...,"Home & Garden / Dining, Living Room Furniture ...",New Design Shannon Corner sofa 5 Seater Avail...,450.0,"Delph, Manchester",1426704570,2022-02-26
...,...,...,...,...,...,...,...,...
103,248ba22c-0329-4132-aa33-0036b35d9884,"3 Seater Sofa | in Carshalton, London | Gumtree","Home & Garden / Dining, Living Room Furniture ...","3 Seater Leather Sofa - Used, but good conditi...",100.0,"Carshalton, London",1426701637,2022-02-26
104,e0fc0443-7cea-43a9-8446-f3c0ea7fd2d9,"Big Taxi canvas picture | in Kirkcaldy, Fife |...","Home & Garden / Dining, Living Room Furniture ...",Big Taxi canvas picture\ras good as new \rDime...,5.0,"Kirkcaldy, Fife",1426701627,2022-02-26
105,ca4f0b85-219e-44e3-8e19-7b4425a2ffc4,"Family of 4 wooden ducks | in St Andrews, Fife...",Home & Garden / Other Household Goods,Family of 4 wooden ducks poor things have been...,45.0,"St Andrews, Fife",1420504407,2022-02-26
107,d3423b1d-2565-416a-935e-056d67363c12,"Farmhouse table | in Telscombe Cliffs, East Su...","Home & Garden / Dining, Living Room Furniture ...",Farmhouse Table \r160cm x 80cm \rseats 6 comfo...,25.0,"Telscombe Cliffs, East Sussex",1426701614,2022-02-26


In [19]:
# Store 'category' with the pandas categorical dtype instead of plain objects,
# then show the converted column.
df['category'] = df['category'].astype(dtype='category')
df['category']

1       Home & Garden / Dining, Living Room Furniture ...
2                   Home & Garden / Other Household Goods
3       Home & Garden / Garden & Patio / Outdoor Setti...
4       Home & Garden / Dining, Living Room Furniture ...
5       Home & Garden / Dining, Living Room Furniture ...
                              ...                        
8085    Video Games & Consoles / Consoles / PS4 (Sony ...
8086    Video Games & Consoles / Consoles / PS4 (Sony ...
8088    Video Games & Consoles / Other Video Games & C...
8089    Video Games & Consoles / Video Game Accessorie...
8090                       Video Games & Consoles / Games
Name: category, Length: 7156, dtype: category
Categories (435, object): ['Appliances / Dishwashers', 'Appliances / Freezers', 'Appliances / Fridge Freezers', 'Appliances / Health & Beauty Appliances / Den..., ..., 'Video Games & Consoles / Video Game Accessori..., 'Video Games & Consoles / Video Game Accessori..., 'Video Games & Consoles / Video Game Accessori

In [31]:
## Count rows that are exact repeats of an earlier row across ALL columns.
# duplicated() flags every repeat after the first occurrence, so the sum is the
# total number of repeated rows (0 means there are no exact duplicates).
# The count is printed explicitly because a notebook only displays the LAST
# expression of a cell; as a bare expression here it would be silently dropped.
print(df.duplicated().sum())
# A more useful check compares only the columns that describe the listing, so
# that two rows count as the same product when all of these columns match.
# keep=False marks EVERY member of a duplicate group (not just the repeats), so
# all copies show up side by side in the result below.
duplicates = df.duplicated(subset=["product_name", "category", "product_description", "price", "location"], keep=False)
df[duplicates]
# we can also sort these out using df[duplicates].sort_values(by='columname')


Unnamed: 0,id,product_name,category,product_description,price,location,page_id,create_time
58,2e6db1e3-b60d-456a-8320-ddf4827e464f,February Sale offer Divan bed with mattress av...,Home & Garden / Beds & Bedroom Furniture / Sin...,💓Brand New All Beds 🛏️ Avaliable Here Please T...,130.0,"Nechells, West Midlands",1426701749,2022-02-26
71,a33f8af2-093e-40e3-b86c-477c3addb431,February Sale offer Divan bed with mattress av...,Home & Garden / Beds & Bedroom Furniture / Sin...,💓Brand New All Beds 🛏️ Avaliable Here Please T...,130.0,"Nechells, West Midlands",1426701712,2022-02-26
5268,eb60f24a-0171-4c29-b09a-2c8155809b3e,"Mehndi | in Sparkhill, West Midlands | Gumtree",Health & Beauty / Tattoo & Body Art,Each side- £5Both hands Upper side £9Both hand...,5.0,"Sparkhill, West Midlands",1423805014,2022-02-27
6084,76d84d36-bd2d-4cb2-a9c1-9b9c11ea1e52,"Mehndi | in Sparkhill, West Midlands | Gumtree",Health & Beauty / Tattoo & Body Art,Each side- £5Both hands Upper side £9Both hand...,5.0,"Sparkhill, West Midlands",1423804831,2022-02-27


In [32]:
# Collapse repeated listings down to a single row each. Two rows count as the
# same listing when all of the descriptive columns in `subset` match.
# - keep='first' retains one copy of every listing; keep=False would delete
#   every copy and remove the product from the data entirely.
# - drop_duplicates() returns a new frame, so the result is assigned back to
#   df; without the assignment df would still contain the duplicates.
df = df.drop_duplicates(
    subset=["product_name", "category", "product_description", "price", "location"],
    keep='first',
)
df

Unnamed: 0,id,product_name,category,product_description,price,location,page_id,create_time
1,243809c0-9cfc-4486-ad12-3b7a16605ba9,"Mirror wall art | in Wokingham, Berkshire | Gu...","Home & Garden / Dining, Living Room Furniture ...","Mirror wall art. Posted by Nisha in Dining, Li...",5.0,"Wokingham, Berkshire",1426704584,2022-02-26
2,1c58d3f9-8b93-47ea-9415-204fcc2a22e6,"Stainless Steel Food Steamer | in Inverness, H...",Home & Garden / Other Household Goods,Morphy Richard’s (model no 48755)Stainless ste...,20.0,"Inverness, Highland",1426704579,2022-02-26
3,860673f1-57f6-47ba-8d2f-13f9e05b8f9a,"Sun loungers | in Skegness, Lincolnshire | Gum...",Home & Garden / Garden & Patio / Outdoor Setti...,I have 2 of these - collection only as I don’t...,20.0,"Skegness, Lincolnshire",1426704576,2022-02-26
4,59948726-29be-4b35-ade5-bb2fd7331856,Coffee side table from Ammunition ammo box hai...,"Home & Garden / Dining, Living Room Furniture ...",Great reclaimed army ammunition box used as co...,115.0,"Radstock, Somerset",1426704575,2022-02-26
5,16dbc860-696e-4cda-93f6-4dd4926573fb,Modern Shannon Sofa for sale at low cost | in ...,"Home & Garden / Dining, Living Room Furniture ...",New Design Shannon Corner sofa 5 Seater Avail...,450.0,"Delph, Manchester",1426704570,2022-02-26
...,...,...,...,...,...,...,...,...
8085,c4148656-78a9-4f3e-b393-134fdc5ef900,Sony PlayStation VR Move Bundle | in Acocks Gr...,Video Games & Consoles / Consoles / PS4 (Sony ...,Sony PlayStation VR Move Bundle353CASH ON COLL...,260.0,"Acocks Green, West Midlands",1422159237,2022-02-28
8086,564e3411-768d-4250-a624-b119d696f103,"Playstation VR V2 Bundle | in Acocks Green, We...",Video Games & Consoles / Consoles / PS4 (Sony ...,Playstation VR V2 Bundle355CASH ON COLLECTION ...,235.0,"Acocks Green, West Midlands",1422159464,2022-02-28
8088,2b0a652b-46a2-4297-b619-5efeeb222787,"Oculus quest 2 256gb | in Montrose, Angus | Gu...",Video Games & Consoles / Other Video Games & C...,Pick up only £250Comes with two pistols stocks...,250.0,"Montrose, Angus",1426668818,2022-02-28
8089,719fd40a-870e-4144-b324-55dff2e66fb4,Logitech driving force shifter | in Carrickfer...,Video Games & Consoles / Video Game Accessorie...,Bought at christmas from currys retailing at £...,30.0,"Carrickfergus, County Antrim",1426699715,2022-02-28


In [None]:
# Show the frame without the 'create_time' and 'page_id' columns.
# drop() returns a new DataFrame; the result is not assigned, so df itself
# still has both columns after this cell.
df.drop(columns=['create_time', 'page_id'])