In [216]:
# importing modules and relevant data set

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt # Visualization
import seaborn as sns
import os

# Read the bid-level auction records from the CSV file and preview the
# first five rows.
df = pd.read_csv(filepath_or_buffer='auction.csv')
df.head(5)

Unnamed: 0,auctionid,bid,bidtime,bidder,bidderrate,openbid,price,item,auction_type
0,1638843936,500.0,0.478368,kona-java,181.0,500.0,1625.0,Cartier wristwatch,7 day auction
1,1638843936,800.0,0.826389,doc213,60.0,500.0,1625.0,Cartier wristwatch,7 day auction
2,1638843936,600.0,3.761123,zmxu,7.0,500.0,1625.0,Cartier wristwatch,7 day auction
3,1638843936,1500.0,5.226377,carloss8055,5.0,500.0,1625.0,Cartier wristwatch,7 day auction
4,1638843936,1600.0,6.570625,jdrinaz,6.0,500.0,1625.0,Cartier wristwatch,7 day auction


##### The variables involved in the data set are as follows:

* **auctionid:** unique id for each auction
* **bid:** proxy bid placed by bidder
* **bidtime:** time the bid is placed in relation to total auction length
* **bidder:** bidder username
* **bidderrate:** bidder's rating on eBay
* **openbid:** opening bid set by the seller
* **price:** closing price of the auction
* **item:** name of item
* **auction_type:** type of auction (3day, 5day, 7day)




##### First, we will begin with checking the dataset for null values

In [19]:
# Count the missing values in every column.
df.isna().sum()

auctionid        0
bid              0
bidtime          0
bidder          16
bidderrate      11
openbid          0
price            0
item             0
auction_type     0
dtype: int64

In [24]:
# Rows that are missing the bidder name, the bidder rating, or both.
null_mask = df['bidderrate'].isnull() | df['bidder'].isnull()
rows_with_nulls = df[null_mask]
print('Shape of entries with null values: ', rows_with_nulls.shape)
rows_with_nulls

Shape of entries with null values:  (27, 9)


Unnamed: 0,auctionid,bid,bidtime,bidder,bidderrate,openbid,price,item,auction_type
8049,8212140993,135.0,6.402141,mac_ranch,,9.99,306.0,Xbox game console,7 day auction
8050,8212140993,145.0,6.402384,mac_ranch,,9.99,306.0,Xbox game console,7 day auction
8189,8212190120,22.22,4.537535,Private,,12.99,28.0,Xbox game console,7 day auction
8190,8212190120,15.0,4.769734,Private,,12.99,28.0,Xbox game console,7 day auction
8191,8212190120,22.0,4.769815,Private,,12.99,28.0,Xbox game console,7 day auction
8192,8212190120,22.72,6.738773,Private,,12.99,28.0,Xbox game console,7 day auction
8193,8212190120,24.0,6.753611,Private,,12.99,28.0,Xbox game console,7 day auction
8194,8212190120,25.0,6.857685,Private,,12.99,28.0,Xbox game console,7 day auction
8195,8212190120,25.0,6.943333,Private,,12.99,28.0,Xbox game console,7 day auction
8196,8212190120,28.0,6.989757,Private,,12.99,28.0,Xbox game console,7 day auction


#### There are 27 rows with null values. In the grand scheme of things they will not materially affect the analysis, so we simply fill the missing `bidder` usernames with `Private` and the missing `bidderrate` values with the mean rating, which is approximately 32.

In [39]:
# Assign the filled columns back instead of calling fillna(..., inplace=True)
# on `df.bidder` / `df.bidderrate`: that chained in-place form operates on an
# intermediate Series, which triggers a chained-assignment warning on
# pandas 2.x and does not update `df` at all under Copy-on-Write.
df['bidder'] = df['bidder'].fillna('Private')
# 32 is the mean bidder rating quoted in the accompanying markdown.
df['bidderrate'] = df['bidderrate'].fillna(32)

##### After the data is cleaned, we will begin by taking a look at the total unique values in each attribute

In [52]:
# Number of distinct non-null values in each column.
df.nunique(axis=0, dropna=True)

auctionid         628
bid              1858
bidtime         10400
bidder           3387
bidderrate        301
openbid           109
price             318
item                3
auction_type        3
dtype: int64

##### The number of auctions is the most valuable piece of information here, and we will try to derive more from it. We would also like to find out whether each type of auction caters only to a specific kind of item.

In [198]:
# Build a per-auction summary (indexed by auctionid) from the bid-level data:
# the number of bids, the mean opening bid and the mean price of each auction.
auctions_df = df.groupby('auctionid').agg(
    bid_count=('bid', 'count'),
    opening_price=('openbid', 'mean'),
    closing_price=('price', 'mean'),
)

In [199]:
def get_item(auctionid_input):
    """Return the first distinct `item` value among the rows of the global
    `df` whose `auctionid` equals `auctionid_input`.

    Raises IndexError when no row matches.
    """
    matching_items = df.loc[df['auctionid'] == auctionid_input, 'item']
    return matching_items.unique()[0]
def get_auction_type(auctionid_input):
    """Return the first distinct `auction_type` value among the rows of the
    global `df` whose `auctionid` equals `auctionid_input`.

    Raises IndexError when no row matches.
    """
    matching_types = df.loc[df['auctionid'] == auctionid_input, 'auction_type']
    return matching_types.unique()[0]

# Add per-auction columns: profit, item sold and auction type.
# Everything here operates on `auctions_df`; the names `df_branch` and `df2`
# used previously are never defined in this notebook and raise NameError on a
# fresh kernel.
if 'auctionid' not in auctions_df.columns:
    # Move auctionid out of the index exactly once; the guard keeps the cell
    # re-runnable without accumulating a stray 'index' column.
    auctions_df = auctions_df.reset_index()
auctions_df['profit'] = auctions_df['closing_price'] - auctions_df['opening_price']
auctions_df['item'] = auctions_df['auctionid'].apply(get_item)
auctions_df['auction_type'] = auctions_df['auctionid'].apply(get_auction_type)

## Using the original dataframe, we create a new dataframe holding summary information for each individual auctionid

In [200]:
auctions_df.head(10)

Unnamed: 0,auctionid,bid_count,opening_price,closing_price,profit,item,auction_type
0,1638843936,7,500.0,1625.0,1125.0,Cartier wristwatch,7 day auction
1,1638844284,2,200.0,500.0,300.0,Cartier wristwatch,7 day auction
2,1638844464,16,300.0,740.0,440.0,Cartier wristwatch,7 day auction
3,1638844729,11,225.0,320.0,95.0,Cartier wristwatch,7 day auction
4,1638893549,5,99.0,177.5,78.5,Cartier wristwatch,3 day auction
5,1638917885,13,25.0,227.5,202.5,Cartier wristwatch,7 day auction
6,1639226378,12,495.0,1725.0,1230.0,Cartier wristwatch,7 day auction
7,1639253454,2,250.0,255.0,5.0,Cartier wristwatch,7 day auction
8,1639309309,27,1.0,374.99,373.99,Cartier wristwatch,7 day auction
9,1639323228,8,1.0,185.0,184.0,Cartier wristwatch,7 day auction


##### More insight is obtained by grouping by auction type and item, to see how different types of items perform in different types of auction

In [213]:
# Average the numeric per-auction metrics for every (auction_type, item) pair.
# Only numeric columns are selected: the grouping keys already form the result
# index, and asking mean() to aggregate the string columns 'item' and
# 'auction_type' raises TypeError on pandas >= 2.0, where non-numeric columns
# are no longer silently dropped.
metric_columns = ['bid_count', 'opening_price', 'closing_price', 'profit']
auctions_visualize = auctions_df.groupby(['auction_type', 'item'])[metric_columns].mean()
auctions_visualize

Unnamed: 0_level_0,Unnamed: 1_level_0,bid_count,opening_price,closing_price,profit
auction_type,item,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3 day auction,Cartier wristwatch,13.888889,189.719444,630.856667,441.137222
3 day auction,Palm Pilot M515 PDA,12.8,113.11,223.085895,109.975895
3 day auction,Xbox game console,15.914286,43.008857,118.426571,75.417714
5 day auction,Cartier wristwatch,16.904762,309.283333,864.079048,554.795714
5 day auction,Palm Pilot M515 PDA,16.092593,98.481296,229.873148,131.391852
5 day auction,Xbox game console,18.714286,46.905238,139.052381,92.147143
7 day auction,Cartier wristwatch,13.896907,272.21567,936.069278,663.853608
7 day auction,Palm Pilot M515 PDA,19.752577,53.771368,231.800825,178.029457
7 day auction,Xbox game console,20.010753,36.217419,134.576989,98.35957


In [None]:
# Histogram of the number of bids per auction.
# sns.displot is figure-level and always creates its own figure, so pairing it
# with plt.subplots() leaves a blank 11x6 figure behind. The axes-level
# sns.histplot draws onto the axes created here instead (so `g` is now the
# matplotlib Axes rather than a FacetGrid).
f, ax = plt.subplots(1, 1, figsize=(11, 6))
g = sns.histplot(
        data = auctions_df,
        x = 'bid_count',
        color = '#55807a',
        ax = ax
    )
ax.set(
    title = 'Distribution of bids per auction',
    xlabel = 'Bids per auction',
    ylabel = 'Number of auctions'
);

###### TODO: Perform visualization of auctions_df 
###### TODO: Find out how to determine when a bidder wins an auction