# Clean Auctions Data

In [None]:
# Imports

import pandas as pd
import numpy as np

In [None]:
# Load the auctions CSV into a DataFrame, using the file's first column as the index

auctions_csv_path = '../data/auctions/d16_h15_auctions.csv'
auctions_df = pd.read_csv(auctions_csv_path, index_col=0)

# Row/column count of the raw data, for comparison with the filtered shapes below
print(auctions_df.shape)

auctions_df.head()

In [None]:
# Keep only the auctions that carry a buyout price (rows with a NaN buyout are discarded)

has_buyout = auctions_df['buyout'].notna()
auctions_df = auctions_df[has_buyout]

auctions_df.shape

In [None]:
# Discard auctions priced at or above the cap: only buyouts strictly below 1000000000 are kept.
# NOTE(review): the original comment describes this cap as the gold limit (9999999g); the unit of
# `buyout` is not visible here (presumably copper) - confirm the threshold matches the intended limit.

below_cap = auctions_df['buyout'].lt(1000000000)
auctions_df = auctions_df[below_cap]

auctions_df.shape

In [None]:
# Preview the first five rows that survived the buyout filters
auctions_df.head(n=5)

In [None]:
# Drop columns 'auction_id', 'time_left', 'bid'
#
# The result is reassigned instead of using inplace=True so the frame is never mutated
# behind the reader's back. errors='ignore' keeps the cell re-runnable: on a second run
# the columns are already gone and the drop is a no-op rather than a KeyError.

auctions_df = auctions_df.drop(columns=['auction_id', 'time_left', 'bid'], errors='ignore')

auctions_df

In [None]:
# Spot-check: list every auction row for a single item (item_id 31157)

is_target_item = auctions_df['item_id'].eq(31157)
auctions_df.loc[is_target_item]

In [None]:
# Use the cheapest price available since there is no way to get information about the sold auctions.
# Collapse to one row per item_id: minimum buyout, summed quantity, and the first timestamp of the group.
#
# The aggregations are named by string ('min', 'sum'): passing np.min / np.sum to agg is
# deprecated since pandas 2.1 and emits a FutureWarning, while the strings select the same
# groupby reductions on every pandas version.
#
# NOTE(review): `buyout` is renamed to `unit_price` without being divided by `quantity`.
# This presumes each buyout is already a per-unit price - confirm against the data source.

auctions_df = (
    auctions_df
    .groupby('item_id')
    .agg({'buyout': 'min', 'quantity': 'sum', 'timestamp': 'first'})
    .astype({'buyout': int})                    # buyout is cast to a whole-number price
    .rename(columns={'buyout': 'unit_price'})
    .reset_index()                              # bring item_id back as a regular column
)

# Fix the column order of the cleaned table
auctions_df = auctions_df[['item_id', 'quantity', 'unit_price', 'timestamp']]

auctions_df

In [None]:
# Sanity check: count the missing values left in each column of the cleaned table

auctions_df.isnull().sum()