In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import *
from sklearn.metrics import *
from sklearn.preprocessing import *
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import VotingRegressor
#from xgboost import XGBRegressor

### Loading Dataset

In [5]:
# Labelled training bids and the unlabelled competition test bids, both read
# straight from the DPhi datasets repository on GitHub.
TRAIN_URL = "https://raw.githubusercontent.com/dphi-official/Datasets/master/auction_data/train_set_label.csv"
TEST_URL = "https://raw.githubusercontent.com/dphi-official/Datasets/master/auction_data/test_set_label.csv"

auction_data = pd.read_csv(TRAIN_URL)
test_data = pd.read_csv(TEST_URL)

In [6]:
# Preview the first five rows of the training data.
auction_data.head()

Unnamed: 0,auctionid,bid,bidtime,bidder,bidderrate,openbid,price
0,1644594033,493.0,6.999456,boraborabora,67,9.99,498.0
1,1639309309,30.0,1.669618,aclbjc,0,1.0,374.99
2,1644109746,3103.0,6.917986,nelsoncpm,7,1.0,3103.0
3,1649858595,175.0,3.107014,pialu9,4,7.99,202.5
4,1647846714,226.0,2.721319,geemeetee@aol.com,17,1.0,905.45


In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
auction_data.describe()

Unnamed: 0,auctionid,bid,bidtime,bidderrate,openbid,price
count,1213.0,1213.0,1213.0,1213.0,1213.0,1213.0
mean,1644577000.0,647.652539,4.078951,31.807914,159.142852,1010.246076
std,3566101.0,701.003386,2.505148,79.112571,388.456626,832.676139
min,1638844000.0,1.0,0.007535,-4.0,0.01,103.5
25%,1641784000.0,161.0,1.578866,1.0,1.0,374.99
50%,1644139000.0,400.0,4.302025,5.0,7.99,680.0
75%,1647847000.0,895.0,6.777083,30.0,175.0,1600.0
max,1650986000.0,5400.0,6.999965,1303.0,5000.0,5400.0


In [8]:
# Column dtypes and non-null counts, to check for missing values before modelling.
auction_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1213 entries, 0 to 1212
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   auctionid   1213 non-null   int64  
 1   bid         1213 non-null   float64
 2   bidtime     1213 non-null   float64
 3   bidder      1213 non-null   object 
 4   bidderrate  1213 non-null   int64  
 5   openbid     1213 non-null   float64
 6   price       1213 non-null   float64
dtypes: float64(4), int64(2), object(1)
memory usage: 66.5+ KB


# Building Models

In [9]:
# Numeric predictor columns; the free-text `bidder` column is left out.
# NOTE(review): `auctionid` is an identifier rather than a measurement -
# confirm it is really meant to be used as a predictor.
features = ['bid', 'bidtime', 'bidderrate', 'openbid', 'auctionid']

# Design matrix and regression target (the `price` column).
X = auction_data.loc[:, features]
y = auction_data.loc[:, 'price']

In [10]:
# Hold out 27% of the labelled rows for validation; the fixed random_state
# keeps the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.27,
    random_state=9,
)

In [11]:
# Candidate regressors to compare, keyed by a short label.
# The tree-based models are randomised, so they get a fixed random_state
# (the same seed as the train/test split) to make the comparison reproducible
# under Restart & Run All. The other estimators keep their library defaults.
models = {
    'LR': LinearRegression(),
    'DT': DecisionTreeRegressor(random_state=9),
    'KNN': KNeighborsRegressor(),
    'SVR': SVR(),
    'RF': RandomForestRegressor(random_state=9),
}

In [13]:
def models_perfomance(X_train,X_test,y_train,y_test, estimators=None):
    """Fit each candidate regressor and score it by RMSE on the hold-out split.

    Parameters
    ----------
    X_train, y_train
        Features and target used to fit every estimator.
    X_test, y_test
        Features and target used to score every estimator.
    estimators : dict, optional
        Mapping of label -> estimator exposing ``fit`` and ``predict``.
        Defaults to the notebook-level ``models`` dictionary.

    Returns
    -------
    dict
        Mapping of label -> root-mean-squared error (float), in the same
        order as ``estimators``.

    Notes
    -----
    Estimators are fitted in place, so the objects passed in (or the ones in
    ``models``) are left trained on ``X_train`` after the call.
    """
    if estimators is None:
        estimators = models

    results = {}
    for name, model in estimators.items():
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        # Take the square root of the MSE ourselves: the `squared=False`
        # keyword was deprecated in scikit-learn 1.4 and removed in 1.6,
        # whereas sqrt(MSE) works on every version.
        results[name] = float(np.sqrt(mean_squared_error(y_test, predictions)))

    return results

In [14]:
# Fit every estimator in `models` on the training split and show its RMSE on the hold-out split.
models_perfomance(X_train,X_test,y_train,y_test)

{'LR': 394.965939700408,
 'DT': 359.4755182482071,
 'KNN': 124.87796929949596,
 'SVR': 861.0176013627166,
 'RF': 297.54777710435997}

In [15]:
# Larger random forest (400 trees), scored by RMSE on the hold-out split.
# random_state is fixed (same seed as the train/test split) so the score is
# reproducible across kernel restarts.
my_model = RandomForestRegressor(n_estimators=400, random_state=9)
my_model.fit(X_train, y_train)
# sqrt(MSE) instead of `squared=False`, which was deprecated in
# scikit-learn 1.4 and removed in 1.6.
float(np.sqrt(mean_squared_error(y_test, my_model.predict(X_test))))

287.027938144053

In [20]:
# 1-nearest-neighbour regressor, scored by RMSE on the hold-out split.
# NOTE(review): the features are unscaled, so distances are likely dominated
# by `auctionid` (values around 1.6e9 in the describe() output). 1-NN may
# therefore mostly be returning the price of another bid from the same
# auction - confirm this is intended and not target leakage.
my_model3 = KNeighborsRegressor(n_neighbors=1)
my_model3.fit(X_train, y_train)
# sqrt(MSE) instead of `squared=False`, which was deprecated in
# scikit-learn 1.4 and removed in 1.6.
float(np.sqrt(mean_squared_error(y_test, my_model3.predict(X_test))))

56.65137287639628

# Predicting the Test Dataset

In [22]:
# Preview the first five rows of the competition test data (no `price` column).
test_data.head()

Unnamed: 0,auctionid,bid,bidtime,bidder,bidderrate,openbid
0,1640550476,175.0,4.32941,simonho2001sg,8,1.0
1,1644343468,111.11,0.587164,tab999@msn.com,84,1.0
2,1639979107,103.5,6.999572,inyerdrms,2,5.0
3,1649718196,620.0,2.750775,kjr2306,24,1.0
4,1644077820,60.0,1.393611,chevysred,-1,1.0


In [23]:
# Keep only the columns the models were trained on, in the same order.
test_data = test_data.loc[:, features]

In [24]:
# Predict `price` for the competition test rows with the 1-nearest-neighbour model.
target = my_model3.predict(test_data)

In [25]:
# Wrap the model's predictions for the unseen test rows in a one-column frame
# labelled "prediction", using the test frame's index so rows line up for
# comparison.
res = pd.DataFrame(target, index=test_data.index, columns=["prediction"])

# Write the submission into the current working directory. index=False keeps
# the row labels out of the file, so the CSV holds only the prediction column.
res.to_csv("submission.csv", index=False)