In [8]:
import pandas
import time
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import accuracy_score as acc
from sklearn.externals import joblib
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.linear_model import Perceptron
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier as MLPC
from sklearn.naive_bayes import GaussianNB as GNB
from sklearn.naive_bayes import BernoulliNB as BNB
from sklearn.naive_bayes import MultinomialNB as MNB
from sklearn.preprocessing import MinMaxScaler as MMS
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression as LR
from sklearn.neighbors import KNeighborsClassifier as KNC


dota_data = pandas.read_csv('dota2Train.csv', header = None)

In [9]:
(training_data, validation_data) = tts(dota_data, test_size = .15)

mms = MMS()
training_data_temp = mms.fit_transform(training_data)
validation_data_temp = mms.fit_transform(validation_data)

training_data_mms = pandas.DataFrame(data=training_data_temp[:,:])
validation_data_mms = pandas.DataFrame(data=validation_data_temp[:,:])



pca = PCA(n_components=10, whiten=True)
pca.fit(training_data)
print(pca.explained_variance_)

training_data_temp2 = pca.transform(training_data)
validation_data_temp2 = pca.transform(validation_data)

training_data_pca = pandas.DataFrame(data=training_data_temp2[:,:])
validation_data_pca = pandas.DataFrame(data=validation_data_temp2[:,:])

[  1.27100666e+03   6.71633971e+00   1.00552924e+00   3.52470512e-01
   3.33823121e-01   3.11002346e-01   2.93398914e-01   2.69067867e-01
   2.55245640e-01   2.34151401e-01]


In [62]:
rfc = RFC(n_jobs=-1, n_estimators=100, max_features=0.5)
rfc.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
rfc_pred = rfc.predict(validation_data.iloc[:,1:])
print(acc(rfc_pred, validation_data.iloc[:,0]))

0.569886670264


In [66]:
perc = Perceptron(n_jobs=-1, n_iter=1001,eta0=0.1)
perc.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
perc_pred = perc.predict(validation_data.iloc[:,1:])
print(acc(perc_pred, validation_data.iloc[:,0]))

0.58553696708


In [16]:
mlpc = MLPC(hidden_layer_sizes=(115,115,115,115,115,115), activation='tanh', max_iter=500)
mlpc.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
mlpc_pred = mlpc.predict(validation_data.iloc[:,1:])
print(acc(mlpc_pred, validation_data.iloc[:,0]))

0.59422558014


In [23]:
bnb = BNB()
bnb.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
bnb_pred = bnb.predict(validation_data.iloc[:,1:])
print(acc(bnb_pred, validation_data.iloc[:,0]))

0.567203914232


In [22]:
gnb = GNB()
gnb.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
gnb_pred = gnb.predict(validation_data.iloc[:,1:])
print(acc(gnb_pred, validation_data.iloc[:,0]))

0.56461361347


In [21]:
mnb = MNB()
mnb.fit(training_data_mms.iloc[:,1:],training_data_mms.iloc[:,0])
mnb_pred = mnb.predict(validation_data_mms.iloc[:,1:])
print(acc(mnb_pred, validation_data_mms.iloc[:,0]))

0.527917685998


In [None]:
start_time = time.time()
svc = SVC(C=0.25)
svc.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
print("--- Training took %s seconds ---" % (time.time() - start_time))
svc_pred = svc.predict(validation_data.iloc[:,1:])
print(acc(svc_pred, validation_data.iloc[:,0]))
print("--- %s seconds ---" % (time.time() - start_time))

--- Training took 978.3524339199066 seconds ---
0.583393293999
--- 1071.2892246246338 seconds ---


In [6]:
start_time = time.time()
sigmoid_svc = SVC(kernel='sigmoid')
sigmoid_svc.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
print("--- Training took %s seconds ---" % (time.time() - start_time))
sigmoid_svc_pred = sigmoid_svc.predict(validation_data.iloc[:,1:])
print(acc(sigmoid_svc_pred, validation_data.iloc[:,0]))
print("--- Total time: %s seconds ---" % (time.time() - start_time))

--- Training took 1085.9452102184296 seconds ---
0.519715066916
--- Total time: 1223.8227014541626 seconds ---


In [5]:
start_time = time.time()
linear_svc = SVC(kernel='linear', C=0.05)
linear_svc.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
print("--- Training took %s seconds ---" % (time.time() - start_time))
linear_svc_pred = linear_svc.predict(validation_data.iloc[:,1:])
print(acc(linear_svc_pred, validation_data.iloc[:,0]))
print("--- Total time: %s seconds ---" % (time.time() - start_time))

--- Training took 1164.3281786441803 seconds ---
0.603396172111
--- Total time: 1243.3781158924103 seconds ---


In [6]:
# joblib.dump(linear_svc, 'linear_svc_c0.05.pkl')
# linear_svc = joblib.load('linear_svc.pkl')

['linear_svc_c0.05.pkl']

In [6]:
linear_svc_pred = linear_svc.predict(validation_data.iloc[:,1:])
print(acc(linear_svc_pred, validation_data.iloc[:,0]))

0.597855806591


In [5]:
start_time = time.time()
poly_svc = SVC(kernel='poly', C=0.10)
poly_svc.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
print("--- Training took %s seconds ---" % (time.time() - start_time))
poly_svc_pred = poly_svc.predict(validation_data.iloc[:,1:])
print(acc(poly_svc_pred, validation_data.iloc[:,0]))
print("--- Total time: %s seconds ---" % (time.time() - start_time))

--- Training took 15299.048968791962 seconds ---
0.584328680386
--- Total time: 15342.022331953049 seconds ---


In [6]:
joblib.dump(poly_svc, 'poly_svc_c0.10.pkl')

['poly_svc_c0.10.pkl']

In [7]:
start_time = time.time()
lin_svc_pca = SVC(kernel='linear')
lin_svc_pca.fit(training_data_pca.iloc[:,1:],training_data_pca.iloc[:,0])
print("--- Training took %s seconds ---" % (time.time() - start_time))
lin_svc_pred_pca = lin_svc_pca.predict(validation_data_pca.iloc[:,1:])
print(acc(lin_svc_pred_pca, validation_data_pca.iloc[:,0]))
print("--- Total time: %s seconds ---" % (time.time() - start_time))

In [26]:
start_time = time.time()
log_reg = LR(solver='sag', max_iter=500, C=0.05)
# log_reg = LR(C=0.05)
log_reg.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
print("--- Training took %s seconds ---" % (time.time() - start_time))
log_reg_pred = log_reg.predict(validation_data.iloc[:,1:])
print(acc(log_reg_pred, validation_data.iloc[:,0]))
print("--- Total time: %s seconds ---" % (time.time() - start_time))
# 0.598287523385, 0.598143617787

--- Training took 32.70646333694458 seconds ---
0.598287523385
--- Total time: 32.71646428108215 seconds ---


In [36]:
start_time = time.time()
knc = KNC(weights='distance')
knc.fit(training_data.iloc[:,4:],training_data.iloc[:,0])
print("--- Training took %s seconds ---" % (time.time() - start_time))
knc_pred = knc.predict(validation_data.iloc[:,4:])
print(acc(knc_pred, validation_data.iloc[:,0]))
print("--- Total time: %s seconds ---" % (time.time() - start_time))

--- Training took 13.272883892059326 seconds ---
0.533098287523
--- Total time: 285.7741982936859 seconds ---
