In [2]:
import pandas
import time
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import accuracy_score as acc
from sklearn.externals import joblib
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.linear_model import Perceptron
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier as MLPC
from sklearn.naive_bayes import GaussianNB as GNB
from sklearn.naive_bayes import BernoulliNB as BNB
from sklearn.naive_bayes import MultinomialNB as MNB
from sklearn.preprocessing import MinMaxScaler as MMS


dota_data = pandas.read_csv('dota2Train.csv', header = None)

In [3]:
(training_data, validation_data) = tts(dota_data, test_size = .15)

mms = MMS()
training_data_temp = mms.fit_transform(training_data)
validation_data_temp = mms.fit_transform(validation_data)

training_data_mms = pandas.DataFrame(data=training_data_temp[:,:])
validation_data_mms = pandas.DataFrame(data=validation_data_temp[:,:])

In [62]:
rfc = RFC(n_jobs=-1, n_estimators=100, max_features=0.5)
rfc.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
rfc_pred = rfc.predict(validation_data.iloc[:,1:])
print(acc(rfc_pred, validation_data.iloc[:,0]))

0.569886670264


In [66]:
perc = Perceptron(n_jobs=-1, n_iter=1001,eta0=0.1)
perc.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
perc_pred = perc.predict(validation_data.iloc[:,1:])
print(acc(perc_pred, validation_data.iloc[:,0]))

0.58553696708


In [16]:
mlpc = MLPC(hidden_layer_sizes=(115,115,115,115,115,115), activation='tanh', max_iter=500)
mlpc.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
mlpc_pred = mlpc.predict(validation_data.iloc[:,1:])
print(acc(mlpc_pred, validation_data.iloc[:,0]))

0.59422558014


In [23]:
bnb = BNB()
bnb.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
bnb_pred = bnb.predict(validation_data.iloc[:,1:])
print(acc(bnb_pred, validation_data.iloc[:,0]))

0.567203914232


In [22]:
gnb = GNB()
gnb.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
gnb_pred = gnb.predict(validation_data.iloc[:,1:])
print(acc(gnb_pred, validation_data.iloc[:,0]))

0.56461361347


In [21]:
mnb = MNB()
mnb.fit(training_data_mms.iloc[:,1:],training_data_mms.iloc[:,0])
mnb_pred = mnb.predict(validation_data_mms.iloc[:,1:])
print(acc(mnb_pred, validation_data_mms.iloc[:,0]))

0.527917685998


In [None]:
start_time = time.time()
svc = SVC(C=0.25)
svc.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
print("--- Training took %s seconds ---" % (time.time() - start_time))
svc_pred = svc.predict(validation_data.iloc[:,1:])
print(acc(svc_pred, validation_data.iloc[:,0]))
print("--- %s seconds ---" % (time.time() - start_time))

--- Training took 978.3524339199066 seconds ---
0.583393293999
--- 1071.2892246246338 seconds ---


In [6]:
start_time = time.time()
sigmoid_svc = SVC(kernel='sigmoid')
sigmoid_svc.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
print("--- Training took %s seconds ---" % (time.time() - start_time))
sigmoid_svc_pred = sigmoid_svc.predict(validation_data.iloc[:,1:])
print(acc(sigmoid_svc_pred, validation_data.iloc[:,0]))
print("--- Total time: %s seconds ---" % (time.time() - start_time))

--- Training took 1085.9452102184296 seconds ---
0.519715066916
--- Total time: 1223.8227014541626 seconds ---


In [7]:
start_time = time.time()
linear_svc = SVC(kernel='linear')
linear_svc.fit(training_data.iloc[:,1:],training_data.iloc[:,0])
print("--- Training took %s seconds ---" % (time.time() - start_time))
linear_svc_pred = linear_svc.predict(validation_data.iloc[:,1:])
print(acc(linear_svc_pred, validation_data.iloc[:,0]))
print("--- Total time: %s seconds ---" % (time.time() - start_time))

--- Training took 6568.138354301453 seconds ---
0.599150956972
--- Total time: 6646.877037763596 seconds ---


In [8]:
joblib.dump(linear_svc, 'linear_svc.pkl')

['linear_svc.pkl']