In [1]:
import pandas as pd

In [2]:
# Import data into Pandas Dataframes. 
# By setting header=None, we ensure pandas does not interpret the first row as headers.
dota2_train_df = pd.read_csv("dota2Train.csv",header=None)
dota2_test_df = pd.read_csv("dota2Test.csv",header=None)

In [3]:
# Rename the Dataframe Columns, as x1,x2,...
# Keep in mind that the first column is the target
dota2_train_df.columns = ["target"] + [f"x{i}" for i in range(1,dota2_train_df.shape[1])]
dota2_test_df.columns = ["target"] + [f"x{i}" for i in range(1,dota2_test_df.shape[1])]

In [4]:
# Display the first 5 instances, to check if everything works fine
dota2_train_df.head()

Unnamed: 0,target,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,x107,x108,x109,x110,x111,x112,x113,x114,x115,x116
0,-1,223,2,2,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,152,2,2,0,0,0,1,0,-1,...,0,0,0,0,0,0,0,0,0,0
2,1,131,2,2,0,0,0,1,0,-1,...,0,0,0,0,0,0,0,0,0,0
3,1,154,2,2,0,0,0,0,0,0,...,-1,0,0,0,0,0,0,0,0,0
4,-1,171,2,3,0,0,0,0,0,-1,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Let's now create the validation set, picking randomly from the train test, so we can evaluate the model.
# First, we need to split the input columns with the target column for both training and test sets.
from sklearn.model_selection import train_test_split
X_train = dota2_train_df.drop(["target"], axis=1)
y_train = dota2_train_df["target"]
X_test = dota2_test_df.drop(["target"], axis=1)
y_test = dota2_test_df["target"]
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

In [6]:
print(f"Train set shape: {X_train.shape}")
print(f"Validation set shape: {X_val.shape}")
print(f"Test set shape: {X_test.shape}")

Train set shape: (74120, 116)
Validation set shape: (18530, 116)
Test set shape: (10294, 116)


In [7]:
from keras.models import Sequential
from keras.layers import Dense, Input
model = Sequential()
model.add(Input(shape=(116,)))
model.add(Dense(116, activation = "relu"))
model.add(Dense(50, activation = "relu"))
model.add(Dense(1))

In [8]:
model.compile(loss="binary_crossentropy",optimizer="adam",metrics=["accuracy"])
model.summary()

In [9]:
history = model.fit(X_train,y_train,validation_data=(X_val,y_val),epochs=20,batch_size=32,verbose=1)

Epoch 1/20
[1m2317/2317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.5233 - loss: 15.2000 - val_accuracy: 0.5304 - val_loss: 14.9737
Epoch 2/20
[1m2317/2317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.5260 - loss: 15.1121 - val_accuracy: 0.5304 - val_loss: 14.9737
Epoch 3/20
[1m2317/2317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - accuracy: 0.5252 - loss: 15.1400 - val_accuracy: 0.5304 - val_loss: 14.9737
Epoch 4/20
[1m2317/2317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.5239 - loss: 15.1814 - val_accuracy: 0.5304 - val_loss: 14.9737
Epoch 5/20
[1m2317/2317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.5242 - loss: 15.1716 - val_accuracy: 0.5304 - val_loss: 14.9737
Epoch 6/20
[1m2317/2317[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.5274 - loss: 15.0689 - val_accuracy: 0.5304 - val_loss: 14.9737
Epoc

In [10]:
model.evaluate(X_test,y_test)

[1m322/322[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5321 - loss: 14.9188


[14.84282398223877, 0.5344861149787903]

In [11]:
from sklearn.linear_model import LogisticRegression
import warnings
warnings.filterwarnings('ignore')
lr = LogisticRegression()
lr.fit(X_train,y_train)

In [12]:
lr.score(X_val,y_val)

0.6041554236373449