In [1]:
from preprocessing.feature_engineering import BaselineClassifier
from preprocessing.turn_sampling import firstStrategy, randomStrategy, secondLastStrategy, middleStrategy, entireMatchStrategy, sampleTurns
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

In [None]:
# One-off preprocessing step, left disabled: when uncommented it parses the match
# turns, adds the two boolean comparison features (p1Outspeed, p1MoreHP) and
# rewrites cleaned_data/gameState.csv, the file the next cell reads.
#from preprocessing.data_processing import parseMatchTurns
#gameState = parseMatchTurns()
#gameState["p1Outspeed"] = gameState["p1activeSpeed"] > gameState["p2activeSpeed"]
#gameState["p1MoreHP"] = gameState["p1activeHP"] > gameState["p2activeHP"]
#gameState.to_csv("cleaned_data/gameState.csv", index=False)

In [None]:
# Load the cached game-state feature table and the label table (in that order).
gameState, labels = (
    pd.read_csv(csvPath)
    for csvPath in ("cleaned_data/gameState.csv", "cleaned_data/labels.csv")
)

In [None]:
# Preview the first rows of the label table (match_turn, player1wins, match, turnNumber).
labels.head()

Unnamed: 0,match_turn,player1wins,match,turnNumber
0,"[Gen 1] Random Battle-10071,51491,20071,43286-...",1,"[Gen 1] Random Battle-10071,51491,20071,43286",0
1,"[Gen 1] Random Battle-10071,51491,20071,43286-...",1,"[Gen 1] Random Battle-10071,51491,20071,43286",1
2,"[Gen 1] Random Battle-10071,51491,20071,43286-...",1,"[Gen 1] Random Battle-10071,51491,20071,43286",13
3,"[Gen 1] Random Battle-10071,51491,20071,43286-...",1,"[Gen 1] Random Battle-10071,51491,20071,43286",14
4,"[Gen 1] Random Battle-10071,51491,20071,43286-...",1,"[Gen 1] Random Battle-10071,51491,20071,43286",23


In [None]:
# Display name -> turn-sampling strategy imported from preprocessing.turn_sampling.
# Insertion order is the order in which the evaluation loop visits the strategies.
strategies = {
    "firstStrategy (predicting who wins in turn 0)": firstStrategy,
    "secondLastStrategy (predicting who wins in the final turn)": secondLastStrategy,
    "middleStrategy (predicting who wins in the middle of the match)": middleStrategy,
    "randomStrategy (sampling a random turn and predicting who wins)": randomStrategy,
    "entireMatchStrategy (training on and predicting multiple turns of different matches)": entireMatchStrategy,
}

In [None]:
def entireMatchSplit(labels):
    """Build a train/test split from the ``entireMatchStrategy`` partition.

    ``entireMatchStrategy(labels)`` supplies a (train, test) pair of label
    frames; each one is joined to the module-level ``gameState`` table with
    ``pd.merge`` (default join on the columns the two frames share) and then
    separated into features and the ``player1wins`` target.

    Returns:
        X_train, X_test, y_train, y_test
    """
    target = "player1wins"
    trainLabels, testLabels = entireMatchStrategy(labels)
    trainFrame, testFrame = (
        pd.merge(part, gameState) for part in (trainLabels, testLabels)
    )
    return (
        trainFrame.drop(columns=target),
        testFrame.drop(columns=target),
        trainFrame[target],
        testFrame[target],
    )


In [None]:
# Display the game-state feature table; the notebook renders a truncated view of it.
gameState

Unnamed: 0,p1pkmn1name,p1pkmn1status,p1pkmn1RemainingHp,p1pkmn1move1,p1pkmn1move2,p1pkmn1move3,p1pkmn1move4,p1pkmn1atk,p1pkmn1def,p1pkmn1spa,...,p2sortedHP1,p2sortedHP2,p2sortedHP3,p2sortedHP4,p2sortedHP5,p2sortedHP6,pokemonLeftDiff,match_turn,p1Outspeed,p1MoreHP
0,paras,healthy,1.000000,stunspore,spore,megadrain,bodyslam,210,183,183,...,1.000000,1.000000,1.000000,1.0,1.000000,1.000000,0,"[Gen 1] Random Battle-10071,51491,20071,43286-...",False,False
1,graveler,healthy,1.000000,earthquake,bodyslam,explosion,rockslide,231,263,151,...,1.000000,1.000000,1.000000,1.0,1.000000,1.000000,0,"[Gen 1] Random Battle-10071,51491,20071,43286-...",False,False
2,paras,healthy,0.601660,stunspore,spore,megadrain,bodyslam,210,183,183,...,0.000000,0.000000,0.513158,1.0,1.000000,1.000000,1,"[Gen 1] Random Battle-10071,51491,20071,43286-...",True,False
3,mewtwo,healthy,0.275862,psychic,amnesia,thunderbolt,blizzard,199,174,253,...,0.000000,0.000000,0.513158,1.0,1.000000,1.000000,0,"[Gen 1] Random Battle-10071,51491,20071,43286-...",True,False
4,chansey,healthy,0.203742,sing,softboiled,thunderwave,icebeam,75,75,211,...,0.000000,0.000000,0.000000,0.0,0.513158,1.000000,2,"[Gen 1] Random Battle-10071,51491,20071,43286-...",True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11095,charizard,healthy,0.363309,hyperbeam,swordsdance,fireblast,bodyslam,205,196,207,...,0.000000,0.000000,0.000000,0.0,0.000000,1.000000,1,"[Gen 1] Random Battle-9939,58,62081,43766-turn...",True,False
11096,moltres,healthy,1.000000,reflect,fireblast,hyperbeam,agility,230,215,269,...,0.000000,0.000000,0.000000,0.0,0.000000,0.386454,0,"[Gen 1] Random Battle-9939,58,62081,43766-turn...",True,True
11097,moltres,healthy,1.000000,reflect,fireblast,hyperbeam,agility,230,215,269,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,1,"[Gen 1] Random Battle-9939,58,62081,43766-turn...",False,False
11098,charizard,healthy,1.000000,hyperbeam,swordsdance,fireblast,bodyslam,205,196,207,...,0.205387,1.000000,1.000000,1.0,1.000000,1.000000,-1,"[Gen 1] Random Battle-9939,58,62081,43766-turn...",True,True


In [None]:
def regularStrategySplit(labels, strategy=None, random_state=None):
    """Sample turns with one strategy, join the features and split train/test.

    Args:
        labels: label table handed to ``sampleTurns``; the sampled rows must
            carry the ``player1wins`` target column.
        strategy: turn-sampling strategy handed to ``sampleTurns``. When
            omitted, the module-level name ``strategy`` is used, which is how
            the evaluation loop has always driven this function. Passing it
            explicitly removes the dependence on that hidden global.
        random_state: forwarded to ``train_test_split``. The default ``None``
            keeps the original unseeded split.

    Returns:
        X_train, X_test, y_train, y_test, as produced by ``train_test_split``.

    Raises:
        NameError: if no strategy is passed and no global ``strategy`` exists.
    """
    if strategy is None:
        # Legacy path: the caller relies on the loop variable `strategy`.
        try:
            strategy = globals()["strategy"]
        except KeyError:
            raise NameError("name 'strategy' is not defined") from None

    # Default pd.merge: inner join on the columns shared with gameState.
    sample = pd.merge(sampleTurns(labels, strategy), gameState)
    X = sample.drop("player1wins", axis=1)
    y = sample["player1wins"]
    return train_test_split(X, y, random_state=random_state)

In [None]:
from xgboost import XGBClassifier
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.linear_model import LogisticRegressionCV
from sklearn.svm import LinearSVC, SVC
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

In [None]:
def _buildPipe(columnTransformer, classifier):
    """Return a two-step Pipeline: ``transformer`` followed by ``classifier``."""
    return Pipeline([
        ("transformer", columnTransformer),
        ("classifier", classifier),
    ])


# Reduced feature set (used by the "without HP" models): standardised
# p1sumBST / p2sumBST / turnNumber plus a one-hot encoded pokemonLeftDiff.
transformer_ = ColumnTransformer([
    ("scaler", StandardScaler(), ["p1sumBST", "p2sumBST", "turnNumber"]),
    ("OHE", OneHotEncoder(handle_unknown="ignore"), ["pokemonLeftDiff"]),
])

# Full feature set: adds the active-pokemon HP values, the two boolean
# comparison flags and every column matching the regex "p.sortedHP.".
transformer = ColumnTransformer([
    (
        "scaler",
        StandardScaler(),
        ["p1sumBST", "p2sumBST", "turnNumber", "pokemonLeftDiff", "p1activeHP", "p2activeHP"],
    ),
    ("OHE", OneHotEncoder(handle_unknown="ignore"), ["p1Outspeed", "p1MoreHP"]),
    ("hpScaler", StandardScaler(), make_column_selector(pattern="p.sortedHP.")),
])

# Search space for the linear SVMs: C over powers of two from 2**-8 to 2**7.
# gamma_range is defined here but is not part of param_grid.
C_range = 2. ** np.arange(-8, 8)
gamma_range = 2. ** np.arange(-5, 5)
param_grid = {"classifier__C": C_range}

# Models on the full feature set. All of them share the one `transformer` object.
lrPipe = _buildPipe(transformer, LogisticRegressionCV(max_iter=10000))
svmPipe = GridSearchCV(
    _buildPipe(transformer, LinearSVC()),
    param_grid=param_grid,
    n_jobs=-1,
)
xgbPipe = _buildPipe(transformer, XGBClassifier(verbosity=0, use_label_encoder=False))
rfPipe = _buildPipe(transformer, RandomForestClassifier())

# Models on the reduced feature set. All of them share the one `transformer_` object.
lrPipe2 = _buildPipe(transformer_, LogisticRegressionCV(max_iter=10000))
svmPipe2 = GridSearchCV(
    _buildPipe(transformer_, LinearSVC()),
    param_grid=param_grid,
    n_jobs=-1,
)
rbfSVMPipe = _buildPipe(transformer_, SVC())

# Baseline: a single-step pipeline around the project's BaselineClassifier,
# constructed with the "pokemonLeftDiff" column name.
baselinePipe = Pipeline([
    ("baselineClassifier", BaselineClassifier("pokemonLeftDiff")),
])

~~First, most basic feature: pokémon difference between p1 and p2~~

~~=> model~~

~~Sorted list of HP remaining~~

~~=> Model~~

~~base stat totals~~

~~=> Model~~

~~Additional features: status? active mon remaining HP? Boosts?~~

~~=> Model~~

Map moves to types (think of a smarter feature)

=> Model


In [None]:
def _fitAndScore(pipe, X_train, y_train, X_test, y_test):
    """Fit ``pipe`` on the training split and score it on the test split.

    Returns:
        (y_pred, accuracy): the predictions for ``X_test`` and their accuracy
        against ``y_test``.
    """
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    # accuracy_score's documented signature is (y_true, y_pred).
    return y_pred, accuracy_score(y_test, y_pred)


# NOTE: the loop variable has to stay named `strategy`: regularStrategySplit(labels)
# picks up the current strategy from this module-level name.
for stratname, strategy in strategies.items():
    print(f"Starting: {stratname}")
    if strategy is entireMatchStrategy:
        X_train, X_test, y_train, y_test = entireMatchSplit(labels)
    else:
        X_train, X_test, y_train, y_test = regularStrategySplit(labels)

    # The baseline pipeline is only asked to predict; no fit() call is made here.
    y_pred_base = baselinePipe.predict(X_test)
    accuracy_base = accuracy_score(y_test, y_pred_base)

    # Fit order is kept (rf, lr, svm, xgb, lr2, svm2) so any draws from the
    # unseeded global random state happen in the same sequence as before.
    y_pred_rf, accuracy_rf = _fitAndScore(rfPipe, X_train, y_train, X_test, y_test)
    y_pred_lr, accuracy_lr = _fitAndScore(lrPipe, X_train, y_train, X_test, y_test)
    y_pred_svm, accuracy_svm = _fitAndScore(svmPipe, X_train, y_train, X_test, y_test)
    y_pred_xgb, accuracy_xgb = _fitAndScore(xgbPipe, X_train, y_train, X_test, y_test)
    y_pred_lr2, accuracy_lr2 = _fitAndScore(lrPipe2, X_train, y_train, X_test, y_test)
    y_pred_svm2, accuracy_svm2 = _fitAndScore(svmPipe2, X_train, y_train, X_test, y_test)

    print(f"Training on {len(y_train)} samples")
    # Report order differs from fit order on purpose: it matches the original output.
    for accuracy, modelName in (
        (accuracy_base, "the baseline classifier"),
        (accuracy_lr, "logistic regression"),
        (accuracy_lr2, "logistic regression without HP"),
        (accuracy_svm, "linear support vector machine"),
        (accuracy_svm2, "linear support vector machine without HP"),
        (accuracy_xgb, "gradient boosting"),
        (accuracy_rf, "random forest"),
    ):
        print(f"The accuracy on the test for {stratname} is: {accuracy} for {modelName}")
    print("---------------------------------------------------------------")



Starting: firstStrategy (predicting who wins in turn 0)
Training on 747 samples
The accuracy on the test for firstStrategy (predicting who wins in turn 0) is: 0.46184738955823296 for the baseline classifier
The accuracy on the test for firstStrategy (predicting who wins in turn 0) is: 0.5220883534136547 for logistic regression
The accuracy on the test for firstStrategy (predicting who wins in turn 0) is: 0.5381526104417671 for logistic regression without HP
The accuracy on the test for firstStrategy (predicting who wins in turn 0) is: 0.5261044176706827 for linear support vector machine
The accuracy on the test for firstStrategy (predicting who wins in turn 0) is: 0.5381526104417671 for linear support vector machine without HP
The accuracy on the test for firstStrategy (predicting who wins in turn 0) is: 0.5180722891566265 for gradient boosting
The accuracy on the test for firstStrategy (predicting who wins in turn 0) is: 0.5261044176706827 for random forest
---------------------------