In [6]:
import numpy as np
import pandas as pd
from scipy.sparse import csgraph
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV, RidgeClassifier, RidgeClassifierCV,\
SGDClassifier, Perceptron, PassiveAggressiveClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid, RadiusNeighborsClassifier
from sklearn.semi_supervised import LabelPropagation, LabelSpreading
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.svm import LinearSVC, NuSVC, SVC
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier

In [7]:
def labelencode_dataframe(dataframe_columns, classlist, column_names):
    le = LabelEncoder().fit(classlist)
    if len(dataframe_columns.shape) == 1:
        df_new = pd.DataFrame(le.transform(dataframe_columns), columns=column_names)
    else:
        df_new = dataframe_columns.apply(le.transform)
        df_new.columns = column_names
    return df_new

In [8]:
def dataframe_tolist(dataframe):
    ret_list = []
    headers = list(dataframe)
    for header in headers:
        ret_list.extend(list(set(dataframe[header])))
    return list(set(ret_list))

In [9]:
data = pd.read_csv('psl_data2.csv', header=0, delimiter=',')
#Separating all features for encoding
teams = list(set(sum([list(data[['team1']]), list(data['team2'])], [])))
venues = list(set((data['venue'])))
toss_decisions = list(set(data['toss_decision']))
players = dataframe_tolist(data.iloc[:,2:24])
#encoding.
team1_new = labelencode_dataframe(data['team1'], teams, ['team1'])
team2_new = labelencode_dataframe(data['team2'], teams, ['team2'])
toss_winner_new = labelencode_dataframe(data['toss_winner'], teams, ['toss_winner'])
winner_new = labelencode_dataframe(data['winner'], teams, ['winner'])
venue_new = labelencode_dataframe(data['venue'], venues, ['venue'])
toss_decision_new = labelencode_dataframe(data['toss_decision'], toss_decisions, ['toss_decision'])
players_team1_new = labelencode_dataframe(data.iloc[:,2:13], players, list(data.iloc[:,2:13].columns))
players_team2_new = labelencode_dataframe(data.iloc[:,13:24], players, list(data.iloc[:,13:24].columns))
players_team1_new_aug = labelencode_dataframe(data.iloc[:,13:24], players, list(data.iloc[:,2:13].columns))
players_team2_new_aug = labelencode_dataframe(data.iloc[:,2:13], players, list(data.iloc[:,13:24].columns))
#New Encoded Data
data_new = pd.concat([team1_new, team2_new, players_team1_new, players_team2_new, venue_new, toss_winner_new,
                      toss_decision_new, winner_new], axis=1)
data_new_2 = pd.concat([team2_new.rename({'team2': 'team1'}, axis=1), team1_new.rename({'team1': 'team2'}, axis=1),
                        players_team1_new_aug, players_team2_new_aug, venue_new, toss_winner_new, toss_decision_new,
                        winner_new], axis=1)
data_final = data_new.append(data_new_2)
#Features
X = pd.get_dummies(data.iloc[:,0:27])
#Labels
Y = pd.DataFrame(np.where(data_final['team1']==data_final['winner'], 1, 0))

In [10]:
cvs = cross_val_score(LogisticRegression(), X, Y, cv=5)
print "Cross Validation Logistic Regression: ", cvs.mean(), '+/-', cvs.std()

ValueError: Found input variables with inconsistent numbers of samples: [80, 160]

In [11]:
cvs = cross_val_score(RandomForestClassifier(max_depth=5, random_state=0), X, Y, cv=5)
print "Cross Validation RandomForest: ", cvs.mean(), '+/-', cvs.std()

ValueError: Found input variables with inconsistent numbers of samples: [80, 160]

In [12]:
cvs = cross_val_score(BernoulliNB(), X, Y, cv=5)
print "Cross Validation BernoulliNB: ", cvs.mean(), '+/-', cvs.std()

ValueError: Found input variables with inconsistent numbers of samples: [80, 160]

In [None]:
cvs = cross_val_score(DecisionTreeClassifier(), X, Y, cv=5)
print "Cross Validation DecisionTreeClassifier: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(ExtraTreeClassifier(), X, Y, cv=5)
print "Cross Validation ExtraTreeClassifier: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(ExtraTreesClassifier(), X, Y, cv=5)
print "Cross Validation ExtraTreesClassifier: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(KNeighborsClassifier(), X, Y, cv=5)
print "Cross Validation KNeighborsClassifier: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(LabelPropagation(), X, Y, cv=5).mean()
print "Cross Validation LabelPropagation: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(LabelSpreading(), X, Y, cv=5)
print "Cross Validation LabelSpreading: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(LinearDiscriminantAnalysis(), X, Y, cv=5)
print "Cross Validation LinearDiscriminantAnalysis: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(LinearSVC(), X, Y, cv=5)
print "Cross Validation LinearSVC: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(LogisticRegressionCV(), X, Y, cv=5)
print "Cross Validation LogisticCV Regression: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(MLPClassifier(solver='lbfgs', hidden_layer_sizes=(27, 12, 6, 3)), X, Y, cv=5)
print "Cross Validation ANN: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(NearestCentroid(), X, Y, cv=5)
print "Cross Validation NearestCentroid: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(QuadraticDiscriminantAnalysis(), X, Y, cv=5)
print "Cross Validation QuadraticDiscriminantAnalysis: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(RadiusNeighborsClassifier(radius=290), X, Y, cv=5)
print "Cross Validation RadiusNeighborsClassifier: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(RidgeClassifier(), X, Y, cv=5)
print "Cross Validation RidgeClassifier: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(RidgeClassifierCV(), X, Y, cv=5)
print "Cross Validation RidgeClassifierCV: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(NuSVC(nu=0.31), X, Y, cv=5)
print "Cross Validation NuSVC: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(SVC(), X, Y, cv=5)
print "Cross Validation SVC: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(GaussianProcessClassifier(), X, Y, cv=5)
print "Cross Validation GaussianProcessClassifier: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(GradientBoostingClassifier(), X, Y, cv=5)
print "Cross Validation GradientBoostingClassifier: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(SGDClassifier(), X, Y, cv=5)
print "Cross Validation SGDClassifier: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(Perceptron(), X, Y, cv=5)
print "Cross Validation Perceptron: ", cvs.mean(), '+/-', cvs.std()

In [None]:
cvs = cross_val_score(PassiveAggressiveClassifier(), X, Y, cv=5)
print "Cross Validation PassiveAggressiveClassifier: ", cvs.mean(), '+/-', cvs.std()