In [1]:
def updater():
    """Run every Web_Get processing stage, with ``new=True``, for each event.

    The event list comes from ``wg.get_all_events(6, False)``. For each event
    the stages are invoked in a fixed order: rankings, matches, first
    predictions, extra team stats, match predictions. A tqdm progress bar
    advances once per event. Nothing is returned.

    NOTE(review): ``new=True`` presumably forces each stage to rebuild its
    data instead of reusing a cached copy -- confirm in Web_Get.
    """
    import ipynb.fs.defs.Web_Get as wg
    from tqdm import tqdm

    print(dir(wg))
    all_events = wg.get_all_events(6, False)

    # Stage names are looked up on ``wg`` at call time, in exactly this order.
    stage_names = (
        'get_rankings',
        'get_matches',
        'get_first_pred',
        'more_team_stats',
        'predict_matches',
    )

    progress = tqdm(total=len(all_events))
    with progress:
        for current_event in all_events:
            for stage_name in stage_names:
                getattr(wg, stage_name)(current_event, new=True)
            progress.update(1)

In [2]:
#updater()

In [3]:
def svc_model(C, g, X_train, y_train, beta, random_state, return_grid_fit=False):
    """Fit a default SVC baseline and a grid-searched SVC on the same data.

    Parameters
    ----------
    C : sequence
        Candidate values for the SVC ``C`` parameter.
    g : sequence
        Candidate values for the SVC ``gamma`` parameter.
    X_train, y_train
        Training features and labels; passed unchanged to ``fit``.
    beta : float
        ``beta`` of the F-beta score used to pick the best grid candidate.
    random_state : int or None
        Seed used for the SVC and for the cross-validation splitter.
    return_grid_fit : bool, default False
        When True, the fitted ``GridSearchCV`` object is returned as well.

    Returns
    -------
    tuple
        ``(clf, best_clf)`` or, with ``return_grid_fit=True``,
        ``(clf, best_clf, grid_fit)``. ``clf`` is the baseline SVC fitted with
        default hyper-parameters; ``best_clf`` is the estimator refit with the
        candidate that maximises the F-beta scorer.
    """
    from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV
    from sklearn.metrics import fbeta_score, accuracy_score, make_scorer, average_precision_score
    from sklearn.svm import SVC

    # Baseline model with default hyper-parameters.
    clf = SVC(random_state=random_state, cache_size=1000)
    clf.fit(X_train, y_train)

    parameters = {'C': C, 'tol': [1e-4], 'gamma': g}

    # Seed the splitter as well: without it the folds (and therefore the
    # selected best_clf) differ between runs despite the caller's seed.
    cv = StratifiedShuffleSplit(n_splits=4, test_size=0.20, random_state=random_state)

    # NOTE(review): scorer3 evaluates average precision on hard class
    # predictions, not on decision scores -- confirm that this is intended.
    scoring = {
        'scorer1': make_scorer(fbeta_score, beta=beta),
        'scorer2': make_scorer(accuracy_score),
        'scorer3': make_scorer(average_precision_score),
    }

    # 'scorer1' (F-beta) decides which candidate is refit as best_estimator_.
    grid_obj = GridSearchCV(clf, parameters, scoring=scoring, refit='scorer1', n_jobs=8, cv=cv)
    grid_fit = grid_obj.fit(X_train, y_train)

    best_clf = grid_fit.best_estimator_

    if return_grid_fit:
        return clf, best_clf, grid_fit
    return clf, best_clf

In [4]:
def matches(event_type, with_rs=True, week=6, only=False, dim_add=True, C=[10.984375], g=[0.0028], num=0):
    """Train and evaluate an SVC blue-win classifier on one group of events.

    Per-event features and labels come from ``wg.predict_matches(event)``.
    They are stacked, optionally augmented with random-projection and PCA
    columns, scaled to [-1, 1], split 85/15, and handed to ``svc_model``.
    Metrics for the baseline and the grid-searched model are printed, followed
    by a per-feature weight table. Nothing is returned.

    Parameters
    ----------
    event_type : {'cmps', 'dist_cmps', 'qm'}
        Selects the event list: ``wg.get_cmps()``, ``wg.get_dist_cmps()`` or
        ``wg.get_all_events(week, only)``.
    with_rs : bool, default True
        When False, the ranking-score / ``tba_rpEarned`` columns are dropped.
    week, only
        Forwarded to ``wg.get_all_events``; used only for ``event_type='qm'``.
    dim_add : bool, default True
        When truthy, append 20 Gaussian-random-projection columns (``grp*``)
        and 40 PCA columns (``pca*``) computed from the feature frame.
    C, g : list
        Grid candidates forwarded to ``svc_model``; never mutated here.
    num : int, default 0
        When non-zero, the grid-search results are written to
        ``'matches' + str(num) + '.csv'``.

    Raises
    ------
    ValueError
        If ``event_type`` is not recognised or the event list is empty.
    """
    from sklearn.metrics import fbeta_score, accuracy_score, average_precision_score
    import ipynb.fs.defs.Web_Get as wg
    from tqdm import tqdm
    import numpy as np
    import pandas as pd

    beta = 0.5
    random_state = 42

    # Columns removed when the caller asks for a model without ranking info.
    rs_columns = ['Ranking_Score_a', 'tba_rpEarned_OAVE', 'tba_rpEarned_CPR', 'tba_rpEarned_OPR', 'tba_rpEarned_DAVE', 'tba_rpEarned_DPR',
                  'Ranking_Score_a_from_mean', 'tba_rpEarned_OAVE_from_mean', 'tba_rpEarned_CPR_from_mean', 'tba_rpEarned_OPR_from_mean', 'tba_rpEarned_DAVE_from_mean', 'tba_rpEarned_DPR_from_mean']

    if event_type == 'cmps':
        events = wg.get_cmps()
    elif event_type == 'dist_cmps':
        events = wg.get_dist_cmps()
    elif event_type == 'qm':
        events = wg.get_all_events(week, only)
    else:
        # Previously fell through and crashed later with UnboundLocalError.
        raise ValueError("event_type must be 'cmps', 'dist_cmps' or 'qm', got {!r}".format(event_type))

    # Collect per-event pieces and stack them once: DataFrame.append in a loop
    # is quadratic and no longer exists in pandas >= 2.0.
    stat_frames = []
    label_arrays = []
    with tqdm(total=len(events)) as pbar:
        for event in events:
            stats, blue_wins = wg.predict_matches(event)
            stat_frames.append(stats)
            label_arrays.append(np.ravel(blue_wins))
            pbar.update(1)
    if not stat_frames:
        raise ValueError("no events returned for event_type={!r}".format(event_type))

    # concat builds a new frame, so the edits below never touch the frames
    # that wg.predict_matches handed back.
    X = pd.concat(stat_frames, ignore_index=True)
    y = np.concatenate(label_arrays)

    if not with_rs:
        X = X.drop(columns=rs_columns)
    print(X.shape)
    print(y.shape)

    # NOTE(review): the projections and the scaler below are fitted on the
    # full data set before the train/test split, so the reported test metrics
    # have seen test-set statistics.
    if dim_add:
        from sklearn.random_projection import GaussianRandomProjection
        from sklearn.decomposition import PCA
        grp = GaussianRandomProjection(n_components=20, random_state=random_state)
        grp.fit(X)
        grp_data = grp.transform(X)

        # Seeded so the randomized solver gives the same components each run.
        pca = PCA(n_components=40, svd_solver='randomized', random_state=random_state)
        pca.fit(X)
        pca_data = pca.transform(X)

        # Both projections were computed from the original columns above, so
        # appending the grp columns first does not affect the pca columns.
        for i in range(grp_data.shape[1]):
            X['grp' + str(i)] = grp_data[:, i]

        for i in range(pca_data.shape[1]):
            X['pca' + str(i)] = pca_data[:, i]

    from sklearn.preprocessing import MinMaxScaler
    headers = X.columns.values
    # feature_range must be passed by keyword on current scikit-learn.
    fit_scaler = MinMaxScaler(feature_range=(-1, 1)).fit(X[headers])
    X[headers] = fit_scaler.transform(X[headers])

    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.15,
                                                        random_state=random_state)

    print("Training set has {} samples and {} features.".format(X_train.shape[0], X_train.shape[1]))
    print("Testing set has {} samples and {} features.".format(X_test.shape[0], X_test.shape[1]))

    if num == 0:
        clf, best_clf = svc_model(C, g, X_train, y_train, beta, random_state)
    else:
        clf, best_clf, grid_fit = svc_model(C, g, X_train, y_train, beta, random_state, True)
        pd.DataFrame(grid_fit.cv_results_).to_csv('matches' + str(num) + '.csv', sep=',')
    print(best_clf)

    predictions = clf.predict(X_test)
    predictions_x = clf.predict(X_train)
    best_predictions = best_clf.predict(X_test)
    best_predictions_x = best_clf.predict(X_train)

    print()
    print("Unoptimized model\n------")
    print("Accuracy score on training data: {:.4f}".format(accuracy_score(y_train, predictions_x)))
    print("Accuracy score on testing data: {:.4f}".format(accuracy_score(y_test, predictions)))
    print("F-score on testing data: {:.4f}".format(fbeta_score(y_test, predictions, beta=beta)))
    print("Average precision score on testing data: {:.4f}".format(average_precision_score(y_test, predictions)))
    print("\nOptimized Model\n------")
    print("Final accuracy score on training data: {:.4f}".format(accuracy_score(y_train, best_predictions_x)))
    print("Final accuracy score on the testing data: {:.4f}".format(accuracy_score(y_test, best_predictions)))
    print("Final F-score on the testing data: {:.4f}".format(fbeta_score(y_test, best_predictions, beta=beta)))
    print("Final average precision score on testing data: {:.4f}".format(average_precision_score(y_test, best_predictions)))
    print()

    # NOTE(review): dual_coef_ @ support_vectors_ is the primal weight vector
    # only for a linear kernel, and svc_model builds SVC() without choosing a
    # kernel -- confirm that this ranking is meaningful. The intercept is
    # added to every entry, exactly as before.
    test = pd.DataFrame(np.matmul(best_clf.dual_coef_, best_clf.support_vectors_) + best_clf.intercept_, columns=X.columns.values)
    test_t = test.transpose()
    print(test_t)
    print(test_t.nlargest(15, 0))
    print()
    print(test_t.nsmallest(15, 0))
    

In [5]:
#matches('dist_cmps', False)

In [6]:
def future_matches_pred(week, with_rs=True, only=False, dim_add=True, C=[10.984375], g=[0.0028], num=0):
    """Train on earlier events and score predictions for the events of ``week``.

    Training data comes from ``wg.get_all_events(week=week-1, only=only)`` for
    ``week <= 6`` and from ``wg.get_all_events(week=6, only=only)`` for weeks
    7 and 8. The evaluation events are ``wg.get_all_events(week=week,
    only=True)`` for ``week <= 6``, ``wg.get_dist_cmps()`` for week 7 and
    ``wg.get_cmps()`` for week 8.

    The projections and the scaler are fitted on the training frame only and
    then applied to the evaluation frame. Two groups of three numbers are
    printed (accuracy, F-beta, average precision). The first group scores the
    grid-searched SVC. The second scores the values read from
    ``wg.get_first_pred(event)``, comparing column 0 against column 1
    (presumably predicted vs. actual winner -- confirm in Web_Get).
    Nothing is returned.

    Parameters
    ----------
    week : int
        Week to predict; must be <= 6, 7 or 8.
    with_rs : bool, default True
        When False, the ranking-score / ``tba_rpEarned`` columns are dropped
        from both the training and the evaluation features.
    only : bool, default False
        Forwarded to ``wg.get_all_events`` when loading the training events.
    dim_add : bool, default True
        When truthy, append 20 ``grp*`` and 40 ``pca*`` projection columns.
    C, g : list
        Grid candidates forwarded to ``svc_model``; never mutated here.
    num : int, default 0
        When non-zero, the grid-search results are written to
        ``'future_matches' + str(num) + '.csv'``.

    Raises
    ------
    ValueError
        If ``week`` is not handled, or either event list is empty.
    """
    from sklearn.metrics import fbeta_score, accuracy_score, average_precision_score
    import ipynb.fs.defs.Web_Get as wg
    from tqdm import tqdm
    import numpy as np
    import pandas as pd

    beta = 0.5
    random_state = 42

    # Columns removed when the caller asks for a model without ranking info.
    rs_columns = ['Ranking_Score_a', 'tba_rpEarned_OAVE', 'tba_rpEarned_CPR', 'tba_rpEarned_OPR', 'tba_rpEarned_DAVE', 'tba_rpEarned_DPR',
                  'Ranking_Score_a_from_mean', 'tba_rpEarned_OAVE_from_mean', 'tba_rpEarned_CPR_from_mean', 'tba_rpEarned_OPR_from_mean', 'tba_rpEarned_DAVE_from_mean', 'tba_rpEarned_DPR_from_mean']

    # Fail early: any other value used to leave `events` undefined.
    if not (week <= 6 or week in (7, 8)):
        raise ValueError("week must be <= 6, 7 or 8, got {!r}".format(week))

    # ---- training data ---------------------------------------------------
    # Weeks 7 and 8 both train on everything returned for week 6.
    train_week = week - 1 if week <= 6 else 6
    events = wg.get_all_events(week=train_week, only=only)

    # Collect per-event pieces and stack them once: DataFrame.append in a loop
    # is quadratic and no longer exists in pandas >= 2.0.
    train_frames = []
    train_labels = []
    with tqdm(total=len(events)) as pbar:
        for event in events:
            stats, blue_wins = wg.predict_matches(event)
            train_frames.append(stats)
            train_labels.append(np.ravel(blue_wins))
            pbar.update(1)
    if not train_frames:
        raise ValueError("no training events returned for week={!r}".format(week))

    X = pd.concat(train_frames, ignore_index=True)
    y = np.concatenate(train_labels)

    if not with_rs:
        X = X.drop(columns=rs_columns)
    print(X.shape)
    print(y.shape)

    from sklearn.random_projection import GaussianRandomProjection
    from sklearn.decomposition import PCA

    if dim_add:
        grp = GaussianRandomProjection(n_components=20, random_state=random_state)
        grp.fit(X)
        grp_data = grp.transform(X)

        # Seeded so the randomized solver gives the same components each run.
        pca = PCA(n_components=40, svd_solver='randomized', random_state=random_state)
        pca.fit(X)
        pca_data = pca.transform(X)

        for i in range(grp_data.shape[1]):
            X['grp' + str(i)] = grp_data[:, i]

        for i in range(pca_data.shape[1]):
            X['pca' + str(i)] = pca_data[:, i]

    from sklearn.preprocessing import MinMaxScaler
    headers = X.columns.values
    # feature_range must be passed by keyword on current scikit-learn.
    fit_scaler = MinMaxScaler(feature_range=(-1, 1)).fit(X[headers])
    X[headers] = fit_scaler.transform(X[headers])

    if num == 0:
        clf, best_clf = svc_model(C, g, X, y, beta, random_state)
    else:
        clf, best_clf, grid_fit = svc_model(C, g, X, y, beta, random_state, True)
        pd.DataFrame(grid_fit.cv_results_).to_csv('future_matches' + str(num) + '.csv', sep=',')

    # ---- evaluation data -------------------------------------------------
    if week <= 6:
        events = wg.get_all_events(week=week, only=True)
    elif week == 7:
        events = wg.get_dist_cmps()
    else:
        events = wg.get_cmps()

    eval_frames = []
    eval_labels = []
    first_pred_parts = []
    first_actual_parts = []
    with tqdm(total=len(events)) as pbar:
        for event in events:
            stats, blue_wins = wg.predict_matches(event)
            preds = wg.get_first_pred(event)

            eval_frames.append(stats)
            eval_labels.append(np.ravel(blue_wins))
            # Encode 'blue' as 1 and anything else as 0.
            first_pred_parts.append(np.array([1 if x == 'blue' else 0 for x in preds[:, 0]], dtype=int))
            first_actual_parts.append(np.array([1 if x == 'blue' else 0 for x in preds[:, 1]], dtype=int))
            pbar.update(1)
    if not eval_frames:
        raise ValueError("no evaluation events returned for week={!r}".format(week))

    X1 = pd.concat(eval_frames, ignore_index=True)
    y1 = np.concatenate(eval_labels)
    pred1 = np.concatenate(first_pred_parts)
    actual1 = np.concatenate(first_actual_parts)

    # Bug fix: the ranking columns must be removed from the *evaluation* frame
    # and *before* the fitted transforms run. The old code dropped them from
    # X a second time, after transforming, which raised KeyError.
    if not with_rs:
        X1 = X1.drop(columns=rs_columns)

    if dim_add:
        # Re-use the projections fitted on the training data.
        grp_data = grp.transform(X1)
        pca_data = pca.transform(X1)

        for i in range(grp_data.shape[1]):
            X1['grp' + str(i)] = grp_data[:, i]

        for i in range(pca_data.shape[1]):
            X1['pca' + str(i)] = pca_data[:, i]

    headers = X1.columns.values
    X1[headers] = fit_scaler.transform(X1[headers])

    predictions = best_clf.predict(X1)

    # Grid-searched SVC: accuracy, F-beta, average precision.
    print(accuracy_score(y1, predictions))
    print(fbeta_score(y1, predictions, beta=beta))
    print(average_precision_score(y1, predictions))
    print()
    # Values from wg.get_first_pred: same three metrics.
    print(accuracy_score(actual1, pred1))
    print(fbeta_score(actual1, pred1, beta=beta))
    print(average_precision_score(actual1, pred1))

In [7]:
#future_matches_pred(6)