In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, roc_curve, auc
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import MultinomialNB


import matplotlib.pyplot as plt

def train_test_split_by_year(data, test_year):
    """Split ``data`` chronologically around ``test_year``.

    Rows whose ``'year'`` value is strictly smaller than ``test_year`` form
    the training set; rows whose ``'year'`` equals ``test_year`` form the test
    set. Rows from later years end up in neither.

    Args:
        data: DataFrame with a ``'year'`` column.
        test_year: The year to hold out for testing.

    Returns:
        A ``(train_data, test_data)`` tuple of DataFrames.
    """
    season = data['year']
    earlier_seasons = data[season < test_year]
    held_out_season = data[season == test_year]
    return earlier_seasons, held_out_season

def evaluate_model(model, train_data_original, test_data_original,
                   teams_per_conference=4, conferences=('EA', 'WE')):
    """Fit ``model`` and pick the playoff teams of the test season.

    The model is trained on every column of the training frame except the
    identifiers (``'tmID'``, ``'confID'``, ``'year'``) and the ``'playoff'``
    target. Each test row is then scored with ``model.predict_proba`` and, in
    every listed conference, the ``teams_per_conference`` highest-scoring
    teams are labelled ``'Y'``; every other team is labelled ``'N'``.

    Neither input frame is modified.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict_proba(X)``.
        train_data_original: Training rows; must contain ``'tmID'``,
            ``'confID'``, ``'year'`` and ``'playoff'``.
        test_data_original: Rows to score; same columns as the training frame.
        teams_per_conference: Number of playoff slots per conference.
        conferences: ``'confID'`` values that receive playoff slots. Teams in
            any other conference are always labelled ``'N'``.

    Returns:
        DataFrame indexed like ``test_data_original`` with the columns
        ``'tmID'``, ``'confID'``, ``'playoff'`` (``'Y'``/``'N'``) and
        ``'probs'`` (the score used for ranking).

    Raises:
        KeyError: If one of the required columns is missing.
    """
    id_columns = ['tmID', 'confID', 'year']
    target = 'playoff'

    # DataFrame.drop returns a new frame, so the caller's data is never
    # mutated and no defensive copy is needed.
    X_train = train_data_original.drop(columns=id_columns + [target])
    y_train = train_data_original[target]
    X_test = test_data_original.drop(columns=id_columns + [target])

    model.fit(X_train, y_train)

    # Only the ranking score is needed; the hard class predictions are not
    # used because the final label comes from the per-conference ranking.
    # NOTE(review): column 1 is assumed to be the "made the playoffs" class
    # (the second entry of model.classes_) — confirm against the label encoding.
    probs = model.predict_proba(X_test)[:, 1]

    results = pd.DataFrame({
        'tmID': test_data_original['tmID'],
        'confID': test_data_original['confID'],
        'probs': probs,
    })

    # Collect the top-ranked team ids of each conference.
    qualified = []
    for conference in conferences:
        in_conference = results[results['confID'] == conference]
        top_teams = in_conference.nlargest(teams_per_conference, 'probs')
        qualified.extend(top_teams['tmID'])

    # Vectorized membership test instead of a row-wise apply.
    playoff = results['tmID'].isin(qualified).map({True: 'Y', False: 'N'})

    # Keep the column order tmID, confID, playoff, probs.
    results.insert(2, 'playoff', playoff)
    return results



In [2]:
def train_model(data, test_year, model):
    """Train ``model`` on past seasons and predict playoffs for ``test_year``.

    ``data`` is split with ``train_test_split_by_year`` and both parts are
    handed to ``evaluate_model``, whose result is returned unchanged.

    Args:
        data: Full DataFrame covering all seasons.
        test_year: Season to predict.
        model: Estimator passed on to ``evaluate_model``.

    Returns:
        The DataFrame produced by ``evaluate_model``.
    """
    earlier_seasons, held_out_season = train_test_split_by_year(data, test_year)
    return evaluate_model(model, earlier_seasons, held_out_season)
    

In [3]:
# Load the team-level dataset (path is relative to the working directory).
# The functions defined above read the columns 'year', 'tmID', 'confID' and
# 'playoff' from this frame; every remaining column is used as a model feature.
data = pd.read_csv('teams_final_zeros.csv')

In [4]:
# Hyper-parameters for the Multinomial Naive Bayes classifier.
# NOTE(review): scikit-learn documents that an explicit `class_prior` is not
# adjusted to the data, so `fit_prior` looks redundant here — confirm.
params = dict(alpha=0.1, class_prior=[0.3, 0.7], fit_prior=True)

# Train on every season before 11 and predict the playoff teams of season 11.
season11_results = train_model(data, test_year=11, model=MultinomialNB(**params))


In [5]:
# Keep only the team id and the predicted playoff label for the export.
# Reassigning (instead of dropping in place) and ignoring columns that are
# already gone lets this cell be re-run without raising a KeyError.
season11_results = season11_results.drop(columns=['confID', 'probs'], errors='ignore')
season11_results.to_csv('season11_results.csv', index=False)