In [104]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
import pulp

#change the file path for both the datasets 
#i'll send an email of both that I updated 
historical_data = pd.read_csv('/Users/yondawg/Documents/HistoricalData(2013-2023).csv')
historical_data = historical_data.drop(['Team'], axis=1)

historical_features = pd.get_dummies(historical_data.drop('POSTSEASON', axis=1), columns=['CONF'])
historical_target = historical_data['POSTSEASON']


upcoming_season_data = pd.read_csv('/Users/yondawg/Documents/NewData(2024).csv')
upcoming_season_data = upcoming_season_data.drop(['Team'], axis=1)
upcoming_season_features = pd.get_dummies(upcoming_season_data, columns=['CONF'])

#align the feature columns in the upcoming season data with the historical features
upcoming_season_features = upcoming_season_features.reindex(columns=historical_features.columns, fill_value=0)

#normalizing the data
scaler = MinMaxScaler()
historical_features_scaled = scaler.fit_transform(historical_features)
upcoming_season_features_scaled = scaler.transform(upcoming_season_features)

#historical_features_scaled
#upcoming_season_features_scaled
encoder = LabelEncoder()
historical_target_encoded = encoder.fit_transform(historical_target)
historical_target_one_hot = to_categorical(historical_target_encoded)

#split up the data
X_train, X_test, y_train, y_test = train_test_split(historical_features_scaled, historical_target_one_hot, test_size=0.2, random_state=42)
#X_train, X_test, y_train, y_test = train_test_split(historical_features_scaled, historical_target, test_size=0.2, random_state=42)

X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)


model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(32, activation='relu'),
    Dense(8, activation='softmax')  #7 different outcomes for the output layer
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=75, batch_size=32, validation_split=0.2)

loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test accuracy: {accuracy}')

predictions = model.predict(upcoming_season_features_scaled)

#convert predictions to labels
predicted_classes = np.argmax(predictions, axis=1)
predicted_labels = encoder.inverse_transform(predicted_classes)


CHAMPION, RUNNER_UP, FINAL_FOUR, ELITE_EIGHT, SWEET_SIXTEEN, ROUND_OF_32 = 0, 1, 2, 3, 4, 5

num_teams = len(predictions)
prob = pulp.LpProblem("Tournament_Place_Prediction", pulp.LpMaximize)

#creating binary variables for each category
champions = pulp.LpVariable.dicts("Champion", range(num_teams), cat=pulp.LpBinary)
runners_up = pulp.LpVariable.dicts("Runner_Up", range(num_teams), cat=pulp.LpBinary)
final_four = pulp.LpVariable.dicts("Final_Four", range(num_teams), cat=pulp.LpBinary)
elite_eight = pulp.LpVariable.dicts("Elite_Eight", range(num_teams), cat=pulp.LpBinary)
sweet_sixteen = pulp.LpVariable.dicts("Sweet_Sixteen", range(num_teams), cat=pulp.LpBinary)
round_of_32 = pulp.LpVariable.dicts("Round_of_32", range(num_teams), cat=pulp.LpBinary)

# Objective function
prob += pulp.lpSum([
    champions[i] * predictions[i, CHAMPION] +
    runners_up[i] * predictions[i, RUNNER_UP] +
    final_four[i] * predictions[i, FINAL_FOUR] +
    elite_eight[i] * predictions[i, ELITE_EIGHT] +
    sweet_sixteen[i] * predictions[i, SWEET_SIXTEEN] +
    round_of_32[i] * predictions[i, ROUND_OF_32]
    for i in range(num_teams)])

# Constraints
#one champion and runner-up
prob += pulp.lpSum(champions) == 1
prob += pulp.lpSum(runners_up) == 1

#4 Final Four teams (including champion and runner-up)
prob += pulp.lpSum(final_four) == 4
for i in range(num_teams):
    prob += final_four[i] >= champions[i]
    prob += final_four[i] >= runners_up[i]

#8 Elite Eight teams (including Final Four)
prob += pulp.lpSum(elite_eight) == 8
for i in range(num_teams):
    prob += elite_eight[i] >= final_four[i]

#16 Sweet Sixteen teams (including Elite Eight)
prob += pulp.lpSum(sweet_sixteen) == 16
for i in range(num_teams):
    prob += sweet_sixteen[i] >= elite_eight[i]

#32 Round of 32 teams (including Sweet Sixteen)
prob += pulp.lpSum(round_of_32) == 32
for i in range(num_teams):
    prob += round_of_32[i] >= sweet_sixteen[i]

#solve the ILP problem
prob.solve()

#map results back to teams
postseason_roles = ["None"] * num_teams
for i in range(num_teams):
    if pulp.value(champions[i]):
        postseason_roles[i] = "Champion"
    elif pulp.value(runners_up[i]):
        postseason_roles[i] = "Runner Up"
    elif pulp.value(final_four[i]):
        postseason_roles[i] = "Final Four"
    elif pulp.value(elite_eight[i]):
        postseason_roles[i] = "Elite Eight"
    elif pulp.value(sweet_sixteen[i]):
        postseason_roles[i] = "Sweet Sixteen"
    elif pulp.value(round_of_32[i]):
        postseason_roles[i] = "Round of 32"

upcoming_season_data['Predicted_POSTSEASON'] = postseason_roles

# Save the predictions
upcoming_season_data.to_csv('/Users/yondawg/Documents/UpcomingSeasonPredictions(no-names).csv', index=False)


Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75
Test accuracy: 0.4632352888584137
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

