In [36]:
import pulp
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Embedding, Flatten, Input, Concatenate
from tensorflow.keras.utils import to_categorical

# Load datasets and preprocess as before
historical_data = pd.read_csv('/Users/yondawg/Documents/HistoricalData(2013-2023).csv')
upcoming_season_data = pd.read_csv('/Users/yondawg/Documents/NewData(2024).csv')

all_teams = pd.concat([historical_data['Team'], upcoming_season_data['Team']]).unique()
team_to_index = {team: i + 1 for i, team in enumerate(all_teams)}
team_to_index['unknown'] = 0

historical_data['Team_idx'] = historical_data['Team'].map(team_to_index)
upcoming_season_data['Team_idx'] = upcoming_season_data['Team'].apply(lambda x: team_to_index.get(x, 0))

historical_features = pd.get_dummies(historical_data.drop(['POSTSEASON', 'Team'], axis=1), columns=['CONF'])
scaler = MinMaxScaler()
historical_features_scaled = scaler.fit_transform(historical_features)

encoder = LabelEncoder()
historical_target_encoded = encoder.fit_transform(historical_target)
historical_target_one_hot = to_categorical(historical_target_encoded)

X_train, X_test, y_train, y_test, team_train, team_test = train_test_split(
    historical_features_scaled, historical_target_one_hot, historical_data['Team_idx'], test_size=0.2, random_state=42
)

# Define your neural network structure and compile it as before
team_input = Input(shape=(1,), dtype='int64', name='team_input')
x = Embedding(input_dim=len(team_to_index) + 1, output_dim=10, input_length=1)(team_input)
x = Flatten()(x)
features_input = Input(shape=(X_train.shape[1],), name='features_input')
x = Concatenate()([x, features_input])
x = Dense(64, activation='relu')(x)
x = Dense(32, activation='relu')(x)
x = Dense(32, activation='relu')(x)
output = Dense(y_train.shape[1], activation='softmax')(x)
model = Model(inputs=[team_input, features_input], outputs=output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit([team_train, X_train], y_train, epochs=75, batch_size=32, validation_split=0.2)

# Model evaluation
loss, accuracy = model.evaluate([team_test, X_test], y_test)
print(f'Test accuracy: {accuracy}')

# Prepare upcoming season data for prediction
upcoming_season_features = pd.get_dummies(upcoming_season_data.drop(['Team', 'Team_idx'], axis=1), columns=['CONF'])
upcoming_season_features = upcoming_season_features.reindex(columns=historical_features.columns, fill_value=0)
upcoming_season_features_scaled = scaler.transform(upcoming_season_features)
team_upcoming = upcoming_season_data['Team_idx']
predictions = model.predict([team_upcoming, upcoming_season_features_scaled])

# Define indices for readability in constraints (assuming model output order)
CHAMPION, RUNNER_UP, FINAL_FOUR, ELITE_EIGHT, SWEET_SIXTEEN, ROUND_OF_32 = 0, 1, 2, 3, 4, 5

num_teams = len(predictions)
prob = pulp.LpProblem("Tournament_Place_Prediction", pulp.LpMaximize)

# Creating binary variables for each category
champions = pulp.LpVariable.dicts("Champion", range(num_teams), cat=pulp.LpBinary)
runners_up = pulp.LpVariable.dicts("Runner_Up", range(num_teams), cat=pulp.LpBinary)
final_four = pulp.LpVariable.dicts("Final_Four", range(num_teams), cat=pulp.LpBinary)
elite_eight = pulp.LpVariable.dicts("Elite_Eight", range(num_teams), cat=pulp.LpBinary)
sweet_sixteen = pulp.LpVariable.dicts("Sweet_Sixteen", range(num_teams), cat=pulp.LpBinary)
round_of_32 = pulp.LpVariable.dicts("Round_of_32", range(num_teams), cat=pulp.LpBinary)

# Objective function
prob += pulp.lpSum([
    champions[i] * predictions[i, CHAMPION] +
    runners_up[i] * predictions[i, RUNNER_UP] +
    final_four[i] * predictions[i, FINAL_FOUR] +
    elite_eight[i] * predictions[i, ELITE_EIGHT] +
    sweet_sixteen[i] * predictions[i, SWEET_SIXTEEN] +
    round_of_32[i] * predictions[i, ROUND_OF_32]
    for i in range(num_teams)])

# Constraints
# Single champion and runner-up
prob += pulp.lpSum(champions) == 1
prob += pulp.lpSum(runners_up) == 1

# Four Final Four teams (including champion and runner-up)
prob += pulp.lpSum(final_four) == 4
for i in range(num_teams):
    prob += final_four[i] >= champions[i]
    prob += final_four[i] >= runners_up[i]

# Eight Elite Eight teams (including Final Four)
prob += pulp.lpSum(elite_eight) == 8
for i in range(num_teams):
    prob += elite_eight[i] >= final_four[i]

# Sixteen Sweet Sixteen teams (including Elite Eight)
prob += pulp.lpSum(sweet_sixteen) == 16
for i in range(num_teams):
    prob += sweet_sixteen[i] >= elite_eight[i]

# Thirty-two Round of 32 teams (including Sweet Sixteen)
prob += pulp.lpSum(round_of_32) == 32
for i in range(num_teams):
    prob += round_of_32[i] >= sweet_sixteen[i]

# Solve the ILP problem
prob.solve()

# Extract results and map back to teams
postseason_roles = ["none"] * num_teams
for i in range(num_teams):
    if pulp.value(champions[i]):
        postseason_roles[i] = "Champion"
    elif pulp.value(runners_up[i]):
        postseason_roles[i] = "Runner Up"
    elif pulp.value(final_four[i]):
        postseason_roles[i] = "Final Four"
    elif pulp.value(elite_eight[i]):
        postseason_roles[i] = "Elite Eight"
    elif pulp.value(sweet_sixteen[i]):
        postseason_roles[i] = "Sweet Sixteen"
    elif pulp.value(round_of_32[i]):
        postseason_roles[i] = "Round of 32"

upcoming_season_data['Predicted_POSTSEASON'] = postseason_roles
print(upcoming_season_data[['Predicted_POSTSEASON']])

# Save the predictions
upcoming_season_data.to_csv('/Users/yondawg/Documents/UpcomingSeasonPredictions.csv', index=False)


Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75
Test accuracy: 0.4852941036224365
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

