In [171]:
# Step 1: Importing the required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers


In [172]:
# Step 2: Import data

# Import data from CSV files
data = pd.read_csv('/Users/johnparentejr/Documents/Final_Project-main/FP1/MM_CSV/Resumes.csv')


In [173]:
# Load Team Resume data
data

Unnamed: 0,YEAR,TEAM NO,TEAM,SEED,ROUND,NET RPI,RESUME,WAB RANK,ELO,B POWER,Q1 W,Q2 W,Q1 PLUS Q2 W,Q3 Q4 L,PLUS 500,R SCORE,BID TYPE
0,2024,1079,Akron,14,0,106,131,104,103,111.5,0,2,2,4,13,0.00,At-Large
1,2024,1078,Alabama,4,0,10,31,15,31,11.5,4,7,11,0,11,99.35,At-Large
2,2024,1077,Arizona,2,0,4,10,11,14,5.5,8,7,15,1,18,99.87,At-Large
3,2024,1076,Auburn,4,0,5,45,9,5,4.0,3,10,13,0,21,99.42,At-Large
4,2024,1075,Baylor,3,0,13,8,10,25,13.5,10,4,14,0,14,99.73,At-Large
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1074,2008,5,West Virginia,7,16,29,32,28,29,20.0,3,2,5,1,13,96.80,At-Large
1075,2008,4,Western Kentucky,12,16,39,124,46,25,45.0,0,1,1,1,19,5.30,Auto
1076,2008,3,Winthrop,13,64,108,121,108,93,96.0,1,2,3,7,9,0.00,Auto
1077,2008,2,Wisconsin,3,16,11,9,8,5,6.0,6,2,8,0,25,99.90,Auto


In [174]:
print(data['BID TYPE'].unique())

['At-Large' 'Auto']


In [175]:
# Step 3: Convert 'BID TYPE' column to binary values
# Auto = 1, At-Large = 0
data['BID TYPE'] = data['BID TYPE'].map({'Auto': 1, 'At-Large': 0})

In [176]:
data

Unnamed: 0,YEAR,TEAM NO,TEAM,SEED,ROUND,NET RPI,RESUME,WAB RANK,ELO,B POWER,Q1 W,Q2 W,Q1 PLUS Q2 W,Q3 Q4 L,PLUS 500,R SCORE,BID TYPE
0,2024,1079,Akron,14,0,106,131,104,103,111.5,0,2,2,4,13,0.00,0
1,2024,1078,Alabama,4,0,10,31,15,31,11.5,4,7,11,0,11,99.35,0
2,2024,1077,Arizona,2,0,4,10,11,14,5.5,8,7,15,1,18,99.87,0
3,2024,1076,Auburn,4,0,5,45,9,5,4.0,3,10,13,0,21,99.42,0
4,2024,1075,Baylor,3,0,13,8,10,25,13.5,10,4,14,0,14,99.73,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1074,2008,5,West Virginia,7,16,29,32,28,29,20.0,3,2,5,1,13,96.80,0
1075,2008,4,Western Kentucky,12,16,39,124,46,25,45.0,0,1,1,1,19,5.30,1
1076,2008,3,Winthrop,13,64,108,121,108,93,96.0,1,2,3,7,9,0.00,1
1077,2008,2,Wisconsin,3,16,11,9,8,5,6.0,6,2,8,0,25,99.90,1


In [177]:
# Step 4: Feature Selection
# Features = SEED, NET RPI, WAB RANK, ELO, B POWER, & PLUS 500
# BID TYPE is the target variable
X = data[['SEED', 'NET RPI', 'WAB RANK', 'ELO', 'B POWER', 'PLUS 500']]
y = data['BID TYPE']


In [178]:
# Step 5: Train Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [179]:
# Step 6: Initialize and fit the Standard Scaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [180]:
# Check if there are any missing values or infinite values in the data after scaling
assert not np.isnan(X_train).any(), "There are missing values in the training data"
assert not np.isnan(X_test).any(), "There are missing values in the testing data"
assert not np.isinf(X_train).any(), "There are infinite values in the training data"
assert not np.isinf(X_test).any(), "There are infinite values in the testing data"

In [181]:
# Step 7: Build the neural network
model = keras.Sequential([
    layers.Dense(32, activation='relu', input_shape=[len(X_train[0])]),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [182]:
# Step 8: Compile Model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [183]:
# Step 9: Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6165 - loss: 0.6635 - val_accuracy: 0.8150 - val_loss: 0.5591
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7876 - loss: 0.5350 - val_accuracy: 0.8150 - val_loss: 0.4823
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8086 - loss: 0.4699 - val_accuracy: 0.8266 - val_loss: 0.4332
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8422 - loss: 0.4262 - val_accuracy: 0.8439 - val_loss: 0.4010
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 816us/step - accuracy: 0.8404 - loss: 0.4178 - val_accuracy: 0.8439 - val_loss: 0.3884
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 811us/step - accuracy: 0.8227 - loss: 0.3984 - val_accuracy: 0.8439 - val_loss: 0.3795
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x2bd9c9050>

In [184]:
# Step 10: Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Accuracy: {accuracy*100}%')

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 446us/step - accuracy: 0.7941 - loss: 0.4309
Accuracy: 81.4814805984497%


In [185]:
# Step 11: Make predictions
predictions = model.predict(X_test)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
