<a href="https://colab.research.google.com/github/Fish210/3470-Competition-Team-Optimization-Model/blob/main/compTeamOptModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
##########
#Columns kept from the raw sheet: two identifiers, three features, and the label
cols = ["person_id", "event_id", "attendance_avg", "journal_rating", "reliability_rating", "label_selected"]

#Load, keep only the needed columns, drop rows with any missing value,
#and store the label as an integer -- done as one chain so no half-cleaned
#frame is left bound to a name
df = (
    pd.read_csv("Comp Team Opt Dummy Dataset - Sheet1.csv")[cols]
    .dropna()
    .assign(label_selected=lambda frame: frame["label_selected"].astype(int))
)

#Feature matrix (X) and target vector (y)
feature_cols = ["attendance_avg", "journal_rating", "reliability_rating"]
X = df[feature_cols]
y = df["label_selected"]

#Hold out 20% of the rows for testing; stratify on y so the split keeps the
#label proportions, and fix the seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

#Train Random Forest Model
#300 trees with a fixed seed so results are repeatable; class_weight="balanced"
#asks scikit-learn to weight the classes rather than treat every row equally
model = RandomForestClassifier(
    n_estimators=300,
    random_state=42,
    class_weight="balanced"
)
model.fit(X_train, y_train)

#Evaluate on the held-out rows only
pred = model.predict(X_test)
print("Confusion Matrix:\n", confusion_matrix(y_test, pred))
#zero_division=0: when the model never predicts a class, precision for that
#class is 0/0. Report it as 0 explicitly instead of letting scikit-learn emit
#an UndefinedMetricWarning for every averaged metric.
print("\nReport:\n", classification_report(y_test, pred, zero_division=0))

#Feature importance, largest first, labelled with the feature column names
importances = pd.Series(model.feature_importances_, index=X.columns).sort_values(ascending=False)
print("\nFeature importance:\n", importances)

#Rank System and round to two decimal places
#Column 1 of predict_proba is the probability of the second class in
#model.classes_ (label 1 when labels are 0/1).
#NOTE(review): X contains the rows the model was fitted on, so the scores for
#those rows are in-sample and likely optimistic -- confirm this is acceptable
#for the ranking, or score with out-of-fold predictions instead.
df["benefit_probability"] = model.predict_proba(X)[:, 1].round(2)

#Rounding to 2 dp creates ties; a stable sort keeps tied rows in their
#original order so the ranking is the same on every run
ranked = df.sort_values("benefit_probability", ascending=False, kind="mergesort")

#Save Rank List
ranked.to_csv("ranked_list.csv", index=False)
print("\nSaved ranked_list.csv!")

# Display Top 20 Pick
# Keep this as the LAST expression in the cell: Jupyter only renders the value
# of a cell's final expression, so a bare table anywhere earlier is discarded.
ranked[
    [
        "person_id",
        "event_id",
        "attendance_avg",
        "journal_rating",
        "reliability_rating",
        "benefit_probability",
        "label_selected",
    ]
].head(20) #Change for different num of top picks

Confusion Matrix:
 [[2 0]
 [2 0]]

Report:
               precision    recall  f1-score   support

           0       0.50      1.00      0.67         2
           1       0.00      0.00      0.00         2

    accuracy                           0.50         4
   macro avg       0.25      0.50      0.33         4
weighted avg       0.25      0.50      0.33         4


Feature importance:
 attendance_avg        0.592615
journal_rating        0.216414
reliability_rating    0.190972
dtype: float64

Saved ranked_list.csv!


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [3]:
import pandas as pd

# Metrics summarised in the comparison table
metric_cols = ["attendance_avg", "journal_rating", "reliability_rating"]

# Mean of each metric over every row of df, rounded to 2 decimals
overall_avg = df[metric_cols].mean().round(2)

# Same means, restricted to rows whose label_selected is 1
is_selected = df["label_selected"] == 1
comp_avg = df.loc[is_selected, metric_cols].mean().round(2)

# Place the two Series side by side; the keys become the column headers
avg_df = pd.concat(
    [overall_avg, comp_avg],
    axis=1,
    keys=["Overall Team", "Competition Team"],
)

avg_df


Unnamed: 0,Overall Team,Competition Team
attendance_avg,0.67,0.91
journal_rating,2.5,3.0
reliability_rating,2.2,2.67
