In [None]:
from lib.nba_dataset import load_nba, load_rookies
# Season identifier handed to both loaders below.
SEASON = '2021'

df_rookies = load_rookies(SEASON)
df_NBA = load_nba(SEASON)

# Preview the first rows of every attribute of the loaded bundle that exposes .head().
for _attr in ("data", "record", "target", "frame"):
    print(getattr(df_NBA, _attr).head())

# The remaining descriptive attributes are printed in full.
for _attr in ("target_names", "feature_names", "DESCR"):
    print(getattr(df_NBA, _attr))



In [None]:
from lib.nba_dataset import update_combined
from sklearn.utils import Bunch
import pandas as pd
import numpy as np

# Window configuration: LEARN consecutive seasons beginning at START are merged
# into nba_combined (the data the classifier is later fitted on); TEST is the
# single season loaded separately into nba_predict.
START = 2020
LEARN = 4
TEST = 2025
seasons = list(map(str, range(START, START+LEARN, 1)))
nba = [Bunch() for _ in seasons]

# Empty accumulator that update_combined() extends one season at a time.
# NOTE(review): target_names, DESCR and feature_names are initialised with type
# objects (str, np.ndarray) rather than values; presumably update_combined()
# overwrites them — confirm against lib.nba_dataset.
nba_combined = Bunch(
        data=pd.DataFrame(),
        target=pd.DataFrame(),
        record=pd.DataFrame(),
        frame=pd.DataFrame(),
        target_names=str,   
        DESCR=str,
        feature_names=np.ndarray,
        data_module="sklearn.datasets.data",
    )

nba_predict = load_nba(str(TEST))
for season_id, season in enumerate(seasons):
    nba[season_id] = load_nba(season)
    nba_combined = update_combined(nba_combined, nba[season_id])
# range() excludes its stop value, so the last season actually loaded is
# START + LEARN - 1, not START + LEARN.
print(f'Number of players in seasons: {START} - {START + LEARN - 1}: {len(nba_combined.data)}')

In [None]:
from lib.nba_dataset import filter_dataset
from sklearn.impute import KNNImputer

imputer = KNNImputer(n_neighbors=5)


def _fill_missing(col):
    """Impute the nulls of a single column with the shared KNN imputer.

    A column without nulls is returned untouched. Otherwise the imputer is
    refitted on that column alone (reshaped into a one-feature matrix) and the
    imputed values are returned as a Series carrying the column's original index.

    `col` is presumably a pandas Series supplied by filter_dataset — confirm.
    """
    if not col.isnull().any():
        return col
    filled = imputer.fit_transform(col.values.reshape(-1, 1))
    return pd.Series(filled.flatten(), index=col.index)


# Both datasets go through filter_dataset with exactly the same settings.
_filter_options = dict(
    rookies=False,
    fill_na=_fill_missing,
    debug=True,
    drop_low_chances=True,
)
nba_combined = filter_dataset(nba_combined, **_filter_options)
nba_predict = filter_dataset(nba_predict, **_filter_options)

In [None]:
from lib.metric import get_corr_charts
import matplotlib.pyplot as plt

# Build charts from the filtered, combined dataset (correlation charts, judging
# by the helper's name — confirm in lib.metric). The result is kept because a
# later cell stores the confusion-matrix figure in it under the key
# "confusion_matrix"; presumably it is a dict of figures.
charts_dict = get_corr_charts(nba_combined)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from lib.metric import get_weighted_team_predictions, calculate_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Fixed seed: a random forest bootstraps rows and samples features at random,
# so without it every run produces different probabilities, a different score
# and a different confusion matrix.
RANDOM_STATE = 42
clf = RandomForestClassifier(random_state=RANDOM_STATE)

# The target frame is flattened into a 1-D integer vector for fitting.
clf.fit(nba_combined.data, nba_combined.target.values.flatten().astype(int))

# Class probabilities for the held-out season, turned into team assignments by
# the project helper.
probs = clf.predict_proba(nba_predict.data)
results_dict, true_results_dict, players_team, true_team = get_weighted_team_predictions(
    probs, nba_predict.target, nba_predict.record
)
score = calculate_score(results_dict, true_results_dict)
# Scoring the ground truth against itself supplies the denominator printed
# below (presumably the maximum attainable score — confirm in lib.metric).
max_score = calculate_score(true_results_dict, true_results_dict)
print(f'Your score: {score} / {max_score}!')

# Confusion matrix restricted to labels 1, 2 and 3; the figure is stored next to
# the other charts.
cm_plot, ax = plt.subplots()
cm = confusion_matrix(true_team, players_team, labels=[1, 2, 3])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[1, 2, 3])
disp.plot(ax=ax)
charts_dict["confusion_matrix"] = cm_plot
