# Method

### Import support packages

In [None]:
import os
import kagglehub
from IPython.display import clear_output


from DataGeneration import (
    get_season_mvp,
    collect_player_stats,
    load_combined_mvp_2001_2023,
    fetch_mvp_stats_by_year,
    check_missing_players,
    print_missing_players,
    get_player_season,
)

from DataProcessing import (
    get_iterable_window_data,
)

from DataExploration import (
    visualize_combined_sections,    
)

from DataModelling import (
    analyze_season_sections,
)

from DataFinalDisplay import (
    analyze_mvp_share_importance_over_time_sliding_windows
)

### Output directories
# Each directory is created right after its path is defined; exist_ok=True
# keeps re-running this cell harmless when the directory is already there.
player_data_path = 'Raw_player_data_sets'
os.makedirs(player_data_path, exist_ok=True)

window_player_path = 'windowed_data_sets'
os.makedirs(window_player_path, exist_ok=True)

# Ask once whether later cells should prompt for choices. Only the answer
# "yes" (any casing, surrounding whitespace ignored) switches custom control on.
custom_control_input = input("Do you want custom control? (yes/no): ").strip().lower()
custom_control = custom_control_input == 'yes'



### Fetch and save dataset

Yearly MVP results

In [None]:
# Combined MVP table returned by the DataGeneration loader.
mvp_data = load_combined_mvp_2001_2023()

# Group the 'Player' column by 'year' and turn it into {year: [players, ...]}.
players_by_year = (
    mvp_data
    .groupby('year')['Player']
    .apply(list)
    .to_dict()
)

# NOTE(review): the second positional argument is passed as False; its meaning
# is defined in DataGeneration.fetch_mvp_stats_by_year -- confirm there.
fetch_mvp_stats_by_year(
    mvp_data,
    False,
    player_data_path,
    combined_mvp_data_path="MVP_data-sets/combined_MVP_data_set.csv",
)

# Result is printed by the next cell via print_missing_players.
tempdic = check_missing_players(players_by_year, player_data_path)

### Missing player entries:

In [None]:
# Print the result of check_missing_players, which the previous cell stored
# in tempdic (computed from players_by_year and player_data_path).
print_missing_players(tempdic)

### Generate windows and new data sets

In [None]:
# Presumably generates the windowed data sets from the raw player data and the
# combined MVP CSV (per this cell's heading) -- confirm against DataProcessing.
# NOTE(review): the return value is discarded. If get_iterable_window_data is a
# lazy generator, this call does no work until it is iterated, which would make
# this cell a no-op -- confirm it is called here for an eager side effect.
get_iterable_window_data(players_by_year=players_by_year, data_path=player_data_path, mvp_data_path="MVP_data-sets/combined_MVP_data_set.csv")

### Run to get visualisations of a given window
### Remember to enter a number when prompted (custom control only)!

In [None]:
# Visualise one window of data, either split into start/middle/finish sections
# or as a full season.
#
# The raw-data directory is re-stated so this cell can be re-run on its own.
# players_by_year and custom_control are NOT defined here; they must already
# exist from the earlier cells.
player_data_path = 'Raw_player_data_sets'


if custom_control:
    # Let the user choose between sectioned seasons and the full season.
    # Only the answer "yes" enables the split.
    split_choice = input("Do you want to split the season into start, middle, and finish sections? (yes/no): ").strip().lower()
    split_into_sections = split_choice == 'yes'
else:
    split_into_sections = True


# Get the iterator for window data
window_data_iterator = get_iterable_window_data(
    players_by_year,
    player_data_path,
    split_into_sections=split_into_sections,
    mvp_data_path="MVP_data-sets/combined_MVP_data_set.csv",
    return_aggregated=False,
)

# Convert the iterator to a list to allow indexing for selection
windows_data = list(window_data_iterator)

# Fail early with a clear message instead of an opaque IndexError further down.
if not windows_data:
    raise IndexError("No windows were generated; there is nothing to visualize.")


if custom_control:
    # List the available windows for selection
    for i, window in enumerate(windows_data):
        print(f"Window {i}: Years - {window['window_years']}")

    # Keep asking until the answer is one of the window numbers listed above,
    # so a typo or an out-of-range number does not abort the cell.
    while True:
        raw_choice = input("Enter the window number you want to visualize: ")
        try:
            selected_window_index = int(raw_choice)
        except ValueError:
            print(f"{raw_choice!r} is not a whole number, please try again.")
            continue
        if 0 <= selected_window_index < len(windows_data):
            break
        print(f"Please enter a number between 0 and {len(windows_data) - 1}.")
else:
    # Default window used when custom control is off.
    # NOTE(review): presumably the 2019-2021 window if windows start at 2001
    # and advance one year at a time -- confirm against window['window_years'].
    selected_window_index = 18
    if selected_window_index >= len(windows_data):
        raise IndexError(
            f"Default window index {selected_window_index} is out of range: "
            f"only {len(windows_data)} windows were generated."
        )



# Get the selected window's data
selected_window_data = windows_data[selected_window_index]

# Generate the visualizations for the selected window based on the user's choice
if split_into_sections:
    # Extract the combined sections
    combined_start_section = selected_window_data['combined_start_section']
    combined_middle_section = selected_window_data['combined_middle_section']
    combined_finish_section = selected_window_data['combined_finish_section']

    # Generate the visualizations for the selected window with divided sections
    visualize_combined_sections(
        start_section=combined_start_section,
        middle_section=combined_middle_section,
        finish_section=combined_finish_section,
        save_plots=True
    )



else:
    # Extract the combined season
    combined_season = selected_window_data['combined_season']
    # Generate the visualizations for the selected window with the full season.
    # NOTE(review): unlike the sectioned branch, save_plots is not passed here,
    # so the function's default applies -- confirm that is intended.
    visualize_combined_sections(combined_season=combined_season)


### Data modelling

In [6]:
### Generate a new iterable for the modelling step.
# Sections are always requested here (split_into_sections=True), independent of
# the choice made in the visualisation cell.
window_for_modelling = get_iterable_window_data(
    players_by_year,
    player_data_path,
    split_into_sections=True,
    mvp_data_path="MVP_data-sets/combined_MVP_data_set.csv",
    return_aggregated=False,
)

# window_data_iterator is rebound to the modelling output; the trends cell
# passes this value on as its window_data_iterator argument.
window_data_iterator = analyze_season_sections(
    window_for_modelling,
    verbose=False,
)
 


### Generate trends

In [None]:
# Overlapping three-season windows that advance one season at a time:
# the first is [2001, 2002, 2003] and the last is [2021, 2022, 2023].
windows = [
    [first_year, first_year + 1, first_year + 2]
    for first_year in range(2001, 2022)
]

# Each window is identified by the season it starts with.
window_identifiers = [years[0] for years in windows]

# Run the sliding-window analysis on the modelling output and unpack its two
# return values.
# - window_data_iterator is the value returned by analyze_season_sections in
#   the modelling cell, not the raw windowed data.
# - window_identifiers holds the first year of each window.
# - NOTE(review): plot_predictions=[2018] presumably selects the window whose
#   identifier is 2018 (i.e. 2018-2020) -- confirm in DataFinalDisplay.
# - NOTE(review): test_size and random_state look like train/test split
#   settings -- confirm how the callee uses them.
feature_importances_over_time, stat_results_df = analyze_mvp_share_importance_over_time_sliding_windows(
    window_data_iterator=window_data_iterator,
    window_identifiers=window_identifiers,
    verbose=True,
    plot_trends=True,
    plot_predictions=[2018],
    test_size=0.2,
    random_state=42
)