## NBA CTG (Cleaning the Glass) ingestion for Transition metrics and merging with Travel metrics
### https://cleaningtheglass.com/stats/team/2/team#tab-context_trans_offense
###
### Authored by Murali Balasubramanian, DBA, Walsh College, MI, USA

In [None]:
import pandas as pd

In [None]:
# Load the 2024-25 transition-metric workbook, parse 'Date' as datetime, and
# order the games by team first and then chronologically within each team.
# NOTE: reset_index() is called without drop=True, so the pre-sort row labels
# are kept in a new leading 'index' column.
df_transition_perc = (
    pd.read_excel('Transition_metric_travel_24_25.xlsx')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values(['Team', 'Date'])
    .reset_index()
)

In [None]:
def _find_travel_row(df_travel_transition, origin, destination):
    """Return the first travel record whose 'From'/'To' match the given leg."""
    matches = df_travel_transition[
        (df_travel_transition['From'] == origin)
        & (df_travel_transition['To'] == destination)
    ]
    if matches.empty:
        # Same exception type that .iloc[0] on an empty selection would raise,
        # but the message names the leg that is missing from the travel table.
        raise IndexError(
            f"No travel record found for From={origin!r} -> To={destination!r}"
        )
    return matches.iloc[0]


def transfer_travel_data(df_transition_perc, df_travel_transition):
    """
    Transfer travel-related data from df_travel_transition to df_transition_perc
    based on venue transitions and home/away status.

    For each team (in order of first appearance in the 'Team' column) the
    team's rows are walked in the order they appear in df_transition_perc;
    this function does not sort them, so the caller must supply each team's
    games in the intended sequence.

    * First game of a team: the leg starts at the team itself ('From' == team).
      It ends at the team when 'Home/Away' is 'Home', otherwise at 'Opp'.
    * Every later game: the leg runs from the previous row's 'Venue' to the
      current row's 'Venue'.

    The first row of df_travel_transition matching the leg on 'From'/'To'
    supplies the travel columns appended to the game row.

    Parameters
    ----------
    df_transition_perc : pandas.DataFrame
        Game rows; reads the columns 'Team', 'Home/Away', 'Opp' and 'Venue'.
    df_travel_transition : pandas.DataFrame
        Travel table; reads 'From', 'To' and the travel metric columns listed
        in ``travel_columns`` below.

    Returns
    -------
    pandas.DataFrame
        One row per input game (grouped by team) holding all original columns
        plus the travel metric columns. Empty when the input has no rows.

    Raises
    ------
    IndexError
        If no travel record exists for a required From -> To leg, or if a
        'Team' value matches no rows (e.g. a missing value).
    """
    # Travel metrics copied onto every game row (the same for all teams).
    travel_columns = [
        'FlightDist_km', 'FlightTime_hr', 'BusTime_hr',
        'TZ_Offset_hr', 'ElevationDelta_ft', 'TotalTravel_hr',
        'NonRest_hr', 'AdjustedRest_hr'
    ]

    result_data = []

    # Process each team separately
    for team in df_transition_perc['Team'].unique():
        team_rows = df_transition_perc[
            df_transition_perc['Team'] == team
        ].reset_index(drop=True)
        games = [team_rows.iloc[pos] for pos in range(len(team_rows))]

        if not games:
            # Only reachable when the team value never equals itself (NaN);
            # fail loudly instead of silently dropping those rows.
            raise IndexError(f"No rows found for team {team!r}")

        for position, game in enumerate(games):
            if position == 0:
                # First game: the team departs from its own location and plays
                # either there (home) or at the opponent's (away).
                origin = team
                destination = team if game['Home/Away'] == 'Home' else game['Opp']
            else:
                # Later games: travel from the previous venue to this one.
                origin = games[position - 1]['Venue']
                destination = game['Venue']

            travel_row = _find_travel_row(df_travel_transition, origin, destination)
            result_data.append({
                **game.to_dict(),
                **{col: travel_row[col] for col in travel_columns}
            })

    # Convert results to DataFrame and return
    return pd.DataFrame(result_data)

In [None]:
# Attach the travel metrics to every game row of the transition data.
# NOTE(review): df_travel_transition is not created in the cells visible
# here - confirm it is loaded before this cell runs.
merged_df = transfer_travel_data(
    df_transition_perc=df_transition_perc,
    df_travel_transition=df_travel_transition,
)