In [3]:
import pandas as pd
import requests
import plotly.graph_objects as go
import math
from scipy import stats
import string
import numpy as np
import time
from scipy.stats import zscore
import sys
import os

def lineuppull(team_id, season, opp=False, ps=False, timeout=30):
    """Fetch one team-season of WOWY rows from the pbpstats API as a DataFrame.

    Args:
        team_id: Value sent as the ``TeamId`` query parameter.
        season: Value sent as the ``Season`` query parameter (callers in this
            file pass strings such as ``"2000-01"``).
        opp: When truthy, request ``Type=Opponent``; otherwise ``Type=Team``.
        ps: When truthy, request ``SeasonType=Playoffs``; otherwise
            ``SeasonType=Regular Season``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block the whole pull.

    Returns:
        pandas.DataFrame built from the ``multi_row_table_data`` list of the
        JSON response. Every row carries the index label 0 (historical
        behaviour that callers normalise with ``reset_index``).

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: No response arrived within ``timeout`` seconds.
        KeyError: The JSON payload has no ``multi_row_table_data`` key.
    """
    wowy_url = "https://api.pbpstats.com/get-wowy-stats/nba"
    wowy_params = {
        "TeamId": team_id,
        "Season": season,
        "SeasonType": "Playoffs" if ps else "Regular Season",
        "Type": "Opponent" if opp else "Team",
    }
    # Browser-like headers sent with every request.
    # NOTE(review): 'br' is advertised in Accept-Encoding; requests can only
    # decode brotli responses when a brotli package is installed -- confirm
    # the environment has one, or drop 'br' from this header.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.183',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Cache-Control': 'max-age=0',
    }

    wowy_response = requests.get(
        wowy_url, params=wowy_params, headers=headers, timeout=timeout
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    wowy_response.raise_for_status()

    combos = wowy_response.json()["multi_row_table_data"]
    # Keep the all-zero index of the original implementation so the returned
    # frame is unchanged for existing callers.
    return pd.DataFrame(combos, index=[0] * len(combos))

def get_filename(team_id, year, opp=False, ps=False):
    """Build the CSV file name used to cache one team's pull.

    The name is ``<team_id>`` followed by ``_vs`` when ``opp`` is truthy and
    ``_ps`` when ``ps`` is truthy, ending in ``.csv``. ``year`` is accepted
    but does not appear in the name.
    """
    suffixes = []
    if opp:
        suffixes.append("_vs")
    if ps:
        suffixes.append("_ps")
    return f"{team_id}{''.join(suffixes)}.csv"

def pull_onoff(years, opp=False, ps=False):
    """Gather per-team frames for every requested year, caching each as a CSV.

    Teams for a year come from the player index CSV in the working directory
    (``index_master_ps.csv`` when ``ps`` is truthy, otherwise
    ``index_master.csv``), which must provide ``team``, ``year`` and
    ``team_id`` columns. For each team the file
    ``data/<year>/<get_filename(...)>`` is read if it already exists;
    otherwise the data is fetched with ``lineuppull``, tagged with
    ``team_id``, ``year``, ``season`` and ``team_vs`` columns, and written to
    that path.

    A failure for one team is printed and recorded, and processing continues
    with the next team. All failures, across every year, are listed once at
    the end.

    Args:
        years: Iterable of season end years; ``2001`` is requested from the
            API as season ``"2000-01"``.
        opp: Forwarded to ``lineuppull`` and ``get_filename``; also stored in
            the ``team_vs`` column of freshly fetched frames.
        ps: Forwarded to ``lineuppull`` and ``get_filename``; also selects
            which index file is read.

    Returns:
        pandas.DataFrame: concatenation of every cached or fetched frame, or
        an empty DataFrame when nothing was collected.
    """
    # Truthiness (not ``== False``) so this agrees with lineuppull/get_filename.
    index_path = 'index_master_ps.csv' if ps else 'index_master.csv'
    player_index = pd.read_csv(index_path)
    # Drop 'TOT' rows (presumably multi-team season totals -- confirm against
    # the index file) and anything from 2000 or earlier.
    player_index = player_index[player_index.team != 'TOT']
    player_index = player_index[player_index.year > 2000]
    player_index = player_index.drop_duplicates()

    all_frames = []
    # Collected across ALL years. Creating this inside the loop would forget
    # earlier years' failures and leave it undefined when `years` is empty.
    fail_list = []

    for year in years:
        # Create year directory if it doesn't exist
        year_dir = f"data/{year}"
        os.makedirs(year_dir, exist_ok=True)

        season_index = player_index[player_index.year == year].reset_index(drop=True)
        season = f"{year-1}-{str(year)[-2:]}"

        frames = []

        for team_id in season_index.team_id.unique():
            filepath = os.path.join(year_dir, get_filename(team_id, year, opp, ps))

            # Reuse the cached CSV instead of hitting the API again.
            if os.path.exists(filepath):
                print(f"File already exists for team {team_id} in {year}, skipping...")
                frames.append(pd.read_csv(filepath))
                continue

            try:
                df = lineuppull(team_id, season, opp=opp, ps=ps)
                df = df.reset_index(drop=True)
                df['team_id'] = team_id
                df['year'] = year
                df['season'] = season
                df['team_vs'] = opp

                # Save individual team file
                df.to_csv(filepath, index=False)
                print(f"Saved data for team {team_id} in {year}")

                frames.append(df)
            except Exception as e:
                # Best effort: record the failure and move on to the next team.
                print(f"Error processing team {team_id} in {year}: {str(e)}")
                fail_list.append((team_id, year))

        if frames:
            all_frames.append(pd.concat(frames))
            print(f'Year {year} Completed')

    if fail_list:
        print("\nFailed to process the following team/year combinations:")
        for failed_team, failed_year in fail_list:
            print(f"Team: {failed_team}, Year: {failed_year}")

    return pd.concat(all_frames) if all_frames else pd.DataFrame()
        
# Regular season: season end years 2001 through 2025. Each call caches one CSV
# per team under data/<year>/. The second call rebinds `df`, so afterwards it
# holds the opp=True frame.
years = list(range(2001, 2026))
df = pull_onoff(years, opp=False, ps=False)
df = pull_onoff(years, opp=True, ps=False)

# Playoffs: season end years 2001 through 2024 (pulls currently disabled).
years = list(range(2001, 2025))
#df = pull_onoff(years,opp=False,ps=True) 
#df = pull_onoff(years,opp=True,ps=True) 


File already exists for team 1610612763 in 2001, skipping...
File already exists for team 1610612743 in 2001, skipping...
File already exists for team 1610612753 in 2001, skipping...
File already exists for team 1610612742 in 2001, skipping...
File already exists for team 1610612764 in 2001, skipping...
File already exists for team 1610612749 in 2001, skipping...
File already exists for team 1610612759 in 2001, skipping...
File already exists for team 1610612738 in 2001, skipping...
File already exists for team 1610612758 in 2001, skipping...
File already exists for team 1610612745 in 2001, skipping...
File already exists for team 1610612757 in 2001, skipping...
File already exists for team 1610612765 in 2001, skipping...
File already exists for team 1610612750 in 2001, skipping...
File already exists for team 1610612741 in 2001, skipping...
File already exists for team 1610612760 in 2001, skipping...
File already exists for team 1610612755 in 2001, skipping...
File already exists for 

In [2]:
# Show the frame bound to `df` by the most recent pull_onoff call
# (the notebook renders a truncated view of rows and columns).
df

Unnamed: 0,EntityId,TeamId,Name,ShortName,RowId,TeamAbbreviation,SecondsPlayed,GamesPlayed,Minutes,PlusMinus,...,Offensive Fouls Drawn,StepOutOfBoundsTurnovers,NonShootingPenaltyNonTakeFoulsDrawn,Loose Ball Fouls Drawn,BlockingFouls,Charge Fouls Drawn,Charge Fouls,BlockingFoulsDrawn,Transition Take Fouls,Transition Take Fouls Drawn
0,1032-1501-1747-281-283,1610612749,"Darvin Ham, Tim Thomas, Rafer Alston, Scott Wi...","Ham, Thomas, Alston, Williams, Hunter",1032-1501-1747-281-283,MIL,52.0,1,1.0,2.0,...,,,,,,,,,,
1,1032-1501-1747-283-299,1610612749,"Darvin Ham, Tim Thomas, Rafer Alston, Lindsey ...","Ham, Thomas, Alston, Hunter, Robinson",1032-1501-1747-283-299,MIL,67.0,1,1.0,1.0,...,,,,,,,,,,
2,1032-1501-1747-283-998,1610612749,"Darvin Ham, Tim Thomas, Rafer Alston, Lindsey ...","Ham, Thomas, Alston, Hunter, Pope",1032-1501-1747-283-998,MIL,81.0,1,1.0,-1.0,...,,,,,,,,,,
3,1032-1501-208-281-951,1610612749,"Darvin Ham, Tim Thomas, Sam Cassell, Scott Wil...","Ham, Thomas, Cassell, Williams, Allen",1032-1501-208-281-951,MIL,111.0,1,2.0,-7.0,...,,,,,,,,,,
4,1032-1501-208-283-299,1610612749,"Darvin Ham, Tim Thomas, Sam Cassell, Lindsey H...","Ham, Thomas, Cassell, Hunter, Robinson",1032-1501-208-283-299,MIL,23.0,1,,2.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,1630639-1630702-1641765-202693-203501,1610612742,"A.J. Lawson, Jaden Hardy, Olivier-Maxence Pros...","Lawson, Hardy, Prosper, Morris, Hardaway Jr.",1630639-1630702-1641765-202693-203501,DAL,325.0,1,5.0,7.0,...,,,,,,,,,,
148,1630639-1630702-1641765-203939-203957,1610612742,"A.J. Lawson, Jaden Hardy, Olivier-Maxence Pros...","Lawson, Hardy, Prosper, Powell, Exum",1630639-1630702-1641765-203939-203957,DAL,157.0,1,3.0,-3.0,...,,,,,,,,,,
149,1630639-1630702-202681-203939-203957,1610612742,"A.J. Lawson, Jaden Hardy, Kyrie Irving, Dwight...","Lawson, Hardy, Irving, Powell, Exum",1630639-1630702-202681-203939-203957,DAL,2.0,1,,2.0,...,,,1.0,,,,,,,
150,1630639-1630702-202693-203501-203939,1610612742,"A.J. Lawson, Jaden Hardy, Markieff Morris, Tim...","Lawson, Hardy, Morris, Hardaway Jr., Powell",1630639-1630702-202693-203501-203939,DAL,156.0,1,3.0,-6.0,...,,,,,,,,,,
