This code block handles the import statements and data loading, to keep the notebook clean.

In [1]:
import pandas as pd
import kagglehub

# Philadelphia 2025 crime statistics.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# where that file exists. Consider a configurable data directory.
phil_2025 = pd.read_csv("/Users/pwax/Desktop/mcdermid_waxman_final_project/data/incidents_part1_part2.csv")

# NFL season scores. The value returned by dataset_download is used below (and
# in nfl_game_date) as the directory that contains Season_Scores/.
path_nfl = kagglehub.dataset_download("keonim/nfl-game-scores-dataset-2017-2023")
# Read from path_nfl: the bare name `path` is never defined in this cell, so
# the previous f"{path}/..." raised NameError on a fresh kernel.
game_dates = pd.read_csv(f"{path_nfl}/Season_Scores/2024_scores.csv")


In [215]:
def nfl_game_date(year, team, data_dir=None):
    """Return one team's game dates and results for a single NFL season.

    Reads ``{data_dir}/Season_Scores/{year}_scores.csv``, drops rows with no
    ``Date``, and keeps the rows where ``team`` equals ``HomeTeam`` or
    ``AwayTeam``.

    Parameters
    ----------
    year : int
        Selects the CSV file. It is also the year appended to dates whose
        month is 7-12; dates whose month is 1-6 get ``year + 1``.
    team : str
        Compared for exact equality with ``HomeTeam`` / ``AwayTeam``.
    data_dir : str or path-like, optional
        Directory that contains ``Season_Scores``. Defaults to the notebook
        global ``path_nfl``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date`` (datetime), ``Result`` (bool taken from ``HomeWin``
        for home rows and ``AwayWin`` for away rows) and ``Location``
        (``"Home"`` or ``"Away"``), sorted by ``Date``. If the team has no
        rows, the frame is empty but still has these three columns.
    """
    if data_dir is None:
        data_dir = path_nfl

    scores = pd.read_csv(f"{data_dir}/Season_Scores/{year}_scores.csv")
    scores = scores.dropna(subset=["Date"])

    def _with_year(date):
        # Assumes Date looks like "month/day" with no year -- TODO confirm
        # against the CSV. Months 1-6 are treated as the following calendar year.
        month = int(str(date).split("/")[0])
        return f"{date}/{year + 1}" if month <= 6 else f"{date}/{year}"

    def _one_side(team_col, win_col, location):
        # Rows where `team` is listed in `team_col`, as a frame with named columns.
        games = scores[scores[team_col] == team]
        side = pd.DataFrame({
            "Date": pd.to_datetime(games["Date"].apply(_with_year)),
            # NOTE(review): a missing (NaN) win flag converts to True here,
            # exactly as bool(x) did before -- confirm the data has none.
            "Result": games[win_col].astype(bool),
        })
        side["Location"] = location
        return side

    # Columns are named before concatenation. Renaming positionally afterwards
    # mislabels them when one side is empty and fails when both are.
    season = pd.concat(
        [
            _one_side("HomeTeam", "HomeWin", "Home"),
            _one_side("AwayTeam", "AwayWin", "Away"),
        ],
        ignore_index=True,
    )
    return season.sort_values("Date").reset_index(drop=True)

In [174]:
# NBA games and box scores; the returned value is used by nba_game_date as the
# directory containing TeamHistories.csv and Games.csv.
path_nba = kagglehub.dataset_download("eoinamoore/historical-nba-data-and-player-box-scores")

# Print the variable assigned above; the bare name `path` is not defined by
# this cell and only resolved through leftover kernel state.
print("Path to dataset files:", path_nba)

Path to dataset files: /Users/pwax/.cache/kagglehub/datasets/eoinamoore/historical-nba-data-and-player-box-scores/versions/192


In [234]:
def nba_game_date(team, year, data_dir=None):
    """Return one team's game dates and results for a single NBA season window.

    Looks up ``team`` in ``TeamHistories.csv`` to get its ``teamId``, then
    selects rows of ``Games.csv`` where that id is the home or away team and
    ``gameDate`` falls from October 1 of ``year`` through June 30 of
    ``year + 1`` (inclusive of the whole of June 30).

    Parameters
    ----------
    team : str
        Compared for exact equality with the ``teamName`` column. The first
        matching row's ``teamId`` is used.
    year : int
        Calendar year in which the window starts.
    data_dir : str or path-like, optional
        Directory that contains ``TeamHistories.csv`` and ``Games.csv``.
        Defaults to the notebook global ``path_nba``.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``Date`` (datetime), ``Result`` (True when ``winner`` equals
        the team's id) and ``Location`` (``"Home"`` or ``"Away"``), sorted by
        ``Date``. Returns ``None`` after printing a message when ``team`` is
        not found.
    """
    if data_dir is None:
        data_dir = path_nba

    # First get the teamId used by this dataset.
    team_df = pd.read_csv(f"{data_dir}/TeamHistories.csv")
    matches = team_df.loc[team_df["teamName"] == team, "teamId"]
    if matches.empty:
        print("Team not found in dataset, look at the documentation.")
        return None
    team_id = matches.iloc[0]

    games = pd.read_csv(f"{data_dir}/Games.csv", low_memory=False)
    games["gameDate"] = pd.to_datetime(games["gameDate"])

    # Half-open window [Oct 1, Jul 1). The earlier `<= June 30` bound was a
    # midnight timestamp, so games tipping off during June 30 were dropped.
    # NOTE(review): the filter is date-based only; any row of Games.csv inside
    # the window is kept regardless of game type -- confirm that is intended.
    start = pd.to_datetime(f"{year}-10-01")
    end = pd.to_datetime(f"{year + 1}-07-01")
    in_window = (games["gameDate"] >= start) & (games["gameDate"] < end)

    def _one_side(team_col, location):
        # Games where the team appears in `team_col`, as a frame with named columns.
        side_games = games[(games[team_col] == team_id) & in_window]
        side = pd.DataFrame({
            "Date": side_games["gameDate"],
            "Result": side_games["winner"] == team_id,
        })
        side["Location"] = location
        return side

    # Columns are named before concatenation. Renaming positionally afterwards
    # mislabels them when one side is empty and fails when both are.
    season = pd.concat(
        [_one_side("hometeamId", "Home"), _one_side("awayteamId", "Away")],
        ignore_index=True,
    )
    return season.sort_values("Date").reset_index(drop=True)

In [239]:
# Warriors games in the window that starts in October 2015.
x = nba_game_date(team="Warriors", year=2015)

In [240]:
# Display the frame returned by nba_game_date in the previous cell.
x

Unnamed: 0,Date,Result,Location
0,2015-10-05 22:30:00,True,Home
1,2015-10-08 22:00:00,False,Away
2,2015-10-13 22:30:00,False,Home
3,2015-10-15 22:00:00,True,Home
4,2015-10-17 22:00:00,False,Away
...,...,...,...
108,2016-06-08 21:00:00,False,Away
109,2016-06-10 21:00:00,True,Away
110,2016-06-13 21:00:00,False,Home
111,2016-06-16 21:00:00,False,Away
