<a href="https://colab.research.google.com/github/CalvinHh/LLM-projects/blob/main/NBA_LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install nba_api

Collecting nba_api
  Downloading nba_api-1.10.0-py3-none-any.whl.metadata (5.8 kB)
Downloading nba_api-1.10.0-py3-none-any.whl (285 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m285.3/285.3 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nba_api
Successfully installed nba_api-1.10.0


In [None]:
import time
import google.generativeai as genai
from google.colab import userdata

# Retrieve the API key from Colab Secrets and configure the Gemini client.
# The secret name 'GOOGLE_API_KEY' must exactly match the entry in Colab's 'Secrets' tab.
try:
    GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
    genai.configure(api_key=GOOGLE_API_KEY)
except Exception as e:
    print(f"Error retrieving API key or configuring Gemini API: {e}")
    print("Please ensure your 'GOOGLE_API_KEY' is set in Colab's 'Secrets' tab.")
    # Re-raise rather than calling exit(): inside a notebook kernel exit() does not
    # simply abort this cell, whereas an exception stops the cell (and "Run all") here.
    raise

# Initialize the Generative Model
# 'gemini-1.5-flash' is often a good choice for general use due to its speed and cost-effectiveness.
# You can switch to 'gemini-1.5-pro' if you need more advanced reasoning,
# but be mindful of potentially stricter rate limits on the free tier.
model = genai.GenerativeModel('gemini-1.5-flash')

# Prompts sent to the model, one request per entry.
prompts = [
    "What is the capital of China?",
    "What is the lake in China?",
    "谁写了阿甘正传?"
]

print("Starting AI model requests...\n")

# Send each prompt in turn; a failure on one prompt is reported and the loop continues.
for i, prompt in enumerate(prompts):
    print(f"Prompt {i+1}: {prompt}")
    try:
        response = model.generate_content(prompt)
        print(f"Response: {response.text}\n")
    except Exception as e:
        print(f"An error occurred for prompt '{prompt}': {e}\n")

    # Pause between requests to avoid hitting rate limits.
    # Adjust this value based on your specific API quota (e.g., 1-5 seconds).
    if i < len(prompts) - 1:  # No need to sleep after the last request
        time.sleep(2)

print("All requests processed.")

Starting AI model requests...

Prompt 1: What is the capital of China?
Response: Beijing


Prompt 2: What is the lake in China?
Response: There are many lakes in China!  To answer your question properly, I need more information.  Which lake are you interested in?  For example, are you thinking of:

* **Lake Taihu:** One of the largest freshwater lakes in China.
* **Lake Poyang:** The largest freshwater lake in China (though its size varies greatly depending on the season).
* **Dian Lake:** A large lake in Yunnan province.
* **Qinghai Lake:** The largest saltwater lake in China.
* **Erhai Lake:**  A beautiful lake in Yunnan province.


Please provide more details about the lake you're curious about.


Prompt 3: 谁写了阿甘正传?
Response: 《阿甘正传》的作者是 **温斯顿·格鲁姆**.


All requests processed.


In [None]:
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.static import players

# Example: get Ja Morant's career stats.
# 1. Find the player ID via an exact full-name match against players.get_players().
ja_morant = next(
    (player for player in players.get_players() if player['full_name'] == 'Ja Morant'),
    None,
)
if ja_morant is None:
    # Fail with a clear message instead of an opaque IndexError from indexing an empty list.
    raise ValueError("Player 'Ja Morant' was not found in the nba_api player list.")
ja_morant_id = ja_morant['id']
print(f"ja_morant's ID: {ja_morant_id}")

# 2. Request the player's career stats.
career_stats = playercareerstats.PlayerCareerStats(player_id=ja_morant_id)

# 3. Convert to a Pandas DataFrame for easier viewing
career_df = career_stats.get_data_frames()[0]  # [0] usually contains the regular season totals
print("\nja morant's Career Stats:")
print(career_df.head())

ja_morant's ID: 1629630

ja morant's Career Stats:
   PLAYER_ID SEASON_ID LEAGUE_ID     TEAM_ID TEAM_ABBREVIATION  PLAYER_AGE  \
0    1629630   2019-20        00  1610612763               MEM        20.0   
1    1629630   2020-21        00  1610612763               MEM        21.0   
2    1629630   2021-22        00  1610612763               MEM        22.0   
3    1629630   2022-23        00  1610612763               MEM        23.0   
4    1629630   2023-24        00  1610612763               MEM        24.0   

   GP  GS   MIN  FGM  ...  FT_PCT  OREB  DREB  REB  AST  STL  BLK  TOV   PF  \
0  67  67  2074  447  ...   0.776    51   208  259  488   58   18  220  110   
1  63  63  2053  430  ...   0.728    59   193  252  465   57   13  203   87   
2  57  57  1889  580  ...   0.761    77   248  325  384   66   22  196   86   
3  61  59  1948  566  ...   0.748    61   296  357  493   66   16  206  100   
4   9   9   318   80  ...   0.813     6    44   50   73    7    5   27   19   

    P

In [None]:
from nba_api.stats.endpoints import boxscoretraditionalv2, teamgamelog
from nba_api.stats.static import teams
import pandas as pd
import time
import google.generativeai as genai
from google.colab import userdata
from datetime import datetime, timedelta # Still useful for context, but not directly for finding game_id in this version

# --- API Key Configuration ---
# Read the Gemini key from Colab Secrets and configure the client.
try:
    GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
    genai.configure(api_key=GOOGLE_API_KEY)
except Exception as e:
    print(f"Error: Could not retrieve API key or configure Gemini API. Details: {e}")
    print("Please ensure your 'GOOGLE_API_KEY' is correctly set in Colab's 'Secrets' tab.")
    # Re-raise rather than calling exit(): inside a notebook kernel exit() does not
    # simply abort this cell, whereas an exception stops the cell (and "Run all") here.
    raise

model = genai.GenerativeModel('gemini-1.5-flash') # Using flash for faster iteration

# --- HARDCODED GAME ID & TEAM NAMES ---
# This bypasses the LeagueGameFinder, which was causing the JSONDecodeError.
# This is the Game ID for Pacers vs. Thunder on June 11, 2025 (Game 3 of the Finals).
game_id = '0042400403'
home_team_name = 'Indiana Pacers' # As Indiana was the home team for Game 3
away_team_name = 'Oklahoma City Thunder'

print(f"Using hardcoded Game ID: {game_id} for {home_team_name} vs {away_team_name}")

# --- Helper function to get Team ID by Full Name ---
# This is still needed for filtering the DataFrames based on team names.
def get_team_id_by_full_name(full_name):
    """Look up a team's ID by exact full-name match.

    Args:
        full_name: Name compared for equality against the 'full_name' value of
            each entry returned by teams.get_teams().

    Returns:
        The 'id' value of the first matching entry, or None when nothing matches.
    """
    matching_ids = (
        candidate['id']
        for candidate in teams.get_teams()
        if candidate['full_name'] == full_name
    )
    # next() with a default yields the first match or None, like the original loop.
    return next(matching_ids, None)

# --- Fetch Game Box Score ---
# Reset first so a failure below cannot leave a `response` from an earlier cell in scope.
response = None
try:
    box_score = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
    # Build the DataFrames once and reuse them instead of calling get_data_frames() twice.
    box_score_frames = box_score.get_data_frames()
    player_stats_df = box_score_frames[0]  # Player stats are usually the first DataFrame
    team_stats_df = box_score_frames[1]  # Team stats are usually the second DataFrame

    # Get team IDs using the helper function
    home_team_id = get_team_id_by_full_name(home_team_name)
    away_team_id = get_team_id_by_full_name(away_team_name)

    if home_team_id is None or away_team_id is None:
        # Raise instead of exit(): the handler below reports it and the LLM step is skipped.
        raise ValueError(
            f"Could not find IDs for {home_team_name} or {away_team_name}. Please check names."
        )

    home_game_stats = team_stats_df[team_stats_df['TEAM_ID'] == home_team_id].to_string(index=False)
    away_game_stats = team_stats_df[team_stats_df['TEAM_ID'] == away_team_id].to_string(index=False)

    # Select relevant player stats (top 5 scorers for each team)
    summary_columns = ['PLAYER_NAME', 'PTS', 'AST', 'REB', 'FG_PCT']
    home_players = (
        player_stats_df[player_stats_df['TEAM_ID'] == home_team_id]
        .sort_values(by='PTS', ascending=False)
        .head(5)[summary_columns]
    )
    away_players = (
        player_stats_df[player_stats_df['TEAM_ID'] == away_team_id]
        .sort_values(by='PTS', ascending=False)
        .head(5)[summary_columns]
    )

    player_summary = (
        f"Top 5 {home_team_name} players:\n{home_players.to_string(index=False)}\n\n"
        f"Top 5 {away_team_name} players:\n{away_players.to_string(index=False)}"
    )
except Exception as e:
    print(f"Error fetching NBA game data for Game ID {game_id}: {e}")
else:
    # Only reached when the box score data was fetched and summarized successfully,
    # so an LLM failure is never reported as a data-fetching error.
    time.sleep(2) # Be mindful of nba_api rate limits after the box score call

    # --- Construct Prompt ---
    # NOTE(review): the prompt states that the home team won; nothing here checks the
    # score, so confirm this holds for the hardcoded game.
    prompt = f"""
    Analyze the outcome of this NBA Finals game based on the provided statistics.
    Explain the key factors that led to the {home_team_name} victory over the {away_team_name}.
    Highlight individual player performances, team efficiency, and any other notable statistical trends that influenced the result.

    --- Game Box Score ---
    {home_team_name} Team Stats:
    {home_game_stats}

    {away_team_name} Team Stats:
    {away_game_stats}

    --- Key Player Performances ---
    {player_summary}
    """

    # --- Send to LLM and Print Response ---
    print("Requesting analysis from LLM...")
    try:
        response = model.generate_content(prompt)
        print("\n--- Game Analysis ---")
        print(response.text)
    except Exception as e:
        print(f"Error generating content: {e}")

Using hardcoded Game ID: 0042400403 for Indiana Pacers vs Oklahoma City Thunder
Requesting analysis from LLM...

--- Game Analysis ---
The Indiana Pacers secured a victory over the Oklahoma City Thunder with a final score of 116-107, a 9-point margin.  Several key factors contributed to this outcome:

**1. Superior Shooting Efficiency:** The Pacers demonstrated significantly better overall shooting efficiency.  Their field goal percentage (51.8%) was notably higher than the Thunder's (46.8%). This advantage stemmed from both higher overall field goal percentage and a better performance from three-point range (Pacers 33.3%, Thunder 45.5%). While the Thunder made more three-pointers, the Pacers' higher overall FG% ultimately proved decisive.

**2. Dominant Offensive Performance by Mathurin and Haliburton:**  Bennedict Mathurin's exceptional performance (27 points, 75% FG) was a significant driver of the Pacers' win.  His high field goal percentage demonstrates both efficiency and effecti

In [None]:
from nba_api.stats.endpoints import boxscoretraditionalv2, teamgamelog
from nba_api.stats.static import teams
import pandas as pd
import time
import google.generativeai as genai
from google.colab import userdata
from datetime import datetime, timedelta

# --- API Key Configuration ---
# Read the Gemini key from Colab Secrets and configure the client.
try:
    GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
    genai.configure(api_key=GOOGLE_API_KEY)
except Exception as e:
    print(f"Error: Could not retrieve API key or configure Gemini API. Details: {e}")
    print("Please ensure your 'GOOGLE_API_KEY' is correctly set in Colab's 'Secrets' tab.")
    # Re-raise rather than calling exit(): inside a notebook kernel exit() does not
    # simply abort this cell, whereas an exception stops the cell (and "Run all") here.
    raise

model = genai.GenerativeModel('gemini-1.5-flash')

# --- HARDCODED GAME ID & TEAM NAMES ---
# The game and the two team names are fixed here rather than looked up.
game_id = '0042400403'
home_team_name = 'Indiana Pacers'
away_team_name = 'Oklahoma City Thunder'

print(f"Using hardcoded Game ID: {game_id} for {home_team_name} vs {away_team_name}")

# --- Helper function to get Team ID by Full Name ---
def get_team_id_by_full_name(full_name):
    """Return the ID of the team whose 'full_name' equals ``full_name``.

    Scans the entries returned by teams.get_teams() in order and returns the
    'id' of the first exact match; returns None when no entry matches.
    """
    return next(
        (entry['id'] for entry in teams.get_teams() if entry['full_name'] == full_name),
        None,
    )

# --- Fetch Game Box Score ---
# Builds the team-stat strings and the top-scorer summary used by the prompt below.
try:
    box_score = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
    # Build the DataFrames once and reuse them instead of calling get_data_frames() twice.
    box_score_frames = box_score.get_data_frames()
    player_stats_df = box_score_frames[0]  # indexed as the player-level frame, as before
    team_stats_df = box_score_frames[1]  # indexed as the team-level frame, as before

    home_team_id = get_team_id_by_full_name(home_team_name)
    away_team_id = get_team_id_by_full_name(away_team_name)

    if home_team_id is None or away_team_id is None:
        # Raise instead of exit() so the cell stops here with a clear message.
        raise ValueError(
            f"Could not find IDs for {home_team_name} or {away_team_name}. Please check names."
        )

    home_game_stats = team_stats_df[team_stats_df['TEAM_ID'] == home_team_id].to_string(index=False)
    away_game_stats = team_stats_df[team_stats_df['TEAM_ID'] == away_team_id].to_string(index=False)

    # Top 5 scorers per team, restricted to the columns shown in the summary.
    summary_columns = ['PLAYER_NAME', 'PTS', 'AST', 'REB', 'FG_PCT']
    home_players = (
        player_stats_df[player_stats_df['TEAM_ID'] == home_team_id]
        .sort_values(by='PTS', ascending=False)
        .head(5)[summary_columns]
    )
    away_players = (
        player_stats_df[player_stats_df['TEAM_ID'] == away_team_id]
        .sort_values(by='PTS', ascending=False)
        .head(5)[summary_columns]
    )

    player_summary = (
        f"Top 5 {home_team_name} players:\n{home_players.to_string(index=False)}\n\n"
        f"Top 5 {away_team_name} players:\n{away_players.to_string(index=False)}"
    )

    time.sleep(2)  # short pause after the box score request

except Exception as e:
    print(f"Error fetching NBA game data for Game ID {game_id}: {e}")
    # Re-raise rather than calling exit(): the statements after this block need the
    # variables built above, so execution must stop here when fetching fails.
    raise

# --- Construct Prompt (MODIFIED FOR SHORTER ANSWER) ---
# NOTE(review): the prompt states that the home team won; nothing here checks the
# score, so confirm this holds for the hardcoded game.
prompt = f"""
Analyze the outcome of this NBA Finals game based on the provided statistics.
**Provide a concise explanation, focusing on the 2-3 most critical factors** that led to the {home_team_name} victory over the {away_team_name}.
Highlight the most impactful individual player performances and team statistical trends. **Keep the response brief, no more than 3-4 paragraphs.**

--- Game Box Score ---
{home_team_name} Team Stats:
{home_game_stats}

{away_team_name} Team Stats:
{away_game_stats}

--- Key Player Performances ---
{player_summary}
"""

# --- Send to LLM and Print Response ---
print("Requesting analysis from LLM...")
# Reset first: earlier cells also assign `response`, so without this a failed request
# would leave an unrelated, stale answer for any later cell that reads `response`.
response = None
try:
    response = model.generate_content(prompt)
    print("\n--- Game Analysis ---")
    print(response.text)
except Exception as e:
    print(f"Error generating content: {e}")

Using hardcoded Game ID: 0042400403 for Indiana Pacers vs Oklahoma City Thunder
Requesting analysis from LLM...

--- Game Analysis ---
The Indiana Pacers secured a victory over the Oklahoma City Thunder primarily due to superior shooting efficiency and a significant advantage in assists.  The Pacers shot a significantly higher field goal percentage (51.8% vs 46.8%), showcasing better offensive execution and shot selection.  This advantage was amplified by their near double the amount of assists (24 vs 16), indicating better ball movement and playmaking which led to higher-percentage shots.  The Thunder's higher free throw attempts (30 vs 22) wasn't enough to offset these disparities.


Bennedict Mathurin's exceptional performance (27 points on 75% shooting) was crucial for the Pacers, providing a potent offensive punch.  Tyrese Haliburton's 22 points and 11 assists further underscored Indiana's offensive fluidity and efficiency.  In contrast, while Shai Gilgeous-Alexander (24 points) p

In [None]:
from IPython.display import display, HTML

# Retrieve the LLM's analysis text, keeping a placeholder message when it is unavailable.
llm_analysis_text = "AI analysis could not be retrieved. Please ensure the previous cell ran successfully."
if globals().get('response') is not None:
    try:
        llm_analysis_text = response.text
    except Exception as e:
        # NOTE(review): `.text` is expected to raise when the response carries no usable
        # text (e.g. a blocked prompt) — confirm against the SDK docs. Keep the placeholder.
        print(f"Could not read text from the LLM response: {e}")
else:
    print(llm_analysis_text) # Print to console for immediate feedback if LLM response is missing

# Use yesterday_str for the report date when some earlier cell defined it, else 'N/A'.
# A dict lookup with a default replaces the previous try/except NameError, whose except
# branch could never run because the name was only read after a membership check.
date_display = globals().get('yesterday_str', 'N/A')

# Helper to make a text block safe for embedding inside an HTML <pre> tag.
def format_for_pre(df_string):
    """Escape HTML-significant characters in ``df_string``.

    Args:
        df_string: Plain text (e.g. the result of DataFrame.to_string()).

    Returns:
        The text with '&', '<' and '>' replaced by '&amp;', '&lt;' and '&gt;'.
        Quote characters are left unchanged.
    """
    import html  # local import keeps this helper self-contained

    # html.escape handles '&' as well (and first), which the manual replace chain missed;
    # quote=False leaves quotes untouched, as before.
    return html.escape(df_string, quote=False)

# --- Constructing the HTML Report with Scrollable Sections ---
# All multi-line text blocks, including the LLM's answer, go through format_for_pre so
# that any markup characters they contain are shown as text rather than parsed as HTML.
html_report = f"""
<div style="font-family: Arial, sans-serif; border: 1px solid #e0e0e0; border-radius: 8px; padding: 15px; background-color: #ffffff;">
    <h2 style="color: #2c3e50; text-align: center; margin-bottom: 20px;">🏀 NBA Game Analysis Report 🏀</h2>
    <hr style="border-top: 1px solid #e0e0e0;">

    <div style="margin-bottom: 20px;">
        <h3 style="color: #34495e;">Game Overview</h3>
        <ul style="list-style-type: none; padding: 0; margin: 0;">
            <li style="margin-bottom: 5px;"><strong>Game ID:</strong> <code>{game_id}</code></li>
            <li style="margin-bottom: 5px;"><strong>Matchup:</strong> <strong>{home_team_name}</strong> vs. <strong>{away_team_name}</strong></li>
            <li style="margin-bottom: 5px;"><strong>Date:</strong> {date_display}</li>
        </ul>
    </div>

    <hr style="border-top: 1px solid #e0e0e0;">

    <div style="margin-bottom: 20px;">
        <h3 style="color: #34495e;">Team Statistics</h3>
        <h4 style="color: #555; margin-bottom: 5px;">{home_team_name} - Team Box Score</h4>
        <div style="max-height: 150px; overflow-y: auto; border: 1px solid #f0f0f0; background-color: #fcfcfc; padding: 10px; border-radius: 4px;">
            <pre style="margin: 0; white-space: pre-wrap; word-wrap: break-word;">{format_for_pre(home_game_stats)}</pre>
        </div>
        <h4 style="color: #555; margin-top: 15px; margin-bottom: 5px;">{away_team_name} - Team Box Score</h4>
        <div style="max-height: 150px; overflow-y: auto; border: 1px solid #f0f0f0; background-color: #fcfcfc; padding: 10px; border-radius: 4px;">
            <pre style="margin: 0; white-space: pre-wrap; word-wrap: break-word;">{format_for_pre(away_game_stats)}</pre>
        </div>
    </div>

    <hr style="border-top: 1px solid #e0e0e0;">

    <div style="margin-bottom: 20px;">
        <h3 style="color: #34495e;">Key Player Performances</h3>
        <div style="max-height: 200px; overflow-y: auto; border: 1px solid #f0f0f0; background-color: #fcfcfc; padding: 10px; border-radius: 4px;">
            <pre style="margin: 0; white-space: pre-wrap; word-wrap: break-word;">{format_for_pre(player_summary)}</pre>
        </div>
    </div>

    <hr style="border-top: 1px solid #e0e0e0;">

    <div style="background-color: #e8f5e9; padding: 15px; border-radius: 8px;">
        <h3 style="color: #2e7d32;">AI-Powered Game Analysis (Gemini LLM)</h3>
        <div style="max-height: 250px; overflow-y: auto; padding-right: 10px;"> <!-- Added padding-right for scrollbar space -->
            <p style="margin: 0; white-space: pre-wrap; word-wrap: break-word;">{format_for_pre(llm_analysis_text)}</p>
        </div>
    </div>

    <hr style="border-top: 1px dashed #cccccc; margin-top: 20px;">
    <p style="text-align: center; font-size: 0.85em; color: #777;">Report generated by Gemini LLM & nba_api</p>
</div>
"""

# Display the formatted HTML in the Colab output
display(HTML(html_report))