# Tabular Data RAG

In [None]:
import os
import pandas as pd

# Required to convert to markdown
%pip install tabulate

# OpenAI dependencies
%pip install openai==1.55.3
from openai import AzureOpenAI

%pip install azure-identity
from azure.identity import DefaultAzureCredential, get_bearer_token_provider

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


## Load Datasets

In [9]:
# Evaluation window; both bounds are inclusive.
start_year = 2019
end_year = 2023

# Paths of the cleaned CSV datasets.
teams_cleaned_dataset_file = os.path.join("datasets", "teams_clean.csv")
standings_cleaned_dataset_file = os.path.join("datasets", "standings_clean.csv")
games_cleaned_dataset_file = os.path.join("datasets", "games_clean.csv")

# Every season inside the evaluation window.
seasons_to_evaluate = list(range(start_year, end_year + 1))

# Load the standings, then discard the rows whose season falls outside the
# window. Rows are dropped by label, so the surviving rows keep their
# original index values.
standings = pd.read_csv(standings_cleaned_dataset_file)
outside_window = ~standings["season"].isin(seasons_to_evaluate)
standings = standings.drop(index=standings.index[outside_window])

In [10]:
# Optional: filter standings down to a single team (NE) before rendering.
# Left disabled, so the markdown below is built from the unfiltered `standings` frame.
#standings_filtered = standings[standings['team'] == 'NE']

# Convert standings to a markdown table (relies on the `tabulate` package
# installed in the first cell); this string is what later cells pass as context.
standings_markdown = standings.to_markdown()

print(standings_markdown)

|     |   season | conf   | division   | team   |   wins |   losses |   ties |      pct |   div_rank |   scored |   allowed |   net |   seed | playoff   | team_name             |
|----:|---------:|:-------|:-----------|:-------|-------:|---------:|-------:|---------:|-----------:|---------:|----------:|------:|-------:|:----------|:----------------------|
| 160 |     2019 | AFC    | AFC East   | BUF    |     10 |        6 |      0 | 0.625    |          2 |      314 |       259 |    55 |      5 | LostWC    | Buffalo Bills         |
| 161 |     2019 | AFC    | AFC East   | MIA    |      5 |       11 |      0 | 0.3125   |          4 |      306 |       494 |  -188 |    nan | nan       | Miami Dolphins        |
| 162 |     2019 | AFC    | AFC East   | NE     |     12 |        4 |      0 | 0.75     |          1 |      420 |       225 |   195 |      3 | LostWC    | New England Patriots  |
| 163 |     2019 | AFC    | AFC East   | NYJ    |      7 |        9 |      0 | 0.4375   |          3 |   

## Run Language Model

In [18]:
# System prompt sent with every request: it frames the assistant's role and
# instructs it to answer only from the supplied standings table.
MESSAGE_SYSTEM_CONTENT = """You are a customer service agent that helps a customer with answering questions about NFL statistical data. 
Please answer the question based on the provided context below.
The context is a table in markdown format that contains NFL standings data for the 2019-2023 seasons.
Make sure not to make any changes to the context, if possible, when preparing answers to provide accurate responses. 
If the answer cannot be found in context, just politely say that you do not know, do not try to make up an answer."""

In [12]:
# Token-based authentication: the client receives a bearer-token provider
# (built from DefaultAzureCredential) instead of an API key.
token_provider = get_bearer_token_provider(
    DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
)

# Azure OpenAI client shared by every request issued in this notebook.
client = AzureOpenAI(
    azure_endpoint="https://rag-research.openai.azure.com/",
    api_version="2024-08-01-preview",
    azure_ad_token_provider=token_provider,
)

In [13]:

def response_test(question:str, context:str, model:str = "gpt-4o"):
    """Ask the chat model a question that must be answered from `context`.

    Args:
        question: The user's question.
        context: The grounding text (in this notebook, the standings table
            rendered as markdown).
        model: Model / deployment name passed to the chat completions API.

    Returns:
        The text content of the first choice in the model's response.
    """
    # The system prompt refers to "the provided context below", so the
    # context is appended to the system message. Sending it under the
    # "assistant" role after the question would present the table to the
    # model as its own previous reply rather than as reference material.
    system_content = f"{MESSAGE_SYSTEM_CONTENT}\n\nContext:\n{context}"

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_content},
            # The question is the final turn, so the model replies to it.
            {"role": "user", "content": question},
        ],
    )

    return response.choices[0].message.content

In [22]:
# Direct lookup: the question targets one team, one season and one column (div_rank).
response = response_test("What is the div_rank for the 2020 NFL season for New England Patriots?", standings_markdown)
print(response)

The division rank for the New England Patriots in the 2020 NFL season was 3.


In [15]:
# Single-team aggregation: only the raw standings table is supplied, so the
# model has to derive the average from that team's per-season `scored` values.
response = response_test("What is the average of scored points for New England Patriots?", standings_markdown)
print(response)

The provided data shows the New England Patriots' scored points for the past five seasons:

- 2019: 420 points
- 2020: 326 points
- 2021: 462 points
- 2022: 364 points
- 2023: 236 points

To calculate the average scored points over these seasons, you can add the points together and divide by the number of seasons:

(420 + 326 + 462 + 364 + 236) / 5 = 361.6

The average scored points for the New England Patriots over these five seasons is 361.6 points per season.


In [19]:
# Cross-team aggregation: answering needs an average of `scored` for every
# team plus a comparison between teams, all derived from the raw table.
response = response_test("Which team has the best scored points average in the seasons provided?", standings_markdown)
print(response)

To determine which team has the best scored points average over the seasons provided (2019-2023), we need to calculate the average points scored per season for each team and compare them. It seems that the Kansas City Chiefs in 2020 scored 473 points over 16 games, and in 2022, they scored 496 points over 17 games, which might give them a significant average.

However, to provide a definitive answer, all teams' total scored points over the seasons would need to be divided by the number of games they played (not necessarily all played 17 games in each season) and compared. Unfortunately, a full calculation hasn't been provided here, so I recommend reviewing the points scored by each team across all seasons to definitively determine which has the best average.


In [20]:
# Ranking on `wins`; the wording does not state whether single-season wins
# or wins summed across seasons are meant.
response = response_test("What is the team with more wins in the data provided?", standings_markdown)
print(response)

The Kansas City Chiefs have the most wins in the data provided, with a total of 63 wins over the five seasons from 2019 to 2023.


In [21]:
# Ranking on total wins across the provided seasons, looking for the lowest total.
response = response_test("What is the team with less total wins in the provided seasons?", standings_markdown)
print(response)

The team with the fewest total wins over the provided seasons is the Jacksonville Jaguars in the 2020 season, who had 1 win and 15 losses.
