In [17]:
def table_name_gnereator_prompt(dataframe):
    """Build the LLM prompt that asks for a short name for a table.

    Args:
        dataframe: object exposing ``head(n)`` whose result has
            ``to_csv(index=False)`` (a pandas DataFrame in this notebook).

    Returns:
        The prompt as a single string, ending with ``Table name:`` so the
        model's reply is the name itself.
    """
    # Only a three-row preview is serialised, so the whole sheet is never
    # sent to the model.
    preview = dataframe.head(3)
    sample_text = preview.to_csv(index=False)

    # Each entry is one line of the prompt; the empty strings are the blank
    # separator lines. The four-space indent is part of the prompt text.
    prompt_lines = [
        "",
        "    You are a smart assistant. Based on the following CSV sample, generate a short, meaningful name for the table.",
        "",
        "    Instructions:",
        "    - Respond with only the name — no explanation, no punctuation, and no formatting.",
        "    - You may use spaces and line breaks freely in your response where appropriate.",
        "    - Preserve the format and indentation of any input text shown.",
        "",
        "    CSV Sample:",
        f"    {sample_text}",
        "",
        "    Table name:",
    ]
    return "\n".join(prompt_lines)

# Import the mini LLM that reads the data sample
from langchain_core.messages import AIMessage
from config import llm_mini

# function that generates a name
def name_generator(csv_file):
    """Ask the mini LLM for a short, meaningful name for a table.

    Args:
        csv_file: despite the name, this is the table object itself (a pandas
            DataFrame in this notebook); it is handed straight to
            ``table_name_gnereator_prompt``, which samples its first rows.

    Returns:
        The ``content`` attribute of the model's response.
    """
    # Function-scope import keeps this definition self-contained; the module
    # is the same one the notebook already imports AIMessage from.
    from langchain_core.messages import HumanMessage

    prompt_text = table_name_gnereator_prompt(csv_file)

    # The prompt is an instruction *to* the model, so it must carry the
    # human/user role. AIMessage marks text as the model's own earlier
    # output, which chat providers may reject or treat as a reply to continue.
    request = HumanMessage(content=prompt_text)

    response = llm_mini.invoke([request])

    return response.content


In [21]:
# Path of the input workbook; a relative path, so it is resolved against the
# notebook's current working directory when the file is opened.
excel_file = "user_data.xlsx"

In [19]:
import hashlib
import pandas as pd

# Registry of every sheet in the workbook, keyed by a short hash of the
# sheet's original name. Each value holds the original name, the
# LLM-generated name, and the sheet contents serialised as CSV text.
table_dict = {}


# Open the workbook inside a context manager so the underlying file handle is
# closed once all sheets are read (a bare pd.ExcelFile(...) keeps the file
# open until it is garbage-collected).
with pd.ExcelFile(excel_file, engine="openpyxl") as xls:
    sheet_names = xls.sheet_names

    for given_name in sheet_names:

        # First 8 hex digits of the MD5 of the sheet name: a compact, stable
        # identifier, not a security measure.
        # NOTE(review): two sheet names whose truncated hashes collide would
        # silently overwrite each other in table_dict.
        table_hash = hashlib.md5(given_name.encode()).hexdigest()[:8]

        df_sheet = pd.read_excel(xls, sheet_name=given_name)

        # Ask the LLM for a descriptive name based on a sample of the sheet.
        new_name = name_generator(df_sheet)
        print(new_name)

        table_dict[table_hash] = {
            "given_name": given_name,
            "rename_name": new_name,
            "csv_data": df_sheet.to_csv(index=False)
        }

Code Completion Sample Counts
Language Model Category Scores
Language Model Performance on OSS vs Proprietary Data
Language Model Code Translation Quality Metrics
Code Translation Quality by Model and BM25
C Sharp Code Davinci 002 Shot Source Performance
C Sharp Code Davinci 002 BLEU Scores by Shot Source
Identifier and Nonidentifier Accuracy by Model and Language


In [30]:
# Store this information in a summary Excel workbook

import shutil

# Destination file path
# Destination workbook: a copy of the input file that gains a "Summary" sheet.
intended_file = "extracted_tables.xlsx"

# Copy the input workbook first so the summary is written to the copy and the
# original file is left untouched.
try:
    shutil.copyfile(excel_file, intended_file)

except Exception as e:
    # Report the failure, then stop the cell. Continuing would either fail
    # later when the writer opens a missing file in append mode, or silently
    # add the summary to a stale copy left over from an earlier run.
    print(f"❌ Unexpected error: {e}")
    raise

# Build the summary frame before opening the writer so the workbook is only
# held open for the actual write. One row per sheet: hash key, original sheet
# name, LLM-generated name.
summary_rows = [
    (table_hex_name, info["given_name"], info["rename_name"])
    for table_hex_name, info in table_dict.items()
]
summary_df = pd.DataFrame(summary_rows, columns=["table_hash", "given_name", "rename_name"])

# mode="a" keeps the sheets that were copied over; if_sheet_exists="replace"
# overwrites any sheet already named "Summary" in the copy.
with pd.ExcelWriter(intended_file, engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
    summary_df.to_excel(writer, sheet_name="Summary", index=False)

# Reload the "Summary" sheet from the workbook on disk into `summary`.
# NOTE(review): this statement only loads the sheet; it does not print or display it.
summary = pd.read_excel(intended_file, sheet_name="Summary", engine="openpyxl")