In [1]:
!pip install transformers pandas torch gpytorch==1.12 requests

import os
import re
import pandas as pd
import matplotlib.pyplot as plt
import warnings
import torch
import requests
import lam_adapt
import numpy as np
from pathlib import Path
from google.colab import files
from transformers import BertTokenizer, BertForQuestionAnswering, pipeline

search_url = "https://api.github.com/repos/hwlee924/Large-Airfoil-Model/contents/ASPIRE/Airfoils"
headers = {"Authorization": "token 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"}

def load_bert_pipeline():
    """
    builds the BERT question-answering pipeline, placing it on the gpu when one is available

    returns: the transformers "question-answering" pipeline object
    """
    checkpoint = "bert-large-uncased-whole-word-masking-finetuned-squad"
    tokenizer = BertTokenizer.from_pretrained(checkpoint)
    model = BertForQuestionAnswering.from_pretrained(checkpoint)

    #pick the torch device name and the matching pipeline device index together
    if torch.cuda.is_available():
        device, pipeline_device = "cuda", 0
    else:
        device, pipeline_device = "cpu", -1

    model.to(device)
    print(f"Using device: {device}")

    return pipeline(
        "question-answering",
        model=model,
        tokenizer=tokenizer,
        device=pipeline_device,
    )


def load_lam_model():
    """
    loads the LAM model through lam_adapt.unpack_model, uses gpu if available

    returns: (model, use_gpu) where use_gpu is the flag that was passed to unpack_model
    """
    #re-assert the resolved current directory as the working directory before unpacking
    os.chdir(Path().resolve())

    gpu_available = bool(torch.cuda.is_available())
    #unpack_model also returns a likelihood object, which is not used here
    lam_model, _likelihood = lam_adapt.unpack_model(use_gpu=gpu_available)
    return lam_model, gpu_available

#first signed integer/decimal (optionally in scientific notation) inside a text span
_NUMBER_RE = re.compile(r"[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?")


def extract_query_parameters(query, qa_pipeline):
    """
    uses bert to extract data from the user query

    query: user input text
    qa_pipeline: callable invoked as qa_pipeline(question=..., context=...) that
                 returns a mapping with an "answer" entry (the BERT model)

    returns: dict with the keys "Airfoil Name", "Angle of Attack", "Mach Number"
             and "Reynolds Number". The airfoil name is a stripped string, the
             other values are floats. A value is None when it could not be
             extracted, including an empty airfoil name, so callers can detect
             the failure with a simple None check.
    """

    #background context for the model to use when answering questions
    #NOTE(review): "center of pressure" presumably should read "pressure coefficient" (Cp);
    #left unchanged because this wording is part of the model input
    structured_context = f"""
    The user wants to plot center of pressure distribution.
    The airfoil name, angle of attack, Mach number, and Reynolds number are mentioned in the query.
    Extract these values from the following user request: {query}
    """

    #questions for the model to find the various values in the user's prompt
    parameters = {
        "Airfoil Name": "What is the airfoil name?",
        "Angle of Attack": "What is the angle of attack in degrees, allowing for optional negative degrees?",
        "Mach Number": "What is the Mach number?",
        "Reynolds Number": "What is the Reynolds number or re?"
    }

    extracted_parameters = {}

    for param, question in parameters.items():
        try:
            #uses BERT to extract the data from the query
            response = qa_pipeline(question=question, context=structured_context)
            extracted_value = response["answer"]

            print(f"BERT raw output for {param}: {extracted_value}")

            #remove any commas (thousands separators) from the extracted data
            extracted_value = extracted_value.replace(",", "").strip()

            if param == "Airfoil Name":
                #an empty name would later match every airfoil folder, so report it as missing
                extracted_parameters[param] = extracted_value or None
            else:
                #extract only the numerical values
                match = _NUMBER_RE.search(extracted_value)
                extracted_parameters[param] = float(match.group(0)) if match else None

        except Exception as e:
            #deliberate best-effort: any pipeline failure marks just this value as missing
            print(f"Error extracting {param}: {e}")
            extracted_parameters[param] = None

    print(f"Extracted Parameters: {extracted_parameters}")
    return extracted_parameters

def get_airfoil_geometry(file_dict, subfolder):
    """
    downloads the coordinates file for a specific airfoil and returns its local filename

    file_dict: mapping of folder name -> {filename: download url}
    subfolder: folder to look in; when it is None or not present, "root" is used,
               and when there is no "root" entry either (the listing only has
               subfolders) every subfolder is searched in order

    returns: the filename the coordinates were saved under in the working
             directory, or None if no coordinates file exists or the download failed
    """
    if not file_dict:
        return None

    if subfolder is not None and subfolder in file_dict:
        folders = [subfolder]
    elif "root" in file_dict:
        folders = ["root"] #use root if no subfolder
    else:
        #previously this indexed file_dict["root"] and raised KeyError
        folders = list(file_dict)

    for folder in folders:
        for filename, url in file_dict[folder].items():
            if not filename.endswith(("_coordinates.csv", "_coordinates_.csv")): #find coordinates file
                continue
            response = requests.get(url, headers=headers)
            if response.status_code != 200:
                print("Error fetching airfoil coordinates file from GitHub.")
                return None
            with open(filename, 'wb') as f:
                f.write(response.content)
            print(f"Using coordinates file: {filename}")
            return filename
    return None

def get_airfoil_filenames(airfoil_name):
    """
    fetches the list of available airfoil data files from GitHub given the airfoil name

    airfoil_name: name to look for; matched case-insensitively against the folder
                  names, preferring an exact match over a substring match

    returns: dict of folder name -> {filename: download url}. The folders are the
             airfoil's subfolders, or a single "root" entry when there are no
             subfolders (or none could be listed). Returns None when the name is
             empty, no folder matches, or a listing request fails.
    """
    wanted = (airfoil_name or "").strip().lower()
    if not wanted:
        #an empty name is a substring of every folder name, so refuse it up front
        return None

    response = requests.get(search_url, headers=headers)
    if response.status_code != 200:
        print(response.status_code)
        print("Error fetching airfoil list from GitHub.")
        return None

    airfoil_files = response.json()
    available_airfoils = {entry['name']: entry['url'] for entry in airfoil_files if entry['type'] == 'dir'} #create dictionary of all folders
    matching_airfoils = [name for name in available_airfoils if wanted in name.lower()] #checks for a match with the airfoil folder

    if not matching_airfoils:
        return None

    #an exact folder name wins over the first folder that merely contains the query
    exact_matches = [name for name in matching_airfoils if name.lower() == wanted]
    selected_airfoil = exact_matches[0] if exact_matches else matching_airfoils[0]
    print(f"Using airfoil: {selected_airfoil}")

    airfoil_url = available_airfoils[selected_airfoil]
    response = requests.get(airfoil_url, headers=headers) #gets files for airfoil
    if response.status_code != 200:
        print(f"Error fetching files for airfoil '{selected_airfoil}'.")
        return None

    airfoil_data_files = response.json()
    subfolder_urls = {entry['name']: entry['url'] for entry in airfoil_data_files if entry['type'] == 'dir'} #checks for subfolders
    file_dict = {}

    for subfolder_name, subfolder_url in subfolder_urls.items():
        subfolder_response = requests.get(subfolder_url, headers=headers)
        if subfolder_response.status_code != 200:
            #keep going with the other subfolders, but say that this one is missing
            print(f"Skipping subfolder '{subfolder_name}': could not fetch its file list.")
            continue
        subfolder_files = subfolder_response.json()
        file_dict[subfolder_name] = {entry['name']: entry['download_url'] for entry in subfolder_files if entry['type'] == 'file'} #adds subfolders to dictionary

    if not file_dict:
        file_dict["root"] = {entry['name']: entry['download_url'] for entry in airfoil_data_files if entry['type'] == 'file'} #subfolder is root if no subfolders

    return file_dict


#A<angle>_M<mach>_Re<reynolds>; a leading 'm' on the angle marks a negative value
_FILENAME_METADATA_RE = re.compile(
    r'A(m?\d*\.\d+|m?\d+)_M([-+]?\d*\.\d+|\d+)_Re((?:\d+\.\d+|\d+)(?:e[+-]?\d+)?)',
    re.IGNORECASE,
)


def extract_metadata_from_filename(filename):
    """
    uses regex to extract values from the given filename

    filename: name containing a section of the form A<angle>_M<mach>_Re<reynolds>,
              matched case-insensitively

    returns: dict with "angle_of_attack", "mach_number", "reynolds_number" (floats)
             and "filename", or None when the filename has no such section
    """
    print(f"Checking filename: {filename}")

    #uses regex to search for angle of attack value, mach number, and reynolds number in the file
    match = _FILENAME_METADATA_RE.search(filename)

    if not match:
        print(f"No match found for {filename}")
        return None

    aoa_text = match.group(1)
    #the pattern is case-insensitive, so the negative marker may be 'm' or 'M'
    if aoa_text[0] in "mM":
        aoa_value = -float(aoa_text[1:]) #adds a negative sign to angle of attack values starting with 'm'
    else:
        aoa_value = float(aoa_text)

    extracted_data = {
        "angle_of_attack": aoa_value,
        "mach_number": float(match.group(2)),
        "reynolds_number": float(match.group(3)),
        "filename": filename
    }
    print(f"Extracted from {filename}: {extracted_data}")
    return extracted_data


def find_best_matching_file(query_params, file_dict, angle_score, mach_score, re_score):
    """
    Finds the best matching file based on the user's query parameters.

    query_params: dict with numeric "Angle of Attack", "Mach Number" and
                  "Reynolds Number" entries
    file_dict: mapping of folder name -> {filename: download url}
    angle_score, mach_score, re_score: thresholds; a file is a candidate only when
                  each absolute difference is strictly below its threshold

    One best candidate is kept per folder. With a single candidate it is returned
    directly; with several, the user is asked which folder to use.

    returns: (url, subfolder) of the chosen file, or (None, None) when nothing
             qualifies or the user's selection is invalid
    """
    best_matches = []

    for subfolder, folder_files in file_dict.items(): #check for score by subfolder
        best_subfolder_score = float('inf')
        best_subfolder_match = None
        for filename, url in folder_files.items():
            metadata = extract_metadata_from_filename(filename)
            if not metadata:
                continue

            angle_diff = abs(metadata["angle_of_attack"] - query_params["Angle of Attack"])
            mach_diff = abs(metadata["mach_number"] - query_params["Mach Number"])
            re_diff = abs(metadata["reynolds_number"] - query_params["Reynolds Number"])

            if angle_diff < angle_score and mach_diff < mach_score and re_diff < re_score:
                #each difference is scaled by its threshold so the three terms are
                #comparable; a raw sum is dominated by the Reynolds number difference.
                #the thresholds are non-zero here because a difference is below each
                score = angle_diff / angle_score + mach_diff / mach_score + re_diff / re_score
                if score <= best_subfolder_score:
                    best_subfolder_match = (filename, url, subfolder)
                    best_subfolder_score = score
        if best_subfolder_match:
            best_matches.append(best_subfolder_match)

    if not best_matches:
        print("No suitable file match found.")
        return None, None

    if len(best_matches) == 1:
        filename, url, subfolder = best_matches[0]
        print(f"Best matching file found in {subfolder}: {filename}")
        return url, subfolder

    print("Multiple matching files found in different subfolders:") #ask user which subfolder to use
    for i, (filename, url, subfolder) in enumerate(best_matches, 1):
        print(f"{i}. {filename} in {subfolder}")

    try:
        choice = int(input("Enter the number of the subfolder you want to use: ")) - 1
    except ValueError:
        #non-numeric input is handled like an out-of-range number
        print("Invalid selection.")
        return None, None

    if 0 <= choice < len(best_matches):
        filename, url, subfolder = best_matches[choice]
        print(f"Selected file from {subfolder}: {filename}")
        return url, subfolder

    print("Invalid selection.")
    return None, None


def download_and_plot_airfoil_data(file_url):
    """
    Downloads the airfoil data file and plots the Cp distribution.

    file_url: url of the data file; its first two columns are plotted as x/c and Cp
    returns: None (prints an error and returns early if the download fails)
    """
    #downloads the best matching file
    print(f"Downloading file from: {file_url}")
    response = requests.get(file_url)
    if response.status_code != 200:
        print("Error downloading file.")
        return

    #skips the first line and blank lines; commas are treated like whitespace
    rows = []
    for raw_line in response.text.split("\n")[1:]:
        if not raw_line.strip():
            continue
        rows.append(raw_line.replace(',', ' ').split())

    #only the first two columns are used
    table = pd.DataFrame(rows, dtype=float).iloc[:, :2]
    x_values = table.iloc[:, 0]
    cp_values = table.iloc[:, 1]

    #plots the data with an inverted y axis
    plt.figure(figsize=(8, 6))
    plt.plot(x_values, cp_values, marker='o', linestyle='-')
    plt.title("Cp Distribution")
    plt.xlabel("x/c")
    plt.ylabel("Cp")
    plt.grid()
    plt.gca().invert_yaxis()
    plt.show()

def predict_distribution(query_params, coordinates_file, lam_model, use_gpu):
    """
    predicts distribution of cp using LAM and plots it

    query_params: dict providing "Angle of Attack" and "Mach Number"
    coordinates_file: value handed to lam_adapt.input_data as the airfoil input
    lam_model: model whose predict(airfoil, get_coeff=True) result is plotted
    use_gpu: forwarded to lam_adapt.input_data

    The plot shows the predicted mean for both surfaces with a 2-sigma band, and
    annotates the cl, cd and cm means with twice their standard deviations.
    returns: None
    """
    print("predicting...")
    demo_num_points_per_surface = 120
    #NOTE(review): the Reynolds number from the query is not forwarded to
    #lam_adapt.input_data; confirm the model does not take it as an input
    airfoil = lam_adapt.input_data(
        coordinates_file,
        query_params["Angle of Attack"],
        query_params["Mach Number"],
        demo_num_points_per_surface,
        use_gpu=use_gpu,
    )
    predictions = lam_model.predict(airfoil, get_coeff=True)

    # get prediction mean and standard deviation
    cp_distribution = predictions['cp_distribution']
    prediction_mean = cp_distribution.mean.cpu().detach().numpy()
    prediction_sig = np.sqrt(np.diag(cp_distribution.covariance_matrix.cpu().detach().numpy()))

    # organize into upper and lower surfaces for plotting:
    # the first demo_num_points_per_surface entries are one surface, the rest the other
    n = demo_num_points_per_surface
    _, ax = plt.subplots()
    test_xcu, test_xcl = predictions['xc'][:n].cpu(), predictions['xc'][n:].cpu() # x/c
    test_cpu, test_cpl = prediction_mean[:n], prediction_mean[n:] # C_p
    test_2sigu, test_2sigl = 2*prediction_sig[:n], 2*prediction_sig[n:] # 2 sigma in C_p

    plt.plot(test_xcu, test_cpu, 'r-', label='Predicted mean')
    plt.plot(test_xcl, test_cpl, 'r-')
    # raw strings keep the LaTeX backslashes without relying on invalid escape sequences
    plt.fill_between(test_xcu, test_cpu-test_2sigu, test_cpu+test_2sigu, color='lightgray', label=r'Predicted 2$\sigma$')
    plt.fill_between(test_xcl, test_cpl-test_2sigl, test_cpl+test_2sigl, color='lightgray')
    plt.xlabel('x/c')
    plt.ylabel('$C_p$')
    plt.gca().invert_yaxis()
    plt.legend()

    # one annotation per coefficient: mean +/- two standard deviations
    for y_pos, symbol, key in ((0.7, 'c_l', 'cl'), (0.65, 'c_d', 'cd'), (0.6, 'c_m', 'cm')):
        mean_text = str(np.round(predictions[key + '_mean'], 3))
        spread_text = str(np.round(2*predictions[key + '_stdev'], 3))
        plt.text(0.67, y_pos, '$' + symbol + '$ = ' + mean_text + r' $\pm$ ' + spread_text, transform=ax.transAxes)
    plt.show()

def _read_threshold(prompt, default):
    """prompts for a float threshold, keeping the default when the input is not a number"""
    try:
        return float(input(prompt))
    except ValueError:
        print(f"Invalid number. Keeping the default threshold {default}.")
        return default


def _predict_without_dataset_geometry(query_params, airfoil_name, lam_model, use_gpu, upload_prompt):
    """runs a LAM prediction from the NACA name itself, or from a csv the user uploads"""
    if airfoil_name.upper().startswith("NACA"):
        #the upper-cased NACA name is passed in place of a coordinates file
        print("Using LAM to predict distribution.")
        predict_distribution(query_params, airfoil_name.upper(), lam_model, use_gpu)
        return

    print(upload_prompt)
    uploaded_files = files.upload()
    if not uploaded_files:
        #nothing was uploaded, so there is no file to index
        print("No file uploaded. Please try again.")
        return
    user_file = next(iter(uploaded_files))
    print("Using LAM to predict distribution.")
    predict_distribution(query_params, user_file, lam_model, use_gpu)


def process_user_query():
    """
    main loop for user interaction, prompts user for query and processes it until user exits

    For each query the parameters are extracted with BERT, then:
    - if a dataset file matches within the thresholds, it is downloaded and plotted
    - otherwise LAM predicts the distribution, using the dataset's coordinates file,
      the NACA name, or a csv uploaded by the user (in that order of preference)
    """
    qa_pipeline = load_bert_pipeline()
    lam_model, use_gpu = load_lam_model()

    angle_score, mach_score, re_score = 0.1, 0.01, 1e5
    print("The current thresholds for matching files are: 0.1 degree for angle of attack, 0.01 for mach number, and 1e5 for reynolds number.")
    change_thresholds = input("Do you want to change the thresholds? (yes/no): ").strip().lower() #prompts user to change thresholds
    if change_thresholds in ("yes", "y"):
        angle_score = _read_threshold("Enter new angle of attack threshold: ", angle_score)
        mach_score = _read_threshold("Enter new mach number threshold: ", mach_score)
        re_score = _read_threshold("Enter new reynolds number threshold: ", re_score)

    while True:
        query = input("Enter your query (or type 'exit' to quit): ")
        if query.strip().lower() == "exit":
            print("Exiting...")
            break

        query_params = extract_query_parameters(query, qa_pipeline)
        if not query_params or None in query_params.values():
            print("Failed to extract parameters. Please try again.")
            continue

        airfoil_name = query_params["Airfoil Name"]
        print(f"Detected airfoil: {airfoil_name}")

        file_dict = get_airfoil_filenames(airfoil_name)
        if not file_dict: #if airfoil is not in folder, prompts user to enter a csv if not a NACA airfoil
            _predict_without_dataset_geometry(
                query_params, airfoil_name, lam_model, use_gpu,
                "Airfoil data not found. Enter a csv file of coordinates for prediction.",
            )
            continue

        file_url, subfolder = find_best_matching_file(query_params, file_dict, angle_score, mach_score, re_score)
        if file_url:
            download_and_plot_airfoil_data(file_url)
            continue

        #if no file match in folder, uses LAM, prompts user for csv if no coordinates file present
        print("No suitable file found. Using LAM to predict distribution.")
        coordinates_file = get_airfoil_geometry(file_dict, subfolder or "root")
        if coordinates_file:
            predict_distribution(query_params, coordinates_file, lam_model, use_gpu)
        else:
            _predict_without_dataset_geometry(
                query_params, airfoil_name, lam_model, use_gpu,
                "Coordinates file not found. Enter a csv file of coordinates for prediction.",
            )

# Entry point: start the interactive query loop when this file is executed
# directly (it is not started when the file is imported as a module).
if __name__ == "__main__":
    process_user_query()


Collecting gpytorch==1.12
  Downloading gpytorch-1.12-py3-none-any.whl.metadata (8.0 kB)
Collecting linear-operator>=0.5.2 (from gpytorch==1.12)
  Downloading linear_operator-0.6-py3-none-any.whl.metadata (15 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0


Using device: cuda
Loading in model...
    Missing lam_L.pt file.
This is a large file (6.1 GB) that will need to be downloaded from https://drive.google.com/uc?export=download&id=1uN1zAYjMSJgsjuYRmBlAVaKJ9rTXA42Q
    Would you like to automatically download this file? y/n
y


Downloading...
From (original): https://drive.google.com/uc?id=1uN1zAYjMSJgsjuYRmBlAVaKJ9rTXA42Q
From (redirected): https://drive.google.com/uc?id=1uN1zAYjMSJgsjuYRmBlAVaKJ9rTXA42Q&confirm=t&uuid=f208f9a6-2ef3-4af9-9ee5-daf0e650cb2a
To: /content/model/lam_L.pt
100%|██████████| 6.53G/6.53G [01:24<00:00, 77.5MB/s]


    Download finished
    Loading complete!
The current thresholds for matching files are: 0.1 degree for angle of attack, 0.01 for mach number, and 1e5 for reynolds number.
Do you want to change the thresholds? (yes/no): no


KeyboardInterrupt: Interrupted by user