In [None]:
import math

# Bearing and system parameters
n = 9                      # number of balls (typical range 7-12)
N_rpm = 70                 # shaft speed in RPM
N = N_rpm / 60             # shaft rotation frequency in Hz (rev/s)
d = 0.5                    # ball diameter in inches
D = 1.94                   # pitch diameter in inches
beta_deg = 35              # contact angle in degrees
beta_rad = math.radians(beta_deg)
cos_beta = math.cos(beta_rad)

# Calculate each characteristic defect frequency.
# The outer-race pass frequency uses (1 - (d/D)cos(beta)) and the inner-race
# pass frequency uses (1 + (d/D)cos(beta)); BPFO + BPFI must equal n * N.
# The two signs were previously swapped.
BPFO = (n / 2) * N * (1 - (d / D) * cos_beta)
BPFI = (n / 2) * N * (1 + (d / D) * cos_beta)
BSF = (D / (2 * d)) * N * (1 - ((d / D) * cos_beta)**2)
FTF = 0.5 * N * (1 - (d / D) * cos_beta)


# Print results
print(f"BPFO (Outer Race Frequency): {BPFO:.2f} Hz")
print(f"BPFI (Inner Race Frequency): {BPFI:.2f} Hz")
print(f"BSF  (Ball Spin Frequency) : {BSF:.2f} Hz")
print(f"FTF  (Cage Frequency)      : {FTF:.2f} Hz")

# Next step: generate 1000 synthetic records from these formulas by sampling
# the bearing and system parameters.


BPFO (Outer Race Frequency): 6.36 Hz
BPFI (Inner Race Frequency): 4.14 Hz
BSF  (Ball Spin Frequency) : 2.16 Hz
FTF  (Cage Frequency)      : 0.46 Hz


#

In [None]:
import pandas as pd
import random
import math

# Function to compute bearing defect frequencies
def compute_frequencies(n, N_rpm, d, D, beta_deg):
    """Compute the four characteristic bearing defect frequencies in Hz.

    Args:
        n: Number of rolling elements.
        N_rpm: Shaft speed in revolutions per minute.
        d: Ball diameter (same length unit as ``D``).
        D: Pitch diameter (same length unit as ``d``).
        beta_deg: Contact angle in degrees.

    Returns:
        Tuple ``(BPFO, BPFI, BSF, FTF)``.

    The outer-race frequency uses ``1 - (d/D)cos(beta)`` and the inner-race
    frequency uses ``1 + (d/D)cos(beta)``, so ``BPFO + BPFI == n * shaft_hz``.
    The two signs were previously swapped.
    """
    shaft_hz = N_rpm / 60  # Convert RPM to Hz
    ratio = (d / D) * math.cos(math.radians(beta_deg))

    BPFO = (n / 2) * shaft_hz * (1 - ratio)
    BPFI = (n / 2) * shaft_hz * (1 + ratio)
    BSF = (D / (2 * d)) * shaft_hz * (1 - ratio**2)
    FTF = 0.5 * shaft_hz * (1 - ratio)

    return BPFO, BPFI, BSF, FTF

# Generate synthetic data
# Seed Python's RNG so that a fresh "Restart & Run All" regenerates exactly
# the same CSV; change RANDOM_SEED to draw a different sample.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

data = []

for _ in range(1000):
    n = random.randint(6, 16)  # Number of balls
    N_rpm = random.uniform(30, 3000)  # Shaft speed in RPM
    d = round(random.uniform(0.25, 1.0), 3)  # Ball diameter in inches
    D = round(random.uniform(d + 0.5, d + 3.0), 3)  # Ensure D > d
    beta_deg = random.uniform(15, 35)  # Contact angle in degrees

    # Compute frequencies
    BPFO, BPFI, BSF, FTF = compute_frequencies(n, N_rpm, d, D, beta_deg)

    # Append one record: the sampled parameters plus the derived frequencies
    data.append({
        'n': n,
        'N_rpm': N_rpm,
        'd_in': d,
        'D_in': D,
        'beta_deg': beta_deg,
        'BPFO_Hz': BPFO,
        'BPFI_Hz': BPFI,
        'BSF_Hz': BSF,
        'FTF_Hz': FTF
    })

# Create a pandas DataFrame (built once from the list of dicts)
df = pd.DataFrame(data)

# Save to CSV
df.to_csv("vibration_frequencies_data.csv", index=False)
print("Saved to 'vibration_frequencies_data.csv'")


Saved to 'vibration_frequencies_data.csv'


In [None]:
# Preview the first 50 rows of the current `df`.
df.head(50)


   BPFO	        BPFI	      BSF	    FTF     Anomaly
213.971505	113.736076	32.693356	8.748929    0

Unnamed: 0,n,N_rpm,d_in,D_in,beta_deg,BPFO_Hz,BPFI_Hz,BSF_Hz,FTF_Hz
0,13,1512.496529,0.838,2.398,28.92356,213.971505,113.736076,32.693356,8.748929
1,11,2409.167428,0.43,2.385,21.418728,257.906589,183.774106,108.217022,16.706737
2,7,1623.380179,0.352,1.714,34.776457,110.671223,78.723131,63.998552,11.246162
3,9,2010.337337,0.686,3.02,18.079299,183.333317,118.217284,70.312492,13.135254
4,15,664.823986,0.261,1.062,18.92523,102.422569,63.783427,21.324535,4.252228
5,9,1379.047306,0.609,2.636,23.726342,125.304153,81.552942,47.517138,9.061438
6,14,1983.251356,0.956,3.014,30.39488,294.682858,168.075792,48.20508,12.005414
7,6,102.304891,0.75,3.372,30.108299,6.099473,4.131016,3.691117,0.688503
8,11,279.289783,0.722,2.086,29.838275,33.288007,17.91512,6.118225,1.628647
9,8,1550.012268,0.55,1.474,32.220321,135.953971,70.714331,31.167378,8.839291


## Gemini LLM anomaly detection

In [None]:
import os
import pandas as pd
import numpy as np
import json
import google.generativeai as genai

# Configure the Gemini API
def setup_gemini_api(api_key) -> None:
    """Configure the `google.generativeai` client with the given API key.

    The key is forwarded unchanged to ``genai.configure``; nothing is returned.
    """
    genai.configure(api_key=api_key)

# Load vibration dataset
def load_dataset(file_path):
    """Load a reference dataset into a DataFrame based on its file extension.

    Args:
        file_path: Path to a ``.csv``, ``.xlsx``/``.xls`` or whitespace-delimited
            ``.txt`` file. Any object convertible with ``str()`` is accepted and
            the extension is matched case-insensitively.

    Returns:
        pandas.DataFrame with the file contents.

    Raises:
        ValueError: If the extension is not one of the supported formats.
    """
    path_text = str(file_path)
    lowered = path_text.lower()  # so "DATA.CSV" is treated like "data.csv"

    if lowered.endswith('.csv'):
        return pd.read_csv(path_text)
    if lowered.endswith(('.xlsx', '.xls')):
        return pd.read_excel(path_text)
    if lowered.endswith('.txt'):
        # sep=r"\s+" is the supported spelling of the deprecated
        # delim_whitespace=True option.
        return pd.read_csv(path_text, sep=r'\s+')
    raise ValueError("Unsupported file format. Please provide a CSV, Excel, or text file.")

# Ask Gemini to check if the given frequency values indicate an anomaly
def analyze_frequencies_with_llm(fault_freqs, dataset_data, model_name="gemini-1.5-pro"):
    """Ask a Gemini model whether the given fault frequencies look anomalous.

    Args:
        fault_freqs: Mapping with keys ``'BPFO'``, ``'BPFI'``, ``'BSF'`` and
            ``'FTF'`` (values in Hz).
        dataset_data: Text rendering of the reference records; embedded
            verbatim in the prompt.
        model_name: Name passed to ``genai.GenerativeModel``.

    Returns:
        dict: The parsed JSON object from the reply when one is found;
        ``{"raw_response": text}`` when the reply contains no ``{...}`` span;
        ``{"error": message, "raw_response": text_or_None}`` when the reply
        text cannot be read or parsed.
    """
    # The reference data comes from the `dataset_data` argument. The prompt
    # previously referenced an unrelated global name, which raised NameError
    # unless another cell had happened to define it.
    prompt = f"""
You are an expert in bearing vibration analysis.

The user has provided the following bearing fault frequencies (in Hz):
- BPFO (Ball Pass Frequency Outer Race): {fault_freqs['BPFO']}
- BPFI (Ball Pass Frequency Inner Race): {fault_freqs['BPFI']}
- BSF (Ball Spin Frequency): {fault_freqs['BSF']}
- FTF (Fundamental Train Frequency): {fault_freqs['FTF']}

Here is historical bearing vibration data (in Hz) from a real dataset:
Any data does not follow such pattern can be claimed as anomaly.
{dataset_data}

Based on the above dataset and provided values, determine:
1. If the user-input frequencies indicate an anomaly.
2. Which component is most likely affected (outer race, inner race, rolling element, cage).
3. Severity of the anomaly (low, medium, high).
4. Maintenance recommendation.

Format your response as JSON:
{{
    "fault_detected": true/false,
    "fault_type": "outer_race/inner_race/rolling_element/cage/none",
    "severity": "low/medium/high",
    "confidence": 0-100,
    "diagnosis": "Your detailed diagnosis",
    "recommendations": ["List of recommendations"]
}}
"""

    model = genai.GenerativeModel(model_name=model_name)
    response = model.generate_content(prompt)

    # Read the reply text once. If reading it fails, report that instead of
    # touching `response.text` again inside the error handler.
    try:
        response_text = response.text
    except Exception as e:
        return {"error": str(e), "raw_response": None}

    # Take the outermost {...} span so prose or code fences around the JSON
    # are ignored.
    start_idx = response_text.find('{')
    end_idx = response_text.rfind('}') + 1
    if not (start_idx >= 0 and end_idx > start_idx):
        return {"raw_response": response_text}

    try:
        return json.loads(response_text[start_idx:end_idx])
    except Exception as e:
        return {
            "error": str(e),
            "raw_response": response_text
        }

# Serialize selected columns to a readable format
def serialize_columns(df, columns, max_rows=30):
    """Return the first ``max_rows`` rows of the chosen columns as plain text.

    The row limit keeps the prompt small; the index is omitted from the text.
    """
    subset = df[columns]
    return subset.iloc[:max_rows].to_string(index=False)

# Main function
def main():
    """Interactive entry point for the Gemini fault-frequency check.

    Prompts for an API key (when none is set in the code), a reference
    dataset path and four frequencies, sends them to
    ``analyze_frequencies_with_llm`` and prints the result as JSON.
    Returns early, after printing a message, if the dataset cannot be loaded
    or a frequency is not numeric.
    """
    # Imported locally: getpass keeps the key out of the saved notebook
    # output, which echoing input() would not.
    from getpass import getpass

    print("=== Bearing Fault Frequency Anomaly Detection with Gemini ===\n")

    # Gemini API Key
    api_key = ""
    if not api_key:
        api_key = getpass("Enter your Gemini API key: ")

    setup_gemini_api(api_key)

    dataset_path = input("Enter path to reference dataset (CSV/Excel/TXT): ")
    try:
        dataset = load_dataset(dataset_path)

        required_columns = ["BPFO_Hz", "BPFI_Hz", "BSF_Hz", "FTF_Hz"]
        for col in required_columns:
            if col not in dataset.columns:
                raise ValueError(f"Missing required column: {col}")

        dataset_serialized = serialize_columns(dataset, required_columns)
        # Print the text that will be sent. This previously printed an
        # undefined name, and the NameError was reported as a load failure.
        print(dataset_serialized)

    except Exception as e:
        print("Failed to load dataset:", e)
        return

    # User input frequencies
    try:
        BPFO = float(input("Enter BPFO (Hz): "))
        BPFI = float(input("Enter BPFI (Hz): "))
        BSF = float(input("Enter BSF (Hz): "))
        FTF = float(input("Enter FTF (Hz): "))

        fault_freqs = {
            "BPFO": BPFO,
            "BPFI": BPFI,
            "BSF": BSF,
            "FTF": FTF
        }
    except ValueError:
        print("Invalid input. Please enter numerical values for frequencies.")
        return

    print("\nAnalyzing with Gemini... Please wait.\n")
    result = analyze_frequencies_with_llm(fault_freqs, dataset_serialized)

    print("\n=== Analysis Result ===")
    print(json.dumps(result, indent=4))


if __name__ == "__main__":
    main()


=== Bearing Fault Frequency Anomaly Detection with Gemini ===

Enter path to reference dataset (CSV/Excel/TXT): 32.28800724893038
Failed to load dataset: Unsupported file format. Please provide a CSV, Excel, or text file.


In [None]:
import pandas as pd

# Load your dataset
df = pd.read_csv('/content/vibration_frequencies_data.csv')  # Replace with your actual file path

# Build one sentence per DataFrame row. Every record from this file is
# labelled as a non-anomaly, so the sentence always ends with "false".
serialized_records = [
    (
        f"For this record, the value of BPFO_Hz is {row['BPFO_Hz']}, "
        f"BPFI_Hz is {row['BPFI_Hz']}, "
        f"BSF_Hz is {row['BSF_Hz']}, and "
        f"FTF_Hz is {row['FTF_Hz']}. "
        f"These values are not an anomaly - false."
    )
    for _, row in df.iterrows()
]

# serialized_records now holds the textual form of every record
print(serialized_records)


['For this record, the value of BPFO_Hz is 213.9715051317358, BPFI_Hz is 113.7360761914522, BSF_Hz is 32.69335556785792, and FTF_Hz is 8.748928937804015. These values are not an anomaly - false.', 'For this record, the value of BPFO_Hz is 257.9065891961503, BPFI_Hz is 183.77410589875, BSF_Hz is 108.21702166337406, and FTF_Hz is 16.706736899886362. These values are not an anomaly - false.', 'For this record, the value of BPFO_Hz is 110.67122308315888, BPFI_Hz is 78.72313116536385, BSF_Hz is 63.99855232264847, and FTF_Hz is 11.246161595051978. These values are not an anomaly - false.', 'For this record, the value of BPFO_Hz is 183.3333167652584, BPFI_Hz is 118.21728380811685, BSF_Hz is 70.31249159990683, and FTF_Hz is 13.135253756457429. These values are not an anomaly - false.', 'For this record, the value of BPFO_Hz is 102.42256948730704, BPFI_Hz is 63.78342711842304, BSF_Hz is 21.32453523031725, and FTF_Hz is 4.252228474561536. These values are not an anomaly - false.', 'For this reco


# ANOMALY OR NOT - INPUT TEST INDIVIDUAL

In [None]:

import pandas as pd
import json
import random
from sklearn.metrics import confusion_matrix, classification_report
import google.generativeai as genai

# Set your Gemini API key here
# NOTE(review): the value is an empty placeholder; avoid saving a real key in
# the notebook (e.g. load it from an environment variable instead).
API_KEY = ""
genai.configure(api_key=API_KEY)

# Initialize the Gemini model
# Module-level handle that predict_with_gemini() calls.
model = genai.GenerativeModel(model_name="gemini-2.5-pro-preview-05-06")

# Serialize a record to a natural language format
def serialize_record(row, include_label=True):
    """Render one record as an English sentence for the few-shot prompt.

    ``row`` must provide the four ``*_Hz`` keys. When ``include_label`` is
    true and ``row`` has an ``"anomaly"`` entry, a label sentence chosen by
    that entry's truthiness is appended.
    """
    template = (
        "For this record, the value of BPFO_Hz is {BPFO_Hz}, "
        "the value of BPFI_Hz is {BPFI_Hz}, "
        "the value of BSF_Hz is {BSF_Hz}, "
        "and the value of FTF_Hz is {FTF_Hz}."
    )
    sentence = template.format(
        BPFO_Hz=row['BPFO_Hz'],
        BPFI_Hz=row['BPFI_Hz'],
        BSF_Hz=row['BSF_Hz'],
        FTF_Hz=row['FTF_Hz'],
    )
    # Guard clause: unlabeled output for test rows or rows without a label.
    if not include_label or "anomaly" not in row:
        return sentence
    verdict = 'an anomaly - true' if row['anomaly'] else 'not an anomaly - false'
    return sentence + f" These values are {verdict}."

# Build the prompt with few-shot examples + test input
def build_prompt(train_df, test_row, few_shot_count=800):
    """Assemble the few-shot prompt: sampled labelled examples, then the query.

    Up to ``few_shot_count`` rows are drawn at random from ``train_df`` and
    rendered with labels; ``test_row`` is rendered without a label and
    followed by the JSON answer instruction.
    """
    shots = train_df.sample(n=min(few_shot_count, len(train_df)))
    example_lines = [serialize_record(example) + "\n" for _, example in shots.iterrows()]

    sections = [
        "You are an expert in bearing vibration anomaly detection.\n",
        "Here are some examples:\n\n",
        *example_lines,
        "\nNow analyze this record:\n",
        serialize_record(test_row, include_label=False),
        "\nIs it an anomaly? Respond in JSON as:\n",
        '{ "anomaly": true/false }',
    ]
    return "".join(sections)

# Predict anomaly using Gemini
def predict_with_gemini(prompt):
    """Send ``prompt`` to the module-level Gemini ``model`` and read the verdict.

    Returns the value stored under ``"anomaly"`` in the outermost ``{...}``
    span of the reply, or ``None`` when there is no such span, the key is
    absent, or any exception occurs (the exception is printed).
    """
    try:
        reply = model.generate_content(prompt).text
        open_at = reply.find('{')
        close_at = reply.rfind('}')
        # No opening brace, or no closing brace after it: nothing to parse.
        if open_at == -1 or close_at + 1 <= open_at:
            return None
        return json.loads(reply[open_at:close_at + 1]).get("anomaly", None)
    except Exception as e:
        print("Error:", e)
        return None

# Main pipeline
def main(train_path, test_path):
    """Classify the first 100 test rows with Gemini and print metrics.

    Args:
        train_path: Excel file with labelled few-shot examples.
        test_path: Excel file with rows to classify; it must contain an
            ``anomaly`` column holding the ground truth.

    Rows whose prediction failed are reported and left out of the metrics.
    """
    train_df = pd.read_excel(train_path)
    test_df = pd.read_excel(test_path)
    test_df = test_df.head(100)

    y_true = []
    y_pred = []

    print("Starting predictions...\n")

    for i, row in test_df.iterrows():
        prompt = build_prompt(train_df, row)
        prediction = predict_with_gemini(prompt)

        actual = row["anomaly"]
        if prediction is not None:
            y_pred.append(prediction)
            y_true.append(actual)
            print(f"[{i+1}] Predicted: {prediction}, Actual: {actual}")
        else:
            print(f"[{i+1}] Prediction failed.")

    # Evaluation
    # If every call failed there is nothing to score; the metric functions
    # would otherwise be handed empty inputs.
    if len(y_true) == 0:
        print("No valid predictions to evaluate.")
        return

    print("\n=== Confusion Matrix ===")
    print(confusion_matrix(y_true, y_pred, labels=[True, False]))
    print("\n=== Classification Report ===")
    # Explicit labels pin each name to its class and keep the report working
    # when only one class appears among the scored rows.
    print(classification_report(
        y_true, y_pred, labels=[False, True], target_names=["Normal", "Anomaly"]
    ))

# Example usage
if __name__ == "__main__":
    train_xlsx = "/content/train_data.xlsx"  # replace with your training dataset path
    test_xlsx = "/content/test_data (2)_with_predictions.xlsx"    # replace with your test dataset path
    main(train_xlsx, test_xlsx)


Starting predictions...

[1] Predicted: False, Actual: False
[2] Predicted: True, Actual: True
[3] Predicted: True, Actual: True
[4] Predicted: True, Actual: True
[5] Predicted: True, Actual: True


ERROR:tornado.access:500 POST /v1beta/models/gemini-2.5-pro-preview-05-06:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 293647.02ms


Error: 500 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro-preview-05-06:generateContent?%24alt=json%3Benum-encoding%3Dint: TypeError: Failed to fetch
[6] Prediction failed.
[7] Predicted: False, Actual: False
[8] Predicted: False, Actual: False
[9] Predicted: True, Actual: False
[10] Predicted: True, Actual: True
[11] Predicted: True, Actual: False
[12] Predicted: False, Actual: False
[13] Predicted: False, Actual: False
[14] Predicted: False, Actual: False
[15] Predicted: False, Actual: False
[16] Predicted: False, Actual: False
[17] Predicted: False, Actual: False
[18] Predicted: False, Actual: False
[19] Predicted: False, Actual: False
[20] Predicted: False, Actual: False
[21] Predicted: False, Actual: False
[22] Predicted: False, Actual: False
[23] Predicted: False, Actual: False
[24] Predicted: True, Actual: False
[25] Predicted: False, Actual: False
[26] Predicted: True, Actual: True
[27] Predicted: True, Actual: True
[28] Predicted: False, Actual: Tr

## Without serialisation

In [None]:
import pandas as pd
import json
from sklearn.metrics import confusion_matrix, classification_report
import google.generativeai as genai

# Set your Gemini API key here
# NOTE(review): the value is an empty placeholder; avoid saving a real key in
# the notebook (e.g. load it from an environment variable instead).
API_KEY = ""
genai.configure(api_key=API_KEY)

# Initialize the Gemini model
# Module-level handle that predict_with_gemini() calls.
model = genai.GenerativeModel(model_name="gemini-1.5-pro")

# Build the prompt with structured examples
def build_prompt(train_df, test_row, few_shot_count=800):
    """Build a few-shot prompt whose examples are JSON objects, one per line.

    Args:
        train_df: DataFrame with the four ``*_Hz`` columns and ``anomaly``.
        test_row: Mapping or Series with the four ``*_Hz`` values to classify.
        few_shot_count: Maximum number of rows sampled from ``train_df``.

    Returns:
        str: Instructions, the labelled examples, then the unlabeled record.

    Values are converted to built-in ``float``/``bool`` before serialising.
    ``json.dumps`` cannot encode numpy integer or boolean scalars, and an
    all-numeric row would otherwise emit the label as ``1.0`` rather than
    ``true``.
    """
    feature_columns = ("BPFO_Hz", "BPFI_Hz", "BSF_Hz", "FTF_Hz")

    def _features(record):
        # Plain floats are always JSON-serialisable.
        return {column: float(record[column]) for column in feature_columns}

    examples = train_df.sample(n=min(few_shot_count, len(train_df)))
    parts = [
        "You are an expert in bearing vibration anomaly detection.\n"
        "Given the numerical features, determine whether the record is an anomaly.\n"
        "Respond in JSON as: { \"anomaly\": true/false }\n\n"
        "Here are some examples:\n\n"
    ]

    for _, row in examples.iterrows():
        example_json = _features(row)
        example_json["anomaly"] = bool(row["anomaly"])
        parts.append(json.dumps(example_json) + "\n")

    parts.append("\nNow analyze this record:\n")
    parts.append(json.dumps(_features(test_row)))

    return "".join(parts)

# Predict anomaly using Gemini
def predict_with_gemini(prompt):
    """Send ``prompt`` to the module-level Gemini ``model`` and read the verdict.

    Returns the value stored under ``"anomaly"`` in the outermost ``{...}``
    span of the reply, or ``None`` when there is no such span, the key is
    absent, or any exception occurs (the exception is printed).

    The prompt is no longer echoed to stdout: the leftover debug print wrote
    the entire few-shot prompt on every call.
    """
    try:
        response = model.generate_content(prompt)
        response_text = response.text
        start = response_text.find('{')
        end = response_text.rfind('}') + 1
        if start != -1 and end > start:
            result = json.loads(response_text[start:end])
            return result.get("anomaly", None)
        return None
    except Exception as e:
        print("Error:", e)
        return None

# Main pipeline
def main(train_path, test_path):
    """Classify the first 100 test rows with Gemini and print metrics.

    Args:
        train_path: Excel file with labelled few-shot examples.
        test_path: Excel file with rows to classify; it must contain an
            ``anomaly`` column holding the ground truth.

    Rows whose prediction failed are reported and left out of the metrics.
    """
    train_df = pd.read_excel(train_path)
    test_df = pd.read_excel(test_path)
    test_df = test_df.head(100)

    y_true = []
    y_pred = []

    print("Starting predictions...\n")

    for i, row in test_df.iterrows():
        prompt = build_prompt(train_df, row)
        prediction = predict_with_gemini(prompt)

        actual = row["anomaly"]
        if prediction is not None:
            y_pred.append(prediction)
            y_true.append(actual)
            print(f"[{i+1}] Predicted: {prediction}, Actual: {actual}")
        else:
            print(f"[{i+1}] Prediction failed.")

    # Evaluation
    # If every call failed there is nothing to score; the metric functions
    # would otherwise be handed empty inputs.
    if len(y_true) == 0:
        print("No valid predictions to evaluate.")
        return

    print("\n=== Confusion Matrix ===")
    print(confusion_matrix(y_true, y_pred, labels=[True, False]))
    print("\n=== Classification Report ===")
    # Explicit labels pin each name to its class and keep the report working
    # when only one class appears among the scored rows.
    print(classification_report(
        y_true, y_pred, labels=[False, True], target_names=["Normal", "Anomaly"]
    ))

# Example usage
if __name__ == "__main__":
    train_xlsx = "/content/train_data.xlsx"  # replace with your training dataset path
    test_xlsx = "/content/vibraration test data (2).xlsx"  # replace with your test dataset path
    main(train_xlsx, test_xlsx)


Starting predictions...



In [None]:
############### STORING XLSX

import pandas as pd
import json
import random
from sklearn.metrics import confusion_matrix, classification_report
import google.generativeai as genai

# Set your Gemini API key here
# NOTE(review): the value is an empty placeholder; avoid saving a real key in
# the notebook (e.g. load it from an environment variable instead).
API_KEY = ""
genai.configure(api_key=API_KEY)

# Initialize the Gemini model
# Module-level handle that predict_with_gemini() calls.
model = genai.GenerativeModel(model_name="gemini-2.5-pro-preview-05-06")

def serialize_record(row, include_label=True):
    """Render one record as an English sentence for the few-shot prompt.

    ``row`` must provide the four ``*_Hz`` keys. When ``include_label`` is
    true and ``row`` has an ``"anomaly"`` entry, a label sentence chosen by
    that entry's truthiness is appended.
    """
    template = (
        "For this record, the value of BPFO_Hz is {BPFO_Hz}, "
        "the value of BPFI_Hz is {BPFI_Hz}, "
        "the value of BSF_Hz is {BSF_Hz}, "
        "and the value of FTF_Hz is {FTF_Hz}."
    )
    sentence = template.format(
        BPFO_Hz=row['BPFO_Hz'],
        BPFI_Hz=row['BPFI_Hz'],
        BSF_Hz=row['BSF_Hz'],
        FTF_Hz=row['FTF_Hz'],
    )
    # Guard clause: unlabeled output for test rows or rows without a label.
    if not include_label or "anomaly" not in row:
        return sentence
    verdict = 'an anomaly - true' if row['anomaly'] else 'not an anomaly - false'
    return sentence + f" These values are {verdict}."

def build_prompt(train_df, test_row, few_shot_count=800):
    """Assemble the few-shot prompt: sampled labelled examples, then the query.

    Up to ``few_shot_count`` rows are drawn at random from ``train_df`` and
    rendered with labels; ``test_row`` is rendered without a label and
    followed by the JSON answer instruction.
    """
    shots = train_df.sample(n=min(few_shot_count, len(train_df)))
    example_lines = [serialize_record(example) + "\n" for _, example in shots.iterrows()]

    sections = [
        "You are an expert in bearing vibration anomaly detection.\n",
        "Here are some examples:\n\n",
        *example_lines,
        "\nNow analyze this record:\n",
        serialize_record(test_row, include_label=False),
        "\nIs it an anomaly? Respond in JSON as:\n",
        '{ "anomaly": true/false }',
    ]
    return "".join(sections)

def predict_with_gemini(prompt):
    """Send ``prompt`` to the module-level Gemini ``model`` and read the verdict.

    Returns the value stored under ``"anomaly"`` in the outermost ``{...}``
    span of the reply, or ``None`` when there is no such span, the key is
    absent, or any exception occurs (the exception is printed).
    """
    try:
        reply = model.generate_content(prompt).text
        open_at = reply.find('{')
        close_at = reply.rfind('}')
        # No opening brace, or no closing brace after it: nothing to parse.
        if open_at == -1 or close_at + 1 <= open_at:
            return None
        return json.loads(reply[open_at:close_at + 1]).get("anomaly", None)
    except Exception as e:
        print("Error:", e)
        return None

def main(train_path, test_path):
    """Classify the first 100 test rows with Gemini, print metrics, save results.

    Args:
        train_path: Excel file with labelled few-shot examples.
        test_path: Excel file with rows to classify; it must contain an
            ``anomaly`` column holding the ground truth.

    Successful predictions are written to ``predictions_output.xlsx``; rows
    whose prediction failed are reported and excluded from the metrics and
    from the output file.
    """
    train_df = pd.read_excel(train_path)
    test_df = pd.read_excel(test_path)
    test_df = test_df.head(100)

    y_true = []
    y_pred = []

    predictions_list = []

    print("Starting predictions...\n")

    for i, row in test_df.iterrows():
        prompt = build_prompt(train_df, row)
        prediction = predict_with_gemini(prompt)

        actual = row["anomaly"]
        if prediction is not None:
            y_pred.append(prediction)
            y_true.append(actual)
            predictions_list.append({
                **row.to_dict(),
                "predicted_gemini-2.5-pro": prediction,
                "actual_anomaly": actual
            })
            print(f"[{i+1}] Predicted: {prediction}, Actual: {actual}")
        else:
            print(f"[{i+1}] Prediction failed.")

    # Only score when at least one prediction succeeded; the metric functions
    # would otherwise be handed empty inputs.
    if len(y_true) > 0:
        print("\n=== Confusion Matrix ===")
        print(confusion_matrix(y_true, y_pred, labels=[True, False]))
        print("\n=== Classification Report ===")
        # Explicit labels pin each name to its class and keep the report
        # working when only one class appears among the scored rows.
        print(classification_report(
            y_true, y_pred, labels=[False, True], target_names=["Normal", "Anomaly"]
        ))
    else:
        print("No valid predictions to evaluate.")

    # Save predictions to Excel
    output_df = pd.DataFrame(predictions_list)
    output_df.to_excel("predictions_output.xlsx", index=False)
    print("\nPredictions saved to 'predictions_output.xlsx'")

# Example usage
if __name__ == "__main__":
    train_xlsx = "/content/train_data.xlsx"
    test_xlsx = "/content/test_data (2)_with_predictions_with_predictions.xlsx"
    main(train_xlsx, test_xlsx)


Starting predictions...

Error: HTTPConnectionPool(host='localhost', port=43265): Read timed out. (read timeout=600.0)
[1] Prediction failed.
Error: HTTPConnectionPool(host='localhost', port=43265): Read timed out. (read timeout=600.0)
[2] Prediction failed.


# OPENAI GPT

In [None]:
import pandas as pd
import json
import random
from sklearn.metrics import confusion_matrix, classification_report
from openai import OpenAI

# Set your OpenAI API key here
# NOTE(review): the value is an empty placeholder; avoid saving a real key in
# the notebook (e.g. load it from an environment variable instead).
API_KEY = ""
# Module-level client that predict_with_gpt() calls.
client = OpenAI(api_key=API_KEY)

# Serialize a record to a natural language format
def serialize_record(row, include_label=True):
    """Render one record as an English sentence for the few-shot prompt.

    ``row`` must provide the four ``*_Hz`` keys. When ``include_label`` is
    true and ``row`` has an ``"anomaly"`` entry, a label sentence chosen by
    that entry's truthiness is appended.
    """
    template = (
        "For this record, the value of BPFO_Hz is {BPFO_Hz}, "
        "the value of BPFI_Hz is {BPFI_Hz}, "
        "the value of BSF_Hz is {BSF_Hz}, "
        "and the value of FTF_Hz is {FTF_Hz}."
    )
    sentence = template.format(
        BPFO_Hz=row['BPFO_Hz'],
        BPFI_Hz=row['BPFI_Hz'],
        BSF_Hz=row['BSF_Hz'],
        FTF_Hz=row['FTF_Hz'],
    )
    # Guard clause: unlabeled output for test rows or rows without a label.
    if not include_label or "anomaly" not in row:
        return sentence
    verdict = 'an anomaly - true' if row['anomaly'] else 'not an anomaly - false'
    return sentence + f" These values are {verdict}."

# Build the prompt with few-shot examples + test input
def build_prompt(train_df, test_row, few_shot_count=100):
    """Build the chat ``messages`` list: system role, intro, examples, query.

    Up to ``few_shot_count`` rows are sampled at random from ``train_df`` (a
    modest default keeps the request within the model's token limit) and
    rendered with labels; ``test_row`` is rendered without a label.
    """
    shots = train_df.sample(n=min(few_shot_count, len(train_df)))

    # All labelled examples go into a single user message.
    example_content = "Here are some examples:\n\n" + "".join(
        serialize_record(example) + "\n" for _, example in shots.iterrows()
    )

    # The record to classify plus the required answer format.
    test_content = "".join([
        "\nNow analyze this record:\n",
        serialize_record(test_row, include_label=False),
        "\nIs it an anomaly? Respond in JSON as:\n",
        '{ "anomaly": true/false }',
    ])

    return [
        {"role": "system", "content": "You are an expert in bearing vibration anomaly detection."},
        {"role": "user", "content": "I will show you several examples of bearing vibration data with labels, then ask you to classify a new sample."},
        {"role": "user", "content": example_content},
        {"role": "user", "content": test_content},
    ]

# Predict anomaly using OpenAI's GPT
def predict_with_gpt(messages):
    """Send ``messages`` to the module-level OpenAI ``client`` and read the verdict.

    Returns the value stored under ``"anomaly"`` in the outermost ``{...}``
    span of the first choice's content, or ``None`` when there is no such
    span, the key is absent, or any exception occurs (the exception is printed).
    """
    try:
        completion = client.chat.completions.create(
            model="o1-2024-12-17",  # You can use "gpt-3.5-turbo" or other models as needed
            messages=messages
        )
        reply = completion.choices[0].message.content

        open_at = reply.find('{')
        close_at = reply.rfind('}')
        # No opening brace, or no closing brace after it: nothing to parse.
        if open_at == -1 or close_at + 1 <= open_at:
            return None
        return json.loads(reply[open_at:close_at + 1]).get("anomaly", None)
    except Exception as e:
        print("Error:", e)
        return None

# Main pipeline
def main(train_path, test_path, sample_size=100):
    """Classify the first ``sample_size`` test rows with GPT and print metrics.

    Args:
        train_path: Excel file with labelled few-shot examples.
        test_path: Excel file with rows to classify; it must contain an
            ``anomaly`` column holding the ground truth.
        sample_size: Number of leading test rows to evaluate (limits API usage).

    Rows whose prediction failed are reported and left out of the metrics.
    """
    train_df = pd.read_excel(train_path)
    test_df = pd.read_excel(test_path)
    test_df = test_df.head(sample_size)  # Limit test samples to control API usage

    y_true = []
    y_pred = []

    print("Starting predictions...\n")

    for i, row in test_df.iterrows():
        messages = build_prompt(train_df, row)
        prediction = predict_with_gpt(messages)

        actual = row["anomaly"]
        if prediction is not None:
            y_pred.append(prediction)
            y_true.append(actual)
            print(f"[{i+1}/{sample_size}] Predicted: {prediction}, Actual: {actual}")
        else:
            print(f"[{i+1}/{sample_size}] Prediction failed.")

    # Evaluation
    if len(y_true) > 0:
        print("\n=== Confusion Matrix ===")
        print(confusion_matrix(y_true, y_pred, labels=[True, False]))
        print("\n=== Classification Report ===")
        # target_names are matched positionally to the labels. Without an
        # explicit `labels`, the sorted order [False, True] was used, so the
        # "Anomaly" and "Normal" rows were swapped.
        print(classification_report(
            y_true, y_pred, labels=[True, False], target_names=["Anomaly", "Normal"]
        ))
    else:
        print("No valid predictions to evaluate.")

# For running in Google Colab, you might need to install the latest OpenAI package
# !pip install --upgrade openai

# Example usage
if __name__ == "__main__":
    train_xlsx = "/content/train_data.xlsx"  # replace with your training dataset path
    test_xlsx = "/content/vibraration test data (2).xlsx"  # replace with your test dataset path
    main(train_xlsx, test_xlsx)

Starting predictions...

[1/100] Predicted: False, Actual: False
[2/100] Predicted: True, Actual: True


KeyboardInterrupt: 

# GPT (o3-mini)

In [None]:
import pandas as pd
import json
import random
from sklearn.metrics import confusion_matrix, classification_report
from openai import OpenAI

# Set your OpenAI API key here
# NOTE(review): the value is an empty placeholder; avoid saving a real key in
# the notebook (e.g. load it from an environment variable instead).
API_KEY = ""
# Module-level client that predict_with_gpt() calls.
client = OpenAI(api_key=API_KEY)

# Serialize a record to a natural language format
def serialize_record(row, include_label=True):
    """Render one record as an English sentence for the few-shot prompt.

    ``row`` must provide the four ``*_Hz`` keys. When ``include_label`` is
    true and ``row`` has an ``"anomaly"`` entry, a label sentence chosen by
    that entry's truthiness is appended.
    """
    template = (
        "For this record, the value of BPFO_Hz is {BPFO_Hz}, "
        "the value of BPFI_Hz is {BPFI_Hz}, "
        "the value of BSF_Hz is {BSF_Hz}, "
        "and the value of FTF_Hz is {FTF_Hz}."
    )
    sentence = template.format(
        BPFO_Hz=row['BPFO_Hz'],
        BPFI_Hz=row['BPFI_Hz'],
        BSF_Hz=row['BSF_Hz'],
        FTF_Hz=row['FTF_Hz'],
    )
    # Guard clause: unlabeled output for test rows or rows without a label.
    if not include_label or "anomaly" not in row:
        return sentence
    verdict = 'an anomaly - true' if row['anomaly'] else 'not an anomaly - false'
    return sentence + f" These values are {verdict}."

# Build the prompt with few-shot examples + test input
def build_prompt(train_df, test_row, few_shot_count=100):
    """Build the chat ``messages`` list: system role, intro, examples, query.

    Up to ``few_shot_count`` rows are sampled at random from ``train_df`` (a
    modest default keeps the request within the model's token limit) and
    rendered with labels; ``test_row`` is rendered without a label.
    """
    shots = train_df.sample(n=min(few_shot_count, len(train_df)))

    # All labelled examples go into a single user message.
    example_content = "Here are some examples:\n\n" + "".join(
        serialize_record(example) + "\n" for _, example in shots.iterrows()
    )

    # The record to classify plus the required answer format.
    test_content = "".join([
        "\nNow analyze this record:\n",
        serialize_record(test_row, include_label=False),
        "\nIs it an anomaly? Respond in JSON as:\n",
        '{ "anomaly": true/false }',
    ])

    return [
        {"role": "system", "content": "You are an expert in bearing vibration anomaly detection."},
        {"role": "user", "content": "I will show you several examples of bearing vibration data with labels, then ask you to classify a new sample."},
        {"role": "user", "content": example_content},
        {"role": "user", "content": test_content},
    ]

# Predict anomaly using OpenAI's GPT
def predict_with_gpt(messages):
    """Send ``messages`` to the module-level OpenAI ``client`` and read the verdict.

    Returns the value stored under ``"anomaly"`` in the outermost ``{...}``
    span of the first choice's content, or ``None`` when there is no such
    span, the key is absent, or any exception occurs (the exception is printed).
    """
    try:
        completion = client.chat.completions.create(
            model="o3-mini",  # You can use "gpt-3.5-turbo" or other models as needed
            messages=messages
        )
        reply = completion.choices[0].message.content

        open_at = reply.find('{')
        close_at = reply.rfind('}')
        # No opening brace, or no closing brace after it: nothing to parse.
        if open_at == -1 or close_at + 1 <= open_at:
            return None
        return json.loads(reply[open_at:close_at + 1]).get("anomaly", None)
    except Exception as e:
        print("Error:", e)
        return None

# Main pipeline
def main(train_path, test_path, sample_size=100):
    """Classify test rows with GPT, save the predictions, and print metrics.

    Args:
        train_path: Excel file with labelled few-shot examples.
        test_path: Excel file with rows to classify; it must contain an
            ``anomaly`` column holding the ground truth.
        sample_size: Number of leading test rows to evaluate (limits API usage).

    The evaluated rows, with a ``predicted o3-mini`` column, are written next
    to ``test_path`` with ``_with_predictions`` inserted before ``.xlsx``.
    Failed predictions stay ``None`` in that column and are excluded from the
    metrics.
    """
    train_df = pd.read_excel(train_path)
    test_df = pd.read_excel(test_path)
    # .copy() so the new column is added to an independent frame rather than
    # to a slice of the loaded one (avoids SettingWithCopyWarning).
    test_df = test_df.head(sample_size).copy()  # Limit test samples to control API usage

    # Add a column for storing predictions
    test_df["predicted o3-mini"] = None

    y_true = []
    y_pred = []

    print("Starting predictions...\n")

    for i, row in test_df.iterrows():
        messages = build_prompt(train_df, row)
        prediction = predict_with_gpt(messages)

        actual = row["anomaly"]
        if prediction is not None:
            y_pred.append(prediction)
            y_true.append(actual)
            # Store the prediction in the DataFrame
            test_df.at[i, "predicted o3-mini"] = prediction
            print(f"[{i+1}/{sample_size}] Predicted: {prediction}, Actual: {actual}")
        else:
            print(f"[{i+1}/{sample_size}] Prediction failed.")

    # Save the predictions back to Excel
    output_path = test_path.replace(".xlsx", "_with_predictions.xlsx")
    test_df.to_excel(output_path, index=False)
    print(f"\nSaved predictions to {output_path}")

    # Evaluation
    if len(y_true) > 0:
        print("\n=== Confusion Matrix ===")
        print(confusion_matrix(y_true, y_pred, labels=[True, False]))
        print("\n=== Classification Report ===")
        # target_names are matched positionally to the labels. Without an
        # explicit `labels`, the sorted order [False, True] was used, so the
        # "Anomaly" and "Normal" rows were swapped.
        print(classification_report(
            y_true, y_pred, labels=[True, False], target_names=["Anomaly", "Normal"]
        ))
    else:
        print("No valid predictions to evaluate.")

# For running in Google Colab, you might need to install the latest OpenAI package
# !pip install --upgrade openai

# Example usage
if __name__ == "__main__":
    train_xlsx = "/content/train_data.xlsx"  # replace with your training dataset path
    test_xlsx = "/content/test_data (2).xlsx"  # replace with your test dataset path
    main(train_xlsx, test_xlsx)

Starting predictions...

[1/100] Predicted: False, Actual: False
[2/100] Predicted: True, Actual: True
[3/100] Predicted: True, Actual: True
[4/100] Predicted: True, Actual: True
[5/100] Predicted: True, Actual: True
[6/100] Predicted: True, Actual: True
[7/100] Predicted: False, Actual: False
[8/100] Predicted: False, Actual: False
[9/100] Predicted: True, Actual: False
[10/100] Predicted: True, Actual: True
[11/100] Predicted: False, Actual: False
[12/100] Predicted: False, Actual: False
[13/100] Predicted: False, Actual: False
[14/100] Predicted: False, Actual: False
[15/100] Predicted: False, Actual: False
[16/100] Predicted: False, Actual: False
[17/100] Predicted: False, Actual: False
[18/100] Predicted: False, Actual: False
[19/100] Predicted: False, Actual: False
[20/100] Predicted: False, Actual: False
[21/100] Predicted: False, Actual: False
[22/100] Predicted: False, Actual: False
[23/100] Predicted: False, Actual: False
[24/100] Predicted: False, Actual: False
[25/100] Pred

# llama 3.1

```
# This is formatted as code
```



In [None]:
import pandas as pd
import json
import random
from sklearn.metrics import confusion_matrix, classification_report
from openai import OpenAI  # Using OpenAI's client for NVIDIA endpoint

# Set your NVIDIA API key here
# NOTE(review): the value is an empty placeholder; avoid saving a real key in
# the notebook (e.g. load it from an environment variable instead).
API_KEY = ""  # Replace this with your actual NVIDIA API key

# Initialize the client with NVIDIA's endpoint
# The OpenAI client class is reused here with a custom base_url; this
# module-level client is what predict_with_llama() calls.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=API_KEY
)

# Serialize a record to a natural language format
def serialize_record(row, include_label=True):
    """Render one record as an English sentence for the few-shot prompt.

    ``row`` must provide the four ``*_Hz`` keys. When ``include_label`` is
    true and ``row`` has an ``"anomaly"`` entry, a label sentence chosen by
    that entry's truthiness is appended.
    """
    template = (
        "For this record, the value of BPFO_Hz is {BPFO_Hz}, "
        "the value of BPFI_Hz is {BPFI_Hz}, "
        "the value of BSF_Hz is {BSF_Hz}, "
        "and the value of FTF_Hz is {FTF_Hz}."
    )
    sentence = template.format(
        BPFO_Hz=row['BPFO_Hz'],
        BPFI_Hz=row['BPFI_Hz'],
        BSF_Hz=row['BSF_Hz'],
        FTF_Hz=row['FTF_Hz'],
    )
    # Guard clause: unlabeled output for test rows or rows without a label.
    if not include_label or "anomaly" not in row:
        return sentence
    verdict = 'an anomaly - true' if row['anomaly'] else 'not an anomaly - false'
    return sentence + f" These values are {verdict}."

# Build the prompt with few-shot examples + test input
def build_prompt(train_df, test_row, few_shot_count=100):
    """Assemble the chat messages for one few-shot classification request.

    Args:
        train_df: DataFrame of labelled records to draw examples from.
        test_row: The record to classify; it is serialized without its label.
        few_shot_count: Maximum number of examples to sample from
            ``train_df`` (capped at the number of rows available).

    Returns:
        A list of four chat messages: the system prompt, a task introduction,
        the sampled labelled examples, and the unlabelled record with the
        JSON answer format.
    """
    shot_total = min(few_shot_count, len(train_df))
    few_shots = train_df.sample(n=shot_total)  # unseeded: differs on every call

    labelled_lines = [
        serialize_record(shot) + "\n" for _, shot in few_shots.iterrows()
    ]
    examples_message = {
        "role": "user",
        "content": "Here are some examples:\n\n" + "".join(labelled_lines),
    }

    question = (
        "\nNow analyze this record:\n"
        + serialize_record(test_row, include_label=False)
        + "\nIs it an anomaly? Respond in JSON as:\n"
        + '{ "anomaly": true/false }'
    )

    return [
        {"role": "system", "content": "You are an expert in bearing vibration anomaly detection."},
        {"role": "user", "content": "I will show you several examples of bearing vibration data with labels, then ask you to classify a new sample."},
        examples_message,
        {"role": "user", "content": question},
    ]

# Predict anomaly using NVIDIA LLaMA 3.1 model
def _coerce_anomaly_flag(value):
    """Map a decoded JSON value onto a strict bool, or None if it is not a clear true/false."""
    if isinstance(value, bool):
        return value
    if isinstance(value, int) and value in (0, 1):
        return bool(value)
    if isinstance(value, str):
        return {"true": True, "false": False}.get(value.strip().lower())
    return None


def _extract_anomaly_flag(text):
    """Return the verdict of the last top-level JSON object in `text` that has an "anomaly" key."""
    decoder = json.JSONDecoder()
    verdict = None
    pos = text.find("{")
    while pos != -1:
        try:
            obj, end = decoder.raw_decode(text, pos)
        except json.JSONDecodeError:
            # Not a JSON object at this brace (e.g. prose or an echoed
            # template); try the next opening brace.
            pos = text.find("{", pos + 1)
            continue
        if isinstance(obj, dict) and "anomaly" in obj:
            verdict = _coerce_anomaly_flag(obj["anomaly"])
        # Resume after the decoded object so nested objects are not re-read.
        pos = text.find("{", end)
    return verdict


def predict_with_llama(messages):
    """Ask the NVIDIA-hosted model to classify one record.

    Streams a chat completion for ``messages`` using the module-level
    ``client``, concatenates the streamed text, and extracts the ``anomaly``
    verdict from the JSON object in the reply.

    Args:
        messages: Chat messages in OpenAI format (see ``build_prompt``).

    Returns:
        ``True`` or ``False`` when the reply contains a JSON object whose
        ``anomaly`` value is a boolean (or the strings "true"/"false", or
        0/1); ``None`` when the request fails or no usable verdict is found.
        Never raises: errors are printed and reported as ``None``.
    """
    try:
        completion = client.chat.completions.create(
            model="nvidia/llama-3.1-nemotron-ultra-253b-v1",
            messages=messages,
            temperature=0.6,
            top_p=0.95,
            max_tokens=4096,
            frequency_penalty=0,
            presence_penalty=0,
            stream=True
        )

        pieces = []
        for chunk in completion:
            # A streamed chunk may arrive with an empty `choices` list;
            # indexing it would raise and discard the whole response.
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece is not None:
                pieces.append(piece)

        return _extract_anomaly_flag("".join(pieces))
    except Exception as e:
        # Best-effort by design: the caller counts None as a failed prediction.
        print("Error:", e)
        return None

# Main pipeline
def main(train_path, test_path, sample_size=100):
    """Classify test records with the LLM, save the predictions, print metrics.

    Reads both Excel files, keeps the first ``sample_size`` test rows, builds
    a few-shot prompt per row from the training data, and records the model's
    verdict. Rows whose prediction failed are left empty in the output and
    are excluded from the metrics.

    Args:
        train_path: Path to the labelled training workbook.
        test_path: Path to the test workbook; it must contain an ``anomaly``
            column holding the true labels.
        sample_size: Maximum number of test rows to send to the API.

    Side effects:
        Writes ``<test_path without extension>_with_predictions.xlsx`` and
        prints progress, a confusion matrix and a classification report.
    """
    prediction_col = "predicted llama-3.1-nemotron-ultra-253b-v1"
    # One explicit class order shared by both metrics, so rows/columns and
    # display names always refer to the same class.
    class_labels = [True, False]

    train_df = pd.read_excel(train_path)
    # Limit test samples to control API usage; copy so that adding a column
    # below does not write into a slice of the loaded frame.
    test_df = pd.read_excel(test_path).head(sample_size).copy()
    test_df[prediction_col] = None

    total = len(test_df)  # may be smaller than sample_size
    y_true = []
    y_pred = []

    print("Starting predictions...\n")

    # Count by position rather than by index label so the progress counter is
    # right even when the frame's index is not 0..n-1.
    for position, (idx, row) in enumerate(test_df.iterrows(), start=1):
        messages = build_prompt(train_df, row)
        prediction = predict_with_llama(messages)

        actual = row["anomaly"]
        if prediction is not None:
            y_pred.append(prediction)
            y_true.append(actual)
            test_df.at[idx, prediction_col] = prediction
            print(f"[{position}/{total}] Predicted: {prediction}, Actual: {actual}")
        else:
            print(f"[{position}/{total}] Prediction failed.")

    # Save predictions. Build the name from the path stem: str.replace() would
    # return the input path unchanged for a non-.xlsx file and overwrite it.
    path_root, _ = os.path.splitext(test_path)
    output_path = path_root + "_with_predictions.xlsx"
    test_df.to_excel(output_path, index=False)
    print(f"\nSaved predictions to {output_path}")

    # Evaluation
    if y_true:
        print("\n=== Confusion Matrix ===")
        print(confusion_matrix(y_true, y_pred, labels=class_labels))
        print("\n=== Classification Report ===")
        # Without `labels`, scikit-learn sorts the classes (False, True), so
        # target_names would be attached to the wrong class, and the call
        # raises when only one class is present.
        print(
            classification_report(
                y_true,
                y_pred,
                labels=class_labels,
                target_names=["Anomaly", "Normal"],
            )
        )
    else:
        print("No valid predictions to evaluate.")

# Example usage
if __name__ == "__main__":
    # Workbook locations in the Colab runtime; update these for your setup.
    train_xlsx, test_xlsx = (
        "/content/train_data.xlsx",
        "/content/test_data (2)_with_predictions.xlsx",
    )
    main(train_path=train_xlsx, test_path=test_xlsx)


Starting predictions...

Error: Error code: 403 - {'status': 403, 'title': 'Forbidden', 'detail': 'Authorization failed'}
[1/100] Prediction failed.
Error: Error code: 403 - {'status': 403, 'title': 'Forbidden', 'detail': 'Authorization failed'}
[2/100] Prediction failed.
Error: Error code: 403 - {'status': 403, 'title': 'Forbidden', 'detail': 'Authorization failed'}
[3/100] Prediction failed.
Error: Error code: 403 - {'status': 403, 'title': 'Forbidden', 'detail': 'Authorization failed'}
[4/100] Prediction failed.
Error: Error code: 403 - {'status': 403, 'title': 'Forbidden', 'detail': 'Authorization failed'}
[5/100] Prediction failed.
Error: Error code: 403 - {'status': 403, 'title': 'Forbidden', 'detail': 'Authorization failed'}
[6/100] Prediction failed.
Error: Error code: 403 - {'status': 403, 'title': 'Forbidden', 'detail': 'Authorization failed'}
[7/100] Prediction failed.
Error: Error code: 403 - {'status': 403, 'title': 'Forbidden', 'detail': 'Authorization failed'}
[8/100] Pr

In [None]:
# from openai import OpenAI

# # client = OpenAI(
# #   base_url = "https://integrate.api.nvidia.com/v1",
# #   api_key = ""
# # )

# # completion = client.chat.completions.create(
# #   model="nvidia/llama-3.1-nemotron-ultra-253b-v1",
# #   messages=[{"role":"system","content":"detailed thinking on"}],
# #   temperature=0.6,
# #   top_p=0.95,
# #   max_tokens=4096,
# #   frequency_penalty=0,
# #   presence_penalty=0,
# #   stream=True
# # )

# # for chunk in completion:
# #   if chunk.choices[0].delta.content is not None:
# #     print(chunk.choices[0].delta.content, end="")


# query = "hi is this"
# api_key=""
# client = OpenAI(
#     base_url="https://integrate.api.nvidia.com/v1",
#     api_key=api_key
# )
# messages = [
#         {"role": "system", "content": "detailed thinking off. You are an assistant that determines if a query is requesting a data visualization. Respond with only 'true' if the query is asking for a plot, chart, graph, or any visual representation of data. Otherwise, respond with 'false'."},
#         {"role": "user", "content": query}
#     ]

# response = client.chat.completions.create(
#     model="nvidia/llama-3.1-nemotron-ultra-253b-v1",
#     messages=messages,
#     temperature=0.1,
#     max_tokens=5  # We only need a short response
# )

# # Extract the response and convert to boolean
# intent_response = response.choices[0].message.content.strip().lower()
# print(intent_response)



import os

import requests

# SECURITY: a literal API key was previously hardcoded in this cell. Treat that
# key as compromised and revoke it; the key is now read from the environment.
nvidia_api_key = os.environ.get("NVIDIA_API_KEY", "")
if not nvidia_api_key:
    raise RuntimeError(
        "NVIDIA_API_KEY is not set; export it before running this cell."
    )

headers = {
    # The endpoint expects the bearer scheme; sending the bare key is rejected.
    "Authorization": f"Bearer {nvidia_api_key}",
    "Content-Type": "application/json"
}

data = {
    "model": "nvidia/llama-3.1-nemotron-ultra-253b-v1",
    "messages": [{"role": "user", "content": "Summarize this data analysis:"}],
    "temperature": 0.6,
    "max_tokens": 1024
}

# requests has no default timeout; bound the call so the cell cannot hang forever.
response = requests.post(
    "https://integrate.api.nvidia.com/v1/chat/completions",
    headers=headers,
    json=data,
    timeout=60,
)

# Surface an HTTP error (401/403/...) directly instead of a JSONDecodeError
# from trying to parse a non-JSON error body.
response.raise_for_status()

print(response.json()["choices"][0]["message"]["content"])


JSONDecodeError: Expecting value: line 1 column 1 (char 0)