In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
import pickle

# Load dataset
data = pd.read_csv("p.csv")

# Check if at least two classes exist
print("Class distribution:\n", data['status'].value_counts())

# Prepare features and labels
X = data.drop(columns=["name", "status"])
y = data["status"]

# Feature scaling before splitting
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


Class distribution:
 status
1    3
0    3
Name: count, dtype: int64


In [3]:
# Now split
X_train = X_scaled
y_train = y
X_test = X_scaled
y_test = y

# Ensure at least 2 unique classes
if len(set(y_train)) < 2:
    raise ValueError("Training data has only one class.")

# Train SVM model
model = SVC(kernel='rbf', probability=True)
model.fit(X_train, y_train)


In [4]:
from sklearn.metrics import accuracy_score, classification_report

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Report:\n", classification_report(y_test, y_pred))


Accuracy: 1.0
Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3

    accuracy                           1.00         6
   macro avg       1.00      1.00      1.00         6
weighted avg       1.00      1.00      1.00         6



In [5]:
from langchain_google_genai import ChatGoogleGenerativeAI
import ast

llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key="AIzaSyCKHbgeWFX4KitqeTxjvSRGTWUpxiDTWmE",temperature=0.7)

required_features = [
    "MDVP:Fo(Hz)", "MDVP:Fhi(Hz)", "MDVP:Flo(Hz)", "MDVP:Jitter(%)", "MDVP:RAP", "MDVP:PPQ",
    "Jitter:DDP", "MDVP:Shimmer", "spread1", "spread2", "DFA"
]

extract_template = """
Extract the following features from the input sentence and return them ONLY as a Python dictionary without any explanation or markdown formatting:

Features: {features}

Input: "{user_input}"
"""

def extract_features_from_text(user_input):
    prompt = extract_template.format(
        features=", ".join(required_features),
        user_input=user_input
    )
    response = llm.invoke(prompt)
    content = response.content.strip()

    print("Raw Gemini response:\n", content)  # <-- helpful for debugging

    # Strip Markdown/formatting if present
    if content.startswith("```"):
        content = content.strip("`")
        content = content.replace("python", "").strip()

    try:
        return ast.literal_eval(content)
    except Exception as e:
        print("Error parsing Gemini response:", e)
        return None


In [6]:
def predict_from_text(user_sentence):
    extracted = extract_features_from_text(user_sentence)
    if not extracted:
        return "Failed to extract features. Please try again."

    try:
        input_df = pd.DataFrame([extracted])
        input_scaled = scaler.transform(input_df)
        prediction = model.predict(input_scaled)
        return "Parkinson Detected" if prediction[0] == 1 else "No Parkinson"
    except Exception as e:
        return f"Prediction error: {str(e)}"


In [10]:
user_sentence = "My average frequency is 130 Hz, high freq 165, low 85, jitter 0.0023, RAP 0.0012, PPQ 0.0024, DDP 0.0036, shimmer 0.030, spread1 -5.0, spread2 0.220, and DFA is 0.710"
print(predict_from_text(user_sentence))


Raw Gemini response:
 {'MDVP:Fo(Hz)': 130, 'MDVP:Fhi(Hz)': 165, 'MDVP:Flo(Hz)': 85, 'MDVP:Jitter(%)': 0.0023, 'MDVP:RAP': 0.0012, 'MDVP:PPQ': 0.0024, 'Jitter:DDP': 0.0036, 'MDVP:Shimmer': 0.030, 'spread1': -5.0, 'spread2': 0.220, 'DFA': 0.710}
Parkinson Detected
