In [55]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib


In [56]:
df = pd.read_csv('parkision.csv')  # Ensure this file matches the uploaded one
print(df.head())
print("\nMissing values:\n", df.isnull().sum())


           name  MDVP:Fo(Hz)  MDVP:Fhi(Hz)  MDVP:Flo(Hz)  MDVP:Jitter(%)  \
0  phon_R01_S01      119.992       157.302        74.997         0.00784   
1  phon_R01_S02      122.400       148.650       113.819         0.00968   
2  phon_R01_S03      116.682       131.111       111.555         0.01050   
3  phon_R01_S04      116.676       137.871       111.366         0.00997   
4  phon_R01_S05      116.014       141.781       110.655         0.01284   

   MDVP:Shimmer      RPDE       DFA  status  
0       0.04374  0.414783  0.815285       0  
1       0.06134  0.458304  0.819521       0  
2       0.05233  0.429899  0.825288       0  
3       0.05492  0.434969  0.819235       0  
4       0.06425  0.417356  0.823484       0  

Missing values:
 name              0
MDVP:Fo(Hz)       0
MDVP:Fhi(Hz)      0
MDVP:Flo(Hz)      0
MDVP:Jitter(%)    0
MDVP:Shimmer      0
RPDE              0
DFA               0
status            0
dtype: int64


In [69]:
X = df.drop(['name', 'status'], axis=1)
y = df['status']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

#X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
X_train = X_scaled
y_train = y


In [70]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()
model.fit(X_train, y_train)



In [71]:
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 1.0
Confusion Matrix:
 [[2 0]
 [0 2]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         2

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4



In [72]:
joblib.dump(model, 'svc_model.pkl')
joblib.dump(scaler, 'scaler.pkl')


['scaler.pkl']

In [73]:
print(df['status'].value_counts())  # Should be reasonably balanced


status
0    10
1    10
Name: count, dtype: int64


In [None]:
#import google.generativeai as genai

# Set your API key
#genai.configure(api_key="AIzaSyDTjCC5GTBSS5MXWJzYzoPueYcmcv58Wqw")  # Replace with your key, or use os.environ["GOOGLE_API_KEY"]

#model = genai.GenerativeModel("gemini-1.5-flash")


In [63]:
#GOOGLE_API_KEY="AIzaSyDTjCC5GTBSS5MXWJzYzoPueYcmcv58Wqw"


In [None]:
"""

import os

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import ChatPromptTemplate
"""

In [75]:

model = joblib.load('svc_model.pkl')
scaler = joblib.load('scaler.pkl')


In [77]:
# LLM for feature extraction
import google.generativeai as genai
genai.configure(api_key="AIzaSyDTjCC5GTBSS5MXWJzYzoPueYcmcv58Wqw")
llm_model = genai.GenerativeModel("gemini-1.5-flash")

# ML model for Parkinson's prediction
from sklearn.ensemble import RandomForestClassifier
import joblib

ml_model = joblib.load("svc_model.pkl")     # Your trained classifier
scaler = joblib.load("scaler.pkl")  

In [81]:
import re

def extract_features_from_text(description):
    prompt = f"""
    Given this voice description: "{description}", extract the following features as a Python list in this order:
    - MDVP:Fo(Hz)
    - MDVP:Fhi(Hz)
    - MDVP:Flo(Hz)
    - MDVP:Jitter(%)
    - MDVP:Shimmer
    - RPDE
    - DFA

    Only return the list in Python format. Example: [130.0, 165.0, 85.0, 0.007, 0.045, 0.42, 0.82]
    """

    response = llm_model.generate_content(prompt)
    response_text = response.text.strip()

    # Extract list using regex to avoid errors from extra text
    match = re.search(r'\[([^\[\]]+)\]', response_text)
    if match:
        try:
            features = [float(x.strip()) for x in match.group(1).split(',')]
            if len(features) != 7:
                return f"Error: Expected 7 features, got {len(features)}"
            return features
        except Exception as e:
            return f"Error converting to float: {e}"
    else:
        return f"Error: Could not find a valid list in response: {response_text}"
    
    


In [84]:
import pandas as pd

# Use same order of features as used during training
feature_names = ['MDVP:Fo(Hz)', 'MDVP:Fhi(Hz)', 'MDVP:Flo(Hz)',
                 'MDVP:Jitter(%)', 'MDVP:Shimmer', 'RPDE', 'DFA']

def predict_from_features(features):
    input_df = pd.DataFrame([features], columns=feature_names)
    features_scaled = scaler.transform(input_df)
    prediction = ml_model.predict(features_scaled)
    return "Parkinson's detected" if prediction[0] == 1 else "No Parkinson's detected"


In [82]:
user_sentence = "I recorded my voice and got 122 Hz for Fo, 146 for Fhi, 114 for Flo, jitter at 0.0075, shimmer 0.041, RPDE 0.43, and DFA 0.819."
#print(predict_from_text(user_sentence))


In [88]:
import warnings
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    features_scaled = scaler.transform([features])


In [89]:
features = extract_features_from_text(user_sentence)
features_scaled = scaler.transform([features])
prediction = ml_model.predict(features_scaled)




In [67]:
def predict_from_text(description):
    features = extract_features_from_text(description)
    if isinstance(features, str):
        return features  # error message
    features_scaled = scaler.transform([features])
    prediction = model.predict(features_scaled)
    return "Parkinson's detected" if prediction[0] == 1 else "No Parkinson's detected"


In [68]:
user_sentence = "I recorded my voice and got 122 Hz for Fo, 146 for Fhi, 114 for Flo, jitter at 0.0075, shimmer 0.041, RPDE 0.43, and DFA 0.819."
print(predict_from_text(user_sentence))


AttributeError: 'RandomForestClassifier' object has no attribute 'generate_content'