In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder

def load_data(file_path):
    """Read the CSV at ``file_path`` with pandas and return the DataFrame."""
    return pd.read_csv(file_path)

def preprocess_data(df):
    """Build the feature matrix and the encoded target from the raw dataset.

    Every text column other than 'Disease' is mapped with
    ``{'Yes': 1, 'No': 0}``, and 'Disease' is integer-encoded with a freshly
    fitted ``LabelEncoder``.

    The caller's DataFrame is not modified: all work happens on a copy, so
    calling this function twice on the same frame yields the same result
    (previously the second call would re-encode the already-encoded
    'Disease' column and the encoder would no longer map back to names).

    Args:
        df: DataFrame with a 'Disease' column plus the feature columns.

    Returns:
        Tuple ``(X, y, label_encoder)``: ``X`` is every column except
        'Disease', ``y`` is the encoded 'Disease' column, and
        ``label_encoder`` is the fitted encoder for decoding predictions.

    Raises:
        KeyError: if ``df`` has no 'Disease' column.
    """
    data = df.copy()  # never write into the frame the caller handed us
    yes_no_codes = {'Yes': 1, 'No': 0}

    for column in data.columns:
        if column == 'Disease':  # the target is encoded separately below
            continue
        values = data[column]
        # Accept pandas' dedicated string dtypes as well as plain object
        # columns; the former do not compare equal to 'object'.
        is_text = (
            pd.api.types.is_object_dtype(values)
            or pd.api.types.is_string_dtype(values)
        )
        if is_text:
            # NOTE(review): any value other than 'Yes'/'No' becomes NaN here,
            # so a text column with other categories ends up all-NaN.
            # Confirm that is intended for the dataset in use.
            data[column] = values.map(yes_no_codes)

    label_encoder = LabelEncoder()
    data['Disease'] = label_encoder.fit_transform(data['Disease'])

    X = data.drop(columns=['Disease'])
    y = data['Disease']
    return X, y, label_encoder

def train_model(X, y, test_size=0.2, random_state=42):
    """Fit a decision tree on the training portion of ``(X, y)``.

    The data is split with ``train_test_split`` and only the training part is
    used; the held-out part is discarded here (no evaluation is performed).
    The classifier is seeded with the same ``random_state`` as the split so
    that repeated runs on the same data produce the same tree.

    Args:
        X: Feature matrix.
        y: Target labels aligned with ``X``.
        test_size: Fraction of rows held out of training (default 0.2).
        random_state: Seed used for both the split and the tree (default 42).

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    # Only the training halves are needed; the test halves are dropped.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model = DecisionTreeClassifier(random_state=random_state)
    model.fit(X_train, y_train)
    return model

def predict_disease(model, label_encoder, symptoms):
    """Predict a disease label for a single patient.

    Args:
        model: Fitted estimator exposing ``feature_names_in_`` and ``predict``.
        label_encoder: Object whose ``inverse_transform`` turns the model's
            output back into labels.
        symptoms: Mapping of feature name to value for one patient.

    Returns:
        The first (and only) decoded label.
    """
    # One-row frame whose columns follow the order the model was trained with.
    feature_row = pd.DataFrame(data=[symptoms], columns=model.feature_names_in_)
    encoded_label = model.predict(feature_row)
    decoded_labels = label_encoder.inverse_transform(encoded_label)
    return decoded_labels[0]

if __name__ == "__main__":
    # Load the dataset, build features/target, and fit the classifier.
    file_path = "data.csv"  # Update with actual path
    df = load_data(file_path)
    X, y, label_encoder = preprocess_data(df)
    model = train_model(X, y)

    # One hand-written example patient: feature name -> encoded value.
    patient_symptoms = {
        'Fever': 1,
        'Cough': 0,
        'Fatigue': 1,
        'Difficulty Breathing': 0,
        'Blood Pressure': 0,
        'Cholesterol Level': 1,
    }

    # Classify the example patient and report the decoded label.
    predicted_disease = predict_disease(model, label_encoder, patient_symptoms)
    print("Predicted Disease:", predicted_disease)
    


Predicted Disease: Migraine


In [23]:
import pandas as pd
import numpy as np

# Load the CSV file
file_path = "data.csv"  # Update with actual path
df = pd.read_csv(file_path)

# Add a new column of random integers in [10, 60), i.e. 10..59 inclusive.
# The generator is seeded so that re-running this cell writes the same values
# instead of silently changing the data on disk each time.
df["Total Time Taken"] = np.random.default_rng(42).integers(10, 60, size=len(df))

# Save the updated CSV file. This overwrites the input file in place.
# NOTE(review): every later read of data.csv will include this extra column,
# including the training cell, which uses all non-'Disease' columns as features.
df.to_csv(file_path, index=False)

print("New column 'Total Time Taken' added successfully!")

New column 'Total Time Taken' added successfully!


In [5]:
import pandas as pd

# Read the dataset into the module-level `df` used by the lookup function below
file_path = "data.csv"  # Update with actual path
df = pd.read_csv(filepath_or_buffer=file_path)

# Look up the "Total Time Taken" value recorded for a disease name
def get_total_time_by_disease(disease_name):
    """Return the 'Total Time Taken' of the first row matching ``disease_name``.

    Reads the module-level DataFrame ``df``. When no row has a 'Disease'
    value equal to ``disease_name``, the string
    "Disease not found in dataset" is returned instead of a number.
    """
    is_match = df["Disease"] == disease_name

    # Guard clause: nothing matched, so report it with the sentinel message.
    if not is_match.any():
        return "Disease not found in dataset"

    # Several rows may match; only the first one is used.
    matching_rows = df[is_match]
    return matching_rows["Total Time Taken"].values[0]



# Look up and print the total time taken for the predicted disease.
# NOTE(review): `predicted_disease` is not defined in this cell; it must already
# exist in the kernel (presumably set by the prediction cell) before this runs.
total_time = get_total_time_by_disease(predicted_disease)
print(f"Total Time Taken for {predicted_disease}: {total_time}")


Total Time Taken for Migraine: 38
