In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

In [2]:
# Load the dataset
file_path = 'Training.csv'
data = pd.read_csv(file_path)
data.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis,Unnamed: 133
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,


In [3]:
# Handling missing values and removing unnecessary columns
data = data.drop(columns=data.columns[-1])  # Drop the last column if it's unnamed or irrelevant
data = data.dropna()  # Drop rows with NaN values

# Splitting the dataset into features (X) and target (y)
X = data.drop('prognosis', axis=1)
y = data['prognosis']

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [4]:
# Creating and training the model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Feature importance
feature_importances = model.feature_importances_
feature_names = X.columns
feature_importance_info = sorted(zip(feature_importances, feature_names), reverse=True)

# Selecting top 10 features
top_10_features = feature_importance_info[:10]
top_10_feature_names = [feature[1] for feature in top_10_features]

In [5]:

# Model evaluation
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)


In [6]:
print("Data")

print(top_10_features, accuracy, report)

Data
[(0.018859790969700526, 'muscle_pain'), (0.016080670302876496, 'itching'), (0.015616227862390914, 'mild_fever'), (0.015468301124013348, 'joint_pain'), (0.014903570285950525, 'yellowing_of_eyes'), (0.014699506900603034, 'family_history'), (0.014648919700597642, 'chest_pain'), (0.014624481259313711, 'high_fever'), (0.01418711144549801, 'abdominal_pain'), (0.01381074869118743, 'fatigue')] 1.0                                          precision    recall  f1-score   support

(vertigo) Paroymsal  Positional Vertigo       1.00      1.00      1.00        32
                                   AIDS       1.00      1.00      1.00        39
                                   Acne       1.00      1.00      1.00        41
                    Alcoholic hepatitis       1.00      1.00      1.00        36
                                Allergy       1.00      1.00      1.00        35
                              Arthritis       1.00      1.00      1.00        36
                       Bronchial A

In [7]:
def user_input_to_prediction(model, feature_names):
    user_data = {}
    for feature in feature_names:
        # Assuming all features are binary (0 or 1)
        user_input = input(f"Enter value for {feature} (0 or 1): ")
        user_data[feature] = int(user_input)

    # Convert user data to a DataFrame
    user_data_df = pd.DataFrame([user_data])

    # Adding zeros for all other features not in the top 10
    for col in X.columns:
        if col not in user_data_df.columns:
            user_data_df[col] = 0

    # Reordering columns to match the training data
    user_data_df = user_data_df[X.columns]

    # Making a prediction
    prediction = model.predict(user_data_df)
    return prediction[0]

In [8]:
# Example usage
print("Please input the values for the following features:")
predicted_prognosis = user_input_to_prediction(model, top_10_feature_names)
print(f"The predicted prognosis is: {predicted_prognosis}")

Please input the values for the following features:
