# Random Forest Classifier

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Load the dataset from the CSV file located in the working directory
data = pd.read_csv(filepath_or_buffer='diabetes.csv')

In [2]:
# Impute missing values with the mean of the respective column.
# - numeric_only=True keeps the mean well-defined if the CSV ever contains a
#   non-numeric column (pandas >= 2.0 raises TypeError in that case otherwise).
# - The frame is rebound instead of mutated with inplace=True, which pandas
#   discourages and which brings no performance benefit.
# NOTE(review): the means are computed over the full dataset (including the
# 'Outcome' column and the rows that later end up in the test split), so the
# imputation sees test data; consider imputing after the train/test split.
data = data.fillna(data.mean(numeric_only=True))

In [3]:
# Separate the target column ('Outcome') from the predictor columns
y = data['Outcome']
X = data.drop('Outcome', axis=1)

In [4]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=18,
)

In [5]:
# Feature scaling: fit the scaler on the training split only, then apply the
# fitted scaler to both the training and the test split
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [6]:
# Create and train the RandomForestClassifier.
#
# The original call asked for max_features=17, which is more than the number
# of feature columns in the training matrix. Older scikit-learn releases
# reject that with "ValueError: max_features must be in (0, n_features]",
# while newer ones accept it and simply consider every feature at each split.
# Capping the value at the actual column count keeps the intent and makes the
# cell run regardless of the installed version.
Classifier = RandomForestClassifier(
    n_estimators=100,
    max_depth=18,
    max_features=min(17, X_train.shape[1]),
    bootstrap=True,
    random_state=18,  # fixed seed so the fitted forest is reproducible
).fit(X_train, y_train)

In [7]:
# Predict the class label of every sample in the (scaled) test split
y_pred = Classifier.predict(X=X_test)

In [8]:
# Calculate accuracy
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.98


In [9]:
# Precision of the test-set predictions
precision = precision_score(y_true=y_test, y_pred=y_pred)
print("Precision:", precision)

Precision: 0.972972972972973


In [10]:
# Recall of the test-set predictions
recall = recall_score(y_true=y_test, y_pred=y_pred)
print("Recall:", recall)

Recall: 0.972972972972973


In [11]:
# F1-Score of the test-set predictions
f1 = f1_score(y_true=y_test, y_pred=y_pred)
print("F1-Score:", f1)

F1-Score: 0.972972972972973


In [12]:
# Build the confusion matrix for the test split and print it under a heading
confusion = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:", confusion, sep="\n")

Confusion Matrix:
[[248   4]
 [  4 144]]


In [16]:
# Input feature values and get predictions
def predict_diabetes(feature_values=None):
    """Predict the diabetes outcome for one person.

    Parameters
    ----------
    feature_values : sequence of numbers, optional
        Values for Pregnancies, Glucose, BloodPressure, SkinThickness,
        Insulin, BMI, DiabetesPedigreeFunction and Age, in that order.
        When omitted, each value is requested interactively with ``input``.

    Returns
    -------
    The result of ``Classifier.predict`` for the single scaled sample; its
    first element is the predicted label.

    Raises
    ------
    ValueError
        If a value cannot be converted to ``float`` or the number of supplied
        values does not match the number of feature columns.
    """
    columns = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']

    if feature_values is None:
        # Interactive mode: prompt for every feature in column order.
        feature_values = [float(input(f"Enter {column}: ")) for column in columns]
    else:
        feature_values = [float(value) for value in feature_values]
        if len(feature_values) != len(columns):
            raise ValueError(
                f"Expected {len(columns)} feature values, got {len(feature_values)}."
            )

    sample = [feature_values]
    if hasattr(sc, "feature_names_in_"):
        # The scaler was fitted on a DataFrame, so give it a DataFrame carrying
        # column names too; a bare list makes scikit-learn warn that the input
        # has no valid feature names.
        sample = pd.DataFrame(sample, columns=columns)

    scaled_features = sc.transform(sample)
    result = Classifier.predict(scaled_features)
    return result

In [17]:
    # Ask for the feature values, run the model and report the predicted class;
    # a ValueError raised along the way is reported as invalid input.
    try:
        prediction = predict_diabetes()
        if prediction[0] != 1:
            print("-----------------------------------------",
                  "Prediction : Person may not have Diabetes",
                  sep="\n")
        else:
            print("-------------------------------------",
                  "Prediction : Person may have Diabetes",
                  sep="\n")
    except ValueError:
        print("Invalid input. Please enter numerical values for features.")

Enter Pregnancies: 2
Enter Glucose: 10
Enter BloodPressure: 5
Enter SkinThickness: 2
Enter Insulin: 3
Enter BMI: 45
Enter DiabetesPedigreeFunction: 56
Enter Age: 12
The person may not have Diabetes.


