# Random Forest Classifier

In [43]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Read the dataset from disk.
# The cells below operate on a frame named `data`, so bind that name here;
# without it they fail with NameError on a fresh kernel. `dataset` keeps the
# frame exactly as loaded, and `data` is an independent working copy.
dataset = pd.read_csv('diabetes.csv')
data = dataset.copy()

In [44]:
# Impute missing values with the mean of the respective column.
# Rebinding `data` (rather than inplace=True) keeps the step a plain expression,
# and numeric_only=True makes mean() skip any non-numeric column instead of
# raising on it.
# NOTE(review): the means are computed over all rows, i.e. before the
# train/test split performed further down — confirm that is intended.
data = data.fillna(data.mean(numeric_only=True))

In [45]:
# Separate the label column (y) from the remaining feature columns (X)
y = data.loc[:, 'Outcome']
X = data.drop('Outcome', axis=1)

In [46]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=18,
)

In [47]:
# Feature scaling: fit the scaler on the training split only, then apply the
# same fitted scaler to both splits.
# NOTE: X_train / X_test are overwritten here, so re-running this cell without
# re-running the split cell scales already-scaled data.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [48]:
# Build the RandomForestClassifier and fit it on the training split.
# NOTE(review): max_features=17 requests 17 candidate features per split —
# confirm the dataset really has at least that many feature columns.
Classifier = RandomForestClassifier(
    n_estimators=100,
    max_depth=18,
    max_features=17,
    bootstrap=True,
    random_state=18,
).fit(X_train, y_train)

In [49]:
# Predict labels for the test split with the fitted forest
y_pred = Classifier.predict(X=X_test)

In [50]:
# Calculate accuracy of the predictions against the true test labels
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.98


In [51]:
# Precision of the predictions against the true test labels
precision = precision_score(y_true=y_test, y_pred=y_pred)
print("Precision:", precision)

Precision: 0.972972972972973


In [52]:
# Recall of the predictions against the true test labels
recall = recall_score(y_true=y_test, y_pred=y_pred)
print("Recall:", recall)

Recall: 0.972972972972973


In [53]:
# F1-Score of the predictions against the true test labels
f1 = f1_score(y_true=y_test, y_pred=y_pred)
print("F1-Score:", f1)

F1-Score: 0.972972972972973


In [54]:
# Build the confusion matrix for the test predictions and print it under a heading
confusion = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:", confusion, sep="\n")

Confusion Matrix:
[[248   4]
 [  4 144]]


# Gradient Boosting Classifier

In [55]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Read the dataset from disk.
# The cells below operate on a frame named `data`, so bind that name here;
# without it they depend on a `data` variable left over from earlier kernel
# state. `dataset` keeps the frame exactly as loaded, and `data` is an
# independent working copy.
dataset = pd.read_csv('diabetes.csv')
data = dataset.copy()

In [56]:
# Impute missing values with the mean of the respective column.
# Rebinding `data` (rather than inplace=True) keeps the step a plain expression,
# and numeric_only=True makes mean() skip any non-numeric column instead of
# raising on it.
# NOTE(review): the means are computed over all rows, i.e. before the
# train/test split performed further down — confirm that is intended.
data = data.fillna(data.mean(numeric_only=True))

In [57]:
# Separate the label column (y) from the remaining feature columns (X)
y = data.loc[:, 'Outcome']
X = data.drop('Outcome', axis=1)

In [58]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=18,
)

In [59]:
# Feature scaling: fit the scaler on the training split only, then apply the
# same fitted scaler to both splits.
# NOTE: X_train / X_test are overwritten here, so re-running this cell without
# re-running the split cell scales already-scaled data.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [60]:
# Build the GradientBoostingClassifier and fit it on the training split
clf = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=1.0,
    max_depth=1,
    random_state=18,
).fit(X_train, y_train)

# Predict labels for the test split with the fitted model
y_pred = clf.predict(X=X_test)

In [61]:
# Calculate accuracy of the predictions against the true test labels
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.8325


In [64]:
# Precision of the predictions against the true test labels
precision = precision_score(y_true=y_test, y_pred=y_pred)
print("Precision:", precision)

Precision: 0.8091603053435115


In [65]:
# Recall of the predictions against the true test labels
recall = recall_score(y_true=y_test, y_pred=y_pred)
print("Recall:", recall)

Recall: 0.7162162162162162


In [66]:
# F1-Score of the predictions against the true test labels
f1 = f1_score(y_true=y_test, y_pred=y_pred)
print("F1-Score:", f1)

F1-Score: 0.7598566308243728


In [67]:
# Build the confusion matrix for the test predictions and print it under a heading
confusion = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:", confusion, sep="\n")

Confusion Matrix:
[[227  25]
 [ 42 106]]
