![0_7p7zUmwZ02iokYmP.jpg](attachment:dce9d31b-efc3-4c94-8d8b-1599f293cd52.jpg)

# Import necessary libraries

In [None]:
import numpy as np  # NumPy for numerical computations (arrays, matrices)
import pandas as pd  # pandas for data manipulation (DataFrames)
import matplotlib.pyplot as plt  # matplotlib for visualizations (plots)
import seaborn as sns  # seaborn for enhanced visualizations (built on top of matplotlib)

# Splitting data for training and testing

In [None]:
from sklearn.model_selection import train_test_split  # Split data into training and testing sets for model evaluation


# Machine Learning Algorithms

In [None]:
from sklearn.linear_model import LogisticRegression  # Logistic Regression for classification tasks
from sklearn.svm import SVC  # Support Vector Machine for classification (including non-linear data)
from sklearn.neighbors import KNeighborsClassifier  # KNN for classification


## Ensemble Learning Algorithms (combining multiple models for better performance)

In [None]:

from sklearn.ensemble import RandomForestClassifier  # Random Forest for robust classification
from sklearn.ensemble import GradientBoostingClassifier  # Gradient Boosting for decision tree-based learning with improved accuracy


# Evaluation Metrics (assessing model performance)

In [None]:
from sklearn.metrics import confusion_matrix  # Visualize model predictions vs. true labels
from sklearn.metrics import accuracy_score, recall_score, f1_score  # Calculate common performance metrics


# Customization (optional)

In [None]:
# Apply matplotlib's built-in 'fivethirtyeight' style sheet to the plots drawn below (optional)
plt.style.use('fivethirtyeight')

In [None]:
# Load the diabetes CSV from the Kaggle input directory into a DataFrame
data = pd.read_csv('/kaggle/input/diabetes-data-set/diabetes.csv')
# A bare expression at the end of a cell renders the DataFrame as the cell output
data

# Explore Data 📊

In [None]:
data.head()  # Preview the first five rows

In [None]:
# Column names, non-null counts and dtypes
data.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
data.describe()

In [None]:
# Number of rows that exactly repeat an earlier row
data.duplicated().sum()

# Data Analysis 📈

In [None]:
# Pairwise correlation between the columns (pandas defaults to Pearson)
data.corr()

In [None]:
# Annotated heatmap of the pairwise column correlations
sns.heatmap(
    data.corr(),
    annot=True,
    fmt='0.2f',
    linewidth=.6,
)

In [None]:
# Class balance of the target: one bar per Outcome value
sns.countplot(
    data=data,
    x='Outcome',
    palette=['g', 'r'],
)

# Ignore Warning Messages

In [None]:
import warnings
# Silence every warning from here on. NOTE: this also hides deprecation and
# convergence warnings raised by the libraries used below.
warnings.filterwarnings('ignore')

In [None]:
# Distribution of the BMI column (displot draws a histogram by default)
sns.displot(data.BMI)

In [None]:
# Box plot of the Age column to show its spread and any outliers
sns.boxplot(data.Age)

# Model Creation

In [None]:
# Split the DataFrame into the label column (y) and the model inputs (x)

y = data['Outcome']               # Label the models learn to predict
x = data.drop(columns='Outcome')  # Every remaining column is used as a feature

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Create one instance of each classifier to be compared

# The features are fed in unscaled, so the default limit of 100 solver
# iterations may not be enough to converge; allow more.
logistic = LogisticRegression(max_iter=1000)
svm = SVC(kernel='linear')
knn = KNeighborsClassifier(n_neighbors=5)
# Seed the tree ensembles (same seed as the train/test split) so that their
# scores are reproducible from run to run.
random_forset = RandomForestClassifier(random_state=42)
gradient_boost = GradientBoostingClassifier(random_state=42)

In [None]:
# Function to evaluate a machine learning model's performance

def model_cal(model):
    """Fit ``model`` on the training split and report its test-split metrics.

    Uses the notebook-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test`` variables. Prints the accuracy, recall and F1 score, and draws
    the confusion matrix as a heatmap (rows = true labels, columns =
    predicted labels).

    Args:
        model: An estimator exposing ``fit(X, y)`` and ``predict(X)``.
    """
    # Print model information
    print("Evaluating Model:", model)

    # Train on the training data, then predict the held-out test data
    model.fit(x_train, y_train)
    prediction = model.predict(x_test)

    # scikit-learn metrics take (y_true, y_pred) in that order. Accuracy is
    # symmetric, but recall is not: with the arguments swapped it silently
    # reports precision, and the confusion matrix comes out transposed.

    # Accuracy: Proportion of correct predictions
    accuracy = accuracy_score(y_test, prediction)
    print("Accuracy Score:", accuracy)

    # Recall: Proportion of actual positives that were correctly identified
    recall = recall_score(y_test, prediction)
    print("Recall Score:", recall)

    # F1 Score: Harmonic mean of precision and recall
    f1 = f1_score(y_test, prediction)
    print("F1 Score:", f1)

    sns.heatmap(confusion_matrix(y_test, prediction), annot=True, fmt='0.2f', linewidth=.9)
    print('\n', '\n')
    



# Logistic Regression Algorithm

In [None]:
# Fit the logistic regression model and print its test-set metrics
model_cal(logistic)

# Support Vector Machine

In [None]:
# Fit the linear-kernel SVM and print its test-set metrics
model_cal(svm)

# KNN Algorithm

In [None]:
# Fit the 5-nearest-neighbours classifier and print its test-set metrics
model_cal(knn)

# Random Forest Algorithm

In [None]:
# Fit the random forest classifier and print its test-set metrics
model_cal(random_forset)

# Gradient Boosting

In [None]:
# Fit the gradient boosting classifier and print its test-set metrics
model_cal(gradient_boost)