In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

def preprocess_data(data):
    # Handle Missing Values
    for column in data.columns:
        if data[column].dtype == 'object':  # Categorical column
            data[column].fillna(data[column].mode()[0], inplace=True)
        else:  # Numerical column
            data[column].fillna(data[column].mean(), inplace=True)
    
    # Handle Categorical Variables
    categorical_columns = data.select_dtypes(include=['object']).columns
    for column in categorical_columns:
        if len(data[column].unique()) <= 2:  # Binary categorical column
            le = LabelEncoder()
            data[column] = le.fit_transform(data[column])
        else:  # Multi-class categorical column
            data = pd.get_dummies(data, columns=[column], drop_first=True)
    
    # Standardize/Normalize Data
    scaler = StandardScaler()
    numerical_columns = data.select_dtypes(exclude=['object']).columns
    data[numerical_columns] = scaler.fit_transform(data[numerical_columns])
    
    return data

# Load the Dataset
data_path = "C:/Users/Malco/OneDrive/Desktop/No-Code Project Classification/Obesity Classification.csv"  
data = pd.read_csv(data_path)

# Preprocess the Data
data = preprocess_data(data)

# Convert the target variable to integer type
y = data.iloc[:, -1].astype(int)

# Split the Data into Training and Testing Sets
X = data.iloc[:, :-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define and Train Multiple Classifiers
classifiers = {
    "SVM": SVC(),
    "MLP": MLPClassifier(),
    "SGD": SGDClassifier(),
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier()
}

results = {}

for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    results[name] = (accuracy, report)

# Evaluate and Display Results
for name, (accuracy, report) in results.items():
    print(f"Classifier: {name}")
    print(f"Accuracy: {accuracy}")
    print("Classification Report:")
    print(report)
    print("-" * 50)


Classifier: SVM
Accuracy: 0.9545454545454546
Classification Report:
              precision    recall  f1-score   support

           0       0.93      1.00      0.97        14
           1       1.00      0.88      0.93         8

    accuracy                           0.95        22
   macro avg       0.97      0.94      0.95        22
weighted avg       0.96      0.95      0.95        22

--------------------------------------------------
Classifier: MLP
Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00         8

    accuracy                           1.00        22
   macro avg       1.00      1.00      1.00        22
weighted avg       1.00      1.00      1.00        22

--------------------------------------------------
Classifier: SGD
Accuracy: 0.9545454545454546
Classification Report:
              precision    recall  f1-score   support





In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

def preprocess_data(data):
    # Handle Missing Values
    for column in data.columns:
        if data[column].dtype == 'object':  # Categorical column
            data[column].fillna(data[column].mode()[0], inplace=True)
        else:  # Numerical column
            data[column].fillna(data[column].mean(), inplace=True)
    
    # Handle Categorical Variables
    categorical_columns = data.select_dtypes(include=['object']).columns
    for column in categorical_columns:
        if len(data[column].unique()) <= 2:  # Binary categorical column
            le = LabelEncoder()
            data[column] = le.fit_transform(data[column])
        else:  # Multi-class categorical column
            data = pd.get_dummies(data, columns=[column], drop_first=True)
    
    # Standardize/Normalize Data
    scaler = StandardScaler()
    numerical_columns = data.select_dtypes(exclude=['object']).columns
    data[numerical_columns] = scaler.fit_transform(data[numerical_columns])
    
    return data

# Load the Dataset
data_path = "C:/Users/Malco/OneDrive/Desktop/No-Code Project Classification/healthcare-dataset-stroke-data.csv"  
data = pd.read_csv(data_path)

# Preprocess the Data
data = preprocess_data(data)

# Convert the target variable to integer type
y = data.iloc[:, -1].astype(int)

# Split the Data into Training and Testing Sets
X = data.iloc[:, :-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define and Train Multiple Classifiers
classifiers = {
    "SVM": SVC(),
    "MLP": MLPClassifier(),
    "SGD": SGDClassifier(),
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier()
}

results = {}

for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    results[name] = (accuracy, report)

# Evaluate and Display Results
for name, (accuracy, report) in results.items():
    print(f"Classifier: {name}")
    print(f"Accuracy: {accuracy}")
    print("Classification Report:")
    print(report)
    print("-" * 50)


Classifier: SVM
Accuracy: 0.8688845401174168
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.98      0.93       869
           2       0.66      0.26      0.37       153

    accuracy                           0.87      1022
   macro avg       0.77      0.62      0.65      1022
weighted avg       0.85      0.87      0.84      1022

--------------------------------------------------
Classifier: MLP
Accuracy: 0.8610567514677103
Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.93      0.92       869
           2       0.54      0.45      0.49       153

    accuracy                           0.86      1022
   macro avg       0.72      0.69      0.71      1022
weighted avg       0.85      0.86      0.86      1022

--------------------------------------------------
Classifier: SGD
Accuracy: 0.8522504892367906
Classification Report:
              precision    recall  f1-sc



In [6]:
# Import necessary libraries and modules
import pandas as pd  # For data manipulation and analysis
from sklearn.model_selection import train_test_split  # For splitting data into training and testing sets
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder  # For data preprocessing
from sklearn.metrics import classification_report, accuracy_score  # For evaluating classifier performance
from sklearn.svm import SVC  # Support Vector Machine classifier
from sklearn.neural_network import MLPClassifier  # Multi-layer Perceptron classifier
from sklearn.linear_model import SGDClassifier  # Stochastic Gradient Descent classifier
from sklearn.naive_bayes import GaussianNB  # Gaussian Naive Bayes classifier
from sklearn.tree import DecisionTreeClassifier  # Decision Tree classifier

# Define a function to preprocess the data
def preprocess_data(data):
    # Handle Missing Values
    for column in data.columns:  # Loop through each column in the dataset
        # Check if the column is categorical (object type)
        if data[column].dtype == 'object':
            # Fill missing values with the mode (most frequent value) of the column
            data[column].fillna(data[column].mode()[0], inplace=True)
        else:  # If the column is numerical
            # Fill missing values with the mean (average) of the column
            data[column].fillna(data[column].mean(), inplace=True)
    
    # Handle Categorical Variables
    # Identify columns that are of object type (categorical)
    categorical_columns = data.select_dtypes(include=['object']).columns
    for column in categorical_columns:  # Loop through each categorical column
        # Check if the column is binary (has only 2 unique values)
        if len(data[column].unique()) <= 2:
            le = LabelEncoder()  # Initialize a label encoder
            # Convert the column values to numerical labels
            data[column] = le.fit_transform(data[column])
        else:  # If the column has more than 2 unique values
            # Convert the column to one-hot encoded columns
            data = pd.get_dummies(data, columns=[column], drop_first=True)
    
    # Standardize/Normalize Data
    scaler = StandardScaler()  # Initialize a standard scaler
    # Identify columns that are not of object type (numerical)
    numerical_columns = data.select_dtypes(exclude=['object']).columns
    # Standardize the numerical columns
    data[numerical_columns] = scaler.fit_transform(data[numerical_columns])
    
    return data  # Return the preprocessed data

# Load the Dataset from the specified path
data_path = "C:/Users/Malco/OneDrive/Desktop/No-Code Project Classification/Obesity Classification.csv"  
data = pd.read_csv(data_path)

# Preprocess the Data using the defined function
data = preprocess_data(data)

# Convert the target variable (last column) to integer type
y = data.iloc[:, -1].astype(int)

# Split the data into features (X) and target variable (y)
X = data.iloc[:, :-1]
# Split the data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a dictionary of classifiers to be used
classifiers = {
    "SVM": SVC(),
    "MLP": MLPClassifier(),
    "SGD": SGDClassifier(),
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier()
}

results = {}  # Dictionary to store results for each classifier

# Train and evaluate each classifier
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)  # Train the classifier on the training data
    y_pred = clf.predict(X_test)  # Predict the target variable for the test data
    accuracy = accuracy_score(y_test, y_pred)  # Calculate the accuracy of the classifier
    report = classification_report(y_test, y_pred)  # Generate a classification report
    results[name] = (accuracy, report)  # Store the results in the results dictionary

# Display the results for each classifier
for name, (accuracy, report) in results.items():
    print(f"Classifier: {name}")
    print(f"Accuracy: {accuracy}")
    print("Classification Report:")
    print(report)
    print("-" * 50)


Classifier: SVM
Accuracy: 0.9545454545454546
Classification Report:
              precision    recall  f1-score   support

           0       0.93      1.00      0.97        14
           1       1.00      0.88      0.93         8

    accuracy                           0.95        22
   macro avg       0.97      0.94      0.95        22
weighted avg       0.96      0.95      0.95        22

--------------------------------------------------
Classifier: MLP
Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00         8

    accuracy                           1.00        22
   macro avg       1.00      1.00      1.00        22
weighted avg       1.00      1.00      1.00        22

--------------------------------------------------
Classifier: SGD
Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0   

