In [5]:
# K-Nearest Neighbors (K-NN) Accuracy Calculation
#
# Reads 'Data.csv', holds out 25% of the rows as a test set, standardises the
# features, fits a 5-neighbour K-NN classifier and prints the confusion matrix
# and the accuracy measured on the held-out rows.

# --- Imports (grouped at the top of the cell) ---
import numpy as np  # not referenced below; kept so `np` stays defined in the kernel
import matplotlib.pyplot as plt  # not referenced below; kept so `plt` stays defined
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# --- Load data: every column but the last is a feature, the last is the label ---
dataset = pd.read_csv('Data.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# --- 75% / 25% train/test split; the fixed seed makes the shuffle repeatable ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# --- Standardise features ---
# The scaler learns its statistics from the training rows only and is then
# applied to both partitions, so nothing about the test rows leaks into it.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# --- Fit K-NN ---
# Minkowski distance with p=2 is the Euclidean distance (p=1 would be Manhattan);
# each prediction is a vote among the 5 nearest training points.
classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2).fit(X_train, y_train)

# --- Evaluate on the held-out rows ---
y_pred = classifier.predict(X_test)

cm = confusion_matrix(y_test, y_pred)  # rows: true labels, columns: predicted labels
print(cm)

accuracy = accuracy_score(y_test, y_pred)  # fraction of test rows predicted correctly
print("Accuracy: {:.2f}%".format(100 * accuracy))


[[103   4]
 [  5  59]]
Accuracy: 94.74%


In [7]:
# Decision Tree Classification
#
# Same load / split / scale / evaluate flow as the other model cells, with an
# entropy-based decision tree as the estimator.

# --- Imports (grouped at the top of the cell) ---
import numpy as np  # not referenced below; kept so `np` stays defined in the kernel
import matplotlib.pyplot as plt  # not referenced below; kept so `plt` stays defined
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# --- Load data ---
dataset = pd.read_csv('Data.csv')
y = dataset.iloc[:, -1].values   # label: last column
X = dataset.iloc[:, :-1].values  # features: all remaining columns

# --- Hold out a quarter of the rows for testing (seeded, so repeatable) ---
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

# --- Standardise features ---
# NOTE(review): a tree splits on per-feature thresholds, so standardising is not
# expected to change its predictions; the step is retained so this cell applies
# the same preprocessing as the other model cells.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)  # statistics come from the training rows only
X_test = sc.transform(X_test)        # test rows reuse those statistics

# --- Fit the tree ---
# criterion='entropy' chooses splits by information gain; random_state pins the
# tie-breaking/feature-permutation randomness inside the tree builder.
classifier = DecisionTreeClassifier(criterion='entropy', random_state=0).fit(X_train, y_train)

# --- Evaluate on the held-out rows ---
y_pred = classifier.predict(X_test)

cm = confusion_matrix(y_test, y_pred)  # rows: true labels, columns: predicted labels
print(cm)

accuracy = accuracy_score(y_test, y_pred)  # fraction of test rows predicted correctly
print("Accuracy: {:.2f}%".format(100 * accuracy))


[[103   4]
 [  3  61]]
Accuracy: 95.91%


In [8]:
# Kernel Support Vector Machine (SVM)
#
# Fits an RBF-kernel support vector classifier on standardised features and
# reports the confusion matrix and accuracy on a 25% hold-out set.

# --- Imports (grouped at the top of the cell) ---
import numpy as np  # not referenced below; kept so `np` stays defined in the kernel
import matplotlib.pyplot as plt  # not referenced below; kept so `plt` stays defined
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

# --- Load data: features are all columns except the last, which is the label ---
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# --- 75% / 25% train/test split with a fixed seed ---
split = train_test_split(X, y, test_size=0.25, random_state=0)
X_train, X_test, y_train, y_test = split
del split  # temporary only; do not leave an extra name in the kernel

# --- Standardise features (the RBF kernel is distance-based, so scale matters) ---
sc = StandardScaler()
X_train = sc.fit_transform(X_train)  # statistics come from the training rows only
X_test = sc.transform(X_test)        # test rows reuse those statistics

# --- Fit the RBF-kernel SVC ---
# random_state only seeds the shuffling used for probability estimates; with the
# default probability=False it does not influence the fitted model.
classifier = SVC(kernel='rbf', random_state=0)
classifier.fit(X_train, y_train)

# --- Evaluate on the held-out rows ---
y_pred = classifier.predict(X_test)

cm = confusion_matrix(y_test, y_pred)  # rows: true labels, columns: predicted labels
print(cm)

accuracy = accuracy_score(y_test, y_pred)  # fraction of test rows predicted correctly
print("Accuracy: {:.2f}%".format(100 * accuracy))


[[102   5]
 [  3  61]]
Accuracy: 95.32%


In [9]:
# Logistic Regression
#
# Fits a logistic-regression classifier on standardised features and reports
# the confusion matrix and accuracy on a 25% hold-out set.

# --- Imports (grouped at the top of the cell) ---
import numpy as np  # not referenced below; kept so `np` stays defined in the kernel
import matplotlib.pyplot as plt  # not referenced below; kept so `plt` stays defined
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

# --- Load data ---
dataset = pd.read_csv('Data.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values  # features, label (last column)

# --- Hold out 25% of the rows; the seed makes the partition repeatable ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# --- Standardise features using statistics from the training rows only ---
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# --- Fit the model ---
# NOTE(review): random_state is only consulted by the sag / saga / liblinear
# solvers; whether it matters here depends on the installed version's default.
classifier = LogisticRegression(random_state=0).fit(X_train, y_train)

# --- Evaluate on the held-out rows ---
y_pred = classifier.predict(X_test)

cm = confusion_matrix(y_test, y_pred)  # rows: true labels, columns: predicted labels
print("Confusion Matrix:\n", cm)

accuracy = accuracy_score(y_test, y_pred)  # fraction of test rows predicted correctly
print("\nAccuracy: {:.2f}%".format(100 * accuracy))


Confusion Matrix:
 [[103   4]
 [  5  59]]

Accuracy: 94.74%


In [10]:
# Naïve Bayes Classification
#
# Fits a Gaussian Naïve Bayes classifier on standardised features and reports
# the confusion matrix and accuracy on a 25% hold-out set.

# --- Imports (grouped at the top of the cell) ---
import numpy as np  # not referenced below; kept so `np` stays defined in the kernel
import matplotlib.pyplot as plt  # not referenced below; kept so `plt` stays defined
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score

# --- Load data: last column is the label, everything before it is a feature ---
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# --- 75% / 25% train/test split with a fixed seed ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# --- Standardise features ---
# Mean and standard deviation are estimated on the training rows, then the
# same transformation is applied to both partitions.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# --- Fit Gaussian Naïve Bayes (default settings) ---
classifier = GaussianNB()
classifier.fit(X_train, y_train)

# --- Evaluate on the held-out rows ---
y_pred = classifier.predict(X_test)

cm = confusion_matrix(y_test, y_pred)  # rows: true labels, columns: predicted labels
print("Confusion Matrix:\n", cm)

accuracy = accuracy_score(y_test, y_pred)  # correctly classified / total test rows
print("\nAccuracy: {:.2f}%".format(100 * accuracy))


Confusion Matrix:
 [[99  8]
 [ 2 62]]

Accuracy: 94.15%


In [11]:
# Random Forest Classification
#
# Fits a 10-tree random forest (entropy criterion) and reports the confusion
# matrix and accuracy on a 25% hold-out set.

# --- Imports (grouped at the top of the cell) ---
import numpy as np  # not referenced below; kept so `np` stays defined in the kernel
import matplotlib.pyplot as plt  # not referenced below; kept so `plt` stays defined
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# --- Load data ---
dataset = pd.read_csv('Data.csv')
y = dataset.iloc[:, -1].values   # label: last column
X = dataset.iloc[:, :-1].values  # features: all remaining columns

# --- Hold out a quarter of the rows for testing (seeded, so repeatable) ---
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

# --- Standardise features ---
# NOTE(review): the forest's trees split on per-feature thresholds, so
# standardising is not expected to change the predictions; the step is retained
# so this cell applies the same preprocessing as the other model cells.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)  # statistics come from the training rows only
X_test = sc.transform(X_test)        # test rows reuse those statistics

# --- Fit the forest ---
# 10 trees, splits chosen by information gain; random_state pins the
# bootstrap/feature sampling so the result is repeatable.
classifier = RandomForestClassifier(
    n_estimators=10,
    criterion='entropy',
    random_state=0,
).fit(X_train, y_train)

# --- Evaluate on the held-out rows ---
y_pred = classifier.predict(X_test)

cm = confusion_matrix(y_test, y_pred)  # rows: true labels, columns: predicted labels
print("Confusion Matrix:\n", cm)

accuracy = accuracy_score(y_test, y_pred)  # correctly classified / total test rows
print("\nAccuracy: {:.2f}%".format(100 * accuracy))


Confusion Matrix:
 [[102   5]
 [  6  58]]

Accuracy: 93.57%


In [13]:
# Support Vector Machine (SVM) Classification
#
# Fits a linear-kernel support vector classifier on standardised features and
# reports the confusion matrix and accuracy on a 25% hold-out set.

# --- Imports (grouped at the top of the cell) ---
import numpy as np  # not referenced below; kept so `np` stays defined in the kernel
import matplotlib.pyplot as plt  # not referenced below; kept so `plt` stays defined
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

# --- Load data: features are all columns except the last, which is the label ---
dataset = pd.read_csv('Data.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# --- 75% training rows, 25% test rows; seeded so the split is repeatable ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# --- Standardise features using statistics from the training rows only ---
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# --- Fit the linear-kernel SVC ---
# random_state only seeds the shuffling used for probability estimates; with the
# default probability=False it does not influence the fitted model.
classifier = SVC(kernel='linear', random_state=0).fit(X_train, y_train)

# --- Predict the held-out rows ---
y_pred = classifier.predict(X_test)

# --- Evaluate ---
cm = confusion_matrix(y_test, y_pred)  # rows: true labels, columns: predicted labels
print("Confusion Matrix:\n", cm)

accuracy = accuracy_score(y_test, y_pred)  # fraction of test rows predicted correctly
print("\nAccuracy: {:.2f}%".format(100 * accuracy))


Confusion Matrix:
 [[102   5]
 [  5  59]]

Accuracy: 94.15%
