In [1]:
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the job-scheduling records from the workbook.
data = pd.read_excel("Job_Scheduling.xlsx")

# Treat +/-inf like a missing value, then discard every incomplete row in one pass
# (this keeps exactly the rows that contain neither NaN nor an infinite value).
data = data.replace([np.inf, -np.inf], np.nan).dropna()

# Feature matrix (three columns) and target vector, both as NumPy arrays.
# The column names are assumed to exist in the workbook.
X = data.loc[:, ['Burst time', 'Arrival Time', 'Preemptive']].values
y = data.loc[:, 'Resources'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Fit Gaussian Naive Bayes on the training rows (fit() returns the estimator itself).
classifier = GaussianNB().fit(X_train, y_train)

# Predict the held-out rows and report the fraction predicted correctly.
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# NOTE(review): the output recorded for this cell is "Accuracy: 0.0". Presumably
# 'Resources' has (almost) as many distinct values as there are rows, which a
# classifier cannot learn -- confirm against the workbook before trusting this setup.
print("Accuracy:", accuracy)


Accuracy: 0.0


In [4]:
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# UCI Dermatology dataset. The names below label the file's columns in order;
# the last one ("class") is the prediction target.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data"
column_names = [
    "erythema",
    "scaling",
    "definite_borders",
    "itching",
    "koebner_phenomenon",
    "polygonal_papules",
    "follicular_papules",
    "oral_mucosal_involvement",
    "knee_and_elbow_involvement",
    "scalp_involvement",
    "family_history",
    "melanin",
    "eosinophils",
    "PNL_infiltrate",
    "fibrosis_of_papillary_dermis",
    "exocytosis",
    "acanthosis",
    "hyperkeratosis",
    "parakeratosis",
    "clubbing_of_the_rete_ridges",
    "elongation_of_the_rete_ridges",
    "thinning_of_the_suprapapillary_epidermis",
    "spongiform_pustule",
    "munro_microabcess",
    "focal_hypergranulosis",
    "disappearance_of_the_granular_layer",
    "vacuolisation_and_damage_of_basal_layer",
    "spongiosis",
    "saw-tooth_appearance_of_retes",
    "follicular_horn_plug",
    "perifollicular_parakeratosis",
    "inflammatory_monoluclear_inflitrate",
    "band-like_infiltrate",
    "age",
    "class",
]

# "?" entries in the file are read as NaN.
data = pd.read_csv(url, header=None, names=column_names, na_values='?')

# Impute: every NaN becomes the (first) most frequent value of its own column.
data = data.apply(lambda column: column.fillna(column.mode()[0]))

# Features are all columns except the label; the label is the "class" column.
X = data.drop("class", axis=1)
y = data["class"]

# (train fraction, test fraction) pairs to compare.
split_ratios = [(0.6, 0.4), (0.7, 0.3), (0.8, 0.2)]

for train_ratio, test_ratio in split_ratios:
    # Same seed for every ratio, so only the split size varies between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=42, test_size=test_ratio
    )

    # Fit Gaussian Naive Bayes (fit() returns the estimator) and predict the test rows.
    classifier = GaussianNB().fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    # Score the predictions against the held-out labels.
    accuracy = accuracy_score(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)

    # Report; the trailing empty string produces the blank separator line.
    print(f"Train/Test Split Ratio: {int(train_ratio*100)}% / {int(test_ratio*100)}%")
    print("Accuracy:", accuracy)
    print("Confusion Matrix:", confusion, "", sep="\n")


Train/Test Split Ratio: 60% / 40%
Accuracy: 0.8775510204081632
Confusion Matrix:
[[52  0  0  0  0  0]
 [ 0  7  0 13  4  0]
 [ 0  0 27  0  0  0]
 [ 0  0  0 18  0  0]
 [ 0  1  0  0 20  0]
 [ 0  0  0  0  0  5]]

Train/Test Split Ratio: 70% / 30%
Accuracy: 0.8727272727272727
Confusion Matrix:
[[40  0  0  0  0  0]
 [ 0  3  0 11  2  0]
 [ 0  0 19  0  0  0]
 [ 0  0  0 15  0  0]
 [ 0  1  0  0 15  0]
 [ 0  0  0  0  0  4]]

Train/Test Split Ratio: 80% / 20%
Accuracy: 0.9054054054054054
Confusion Matrix:
[[31  0  0  0  0  0]
 [ 0  3  0  4  2  0]
 [ 0  0 13  0  0  0]
 [ 0  0  0  8  0  0]
 [ 0  1  0  0  9  0]
 [ 0  0  0  0  0  3]]



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Step 1: Load the dataset
# Column names are supplied explicitly, so every line of the file is read as data.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00267/data_banknote_authentication.txt"
columns = ['variance', 'skewness', 'curtosis', 'entropy', 'class']
data = pd.read_csv(url, header=None, names=columns)

# Step 2: Separate features from the label and hold out 20% of the rows for testing
X = data.drop(columns='class')
y = data['class']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Step 3: KNN on standardized features
# The scaler learns its statistics from the training rows only and is then
# applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)
y_pred_knn = knn.predict(X_test_scaled)

# Step 4: Gaussian Naive Bayes on the unscaled features
nb = GaussianNB().fit(X_train, y_train)
y_pred_nb = nb.predict(X_test)

# Step 5: Display Confusion Matrix and Evaluation Metrics
def evaluate_model(y_true, y_pred, model_name, average="binary"):
    """Print the confusion matrix, accuracy, precision, recall and F1 score of one model.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Labels predicted by the model, aligned with ``y_true``.
    model_name : str
        Name shown in the report header.
    average : str, default "binary"
        Averaging mode forwarded to ``precision_score``, ``recall_score`` and
        ``f1_score``. The default reproduces the previous behaviour, which only
        works for two-class targets; pass e.g. ``"weighted"`` or ``"macro"`` for
        multi-class targets. The mode must produce a single number, because
        each score is formatted with two decimals.

    Returns
    -------
    None
        The report is written to standard output.
    """
    print(f"Confusion Matrix for {model_name}:")
    print(confusion_matrix(y_true, y_pred))
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.2f}")
    print(f"Precision: {precision_score(y_true, y_pred, average=average):.2f}")
    print(f"Recall: {recall_score(y_true, y_pred, average=average):.2f}")
    # The trailing "\n" leaves a blank line between consecutive reports.
    print(f"F1 Score: {f1_score(y_true, y_pred, average=average):.2f}\n")

# Report both classifiers against the same held-out labels, KNN first.
print("Evaluation Results:")
evaluate_model(y_true=y_test, y_pred=y_pred_knn, model_name="KNN")
evaluate_model(y_true=y_test, y_pred=y_pred_nb, model_name="Naive Bayes")


In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# Step 1: Load the dataset
data = pd.read_excel("weather.xlsx")

# Step 2: Name the target column
# TODO: replace the placeholder with the actual label column of weather.xlsx.
TARGET_COLUMN = 'target_column_name'
if TARGET_COLUMN not in data.columns:
    # Fail early with the list of real columns instead of a bare KeyError from drop().
    raise KeyError(
        f"Target column {TARGET_COLUMN!r} not found in weather.xlsx; "
        f"available columns: {list(data.columns)}"
    )

# Step 3: Split the dataset into features and target variable, then preprocess
X = data.drop(columns=TARGET_COLUMN)
y = data[TARGET_COLUMN]
# One-hot encode non-numeric feature columns so both estimators receive numbers;
# numeric columns pass through unchanged. Encoding before the split keeps the
# train and test frames on the same set of columns.
X = pd.get_dummies(X, dtype=float)

# Step 4: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Apply Naïve Bayes (on the unscaled features)
nb = GaussianNB()
nb.fit(X_train, y_train)
y_pred_nb = nb.predict(X_test)

# Step 6: Apply KNN
# KNN is distance-based, so features are standardized first; the scaler is fitted
# on the training rows only to keep test-set statistics out of the model.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_scaled, y_train)
y_pred_knn = knn.predict(X_test_scaled)

# Step 7: Evaluate the models
# The weighted average makes the F-measure usable for two or more classes.
accuracy_nb = accuracy_score(y_test, y_pred_nb)
f1_score_nb = f1_score(y_test, y_pred_nb, average='weighted')

accuracy_knn = accuracy_score(y_test, y_pred_knn)
f1_score_knn = f1_score(y_test, y_pred_knn, average='weighted')

# Step 8: Compare and analyze the performance
print("Naïve Bayes Performance:")
print(f"Accuracy: {accuracy_nb:.2f}")
print(f"F-measure: {f1_score_nb:.2f}\n")

print("KNN Performance:")
print(f"Accuracy: {accuracy_knn:.2f}")
print(f"F-measure: {f1_score_knn:.2f}\n")
