In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris

class SVM:
    """Linear support-vector classifier trained by per-sample sub-gradient steps.

    For every training sample the margin ``y * (w . x + b)`` is evaluated.
    Samples with margin >= 1 only shrink the weights (``dw = reg * w``);
    samples inside the margin additionally pull ``w`` and ``b`` towards the
    sample's label.

    Parameters
    ----------
    lr : float
        Step size applied to every update.
    reg : float
        Weight-decay coefficient multiplying ``w`` in the gradient.
    epochs : int
        Number of full passes over the training data.
    """

    def __init__(self, lr=0.001, reg=0.01, epochs=1000):
        self.lr = lr
        self.reg = reg
        self.epochs = epochs
        self.w = None  # weight vector, created by fit()
        self.b = 0     # bias term

    def fit(self, X, y):
        """Learn ``w`` and ``b`` from the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)
            Labels, encoded as -1 or +1.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, if ``X`` and ``y`` differ in length, or if
            ``y`` contains anything other than -1 / +1.
        """
        # Coerce to ndarrays so lists and pandas objects (whose [] is label-
        # based rather than positional) behave like plain arrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if y.ndim != 1 or len(X) != len(y):
            raise ValueError("y must be 1-D with one label per row of X")
        if not np.isin(y, (-1, 1)).all():
            # The hinge-style update below is only meaningful for -1/+1 labels.
            raise ValueError("labels must be encoded as -1 or +1")

        self.w = np.zeros(X.shape[1])
        self.b = 0

        for _ in range(self.epochs):
            for x_i, y_i in zip(X, y):
                margin = y_i * (np.dot(x_i, self.w) + self.b)

                if margin >= 1:
                    # Outside the margin: only the regularisation term acts.
                    dw = self.reg * self.w
                    db = 0
                else:
                    # Inside the margin or misclassified: add the hinge term.
                    dw = self.reg * self.w - y_i * x_i
                    db = -y_i

                self.w -= self.lr * dw
                self.b -= self.lr * db

    def predict(self, X):
        """Return ``sign(X . w + b)`` for every row of ``X``.

        The result contains -1.0 / +1.0, and 0.0 for a sample that lies
        exactly on the decision boundary.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.w is None:
            raise RuntimeError("SVM.predict called before fit")
        return np.sign(np.dot(np.asarray(X, dtype=float), self.w) + self.b)
# Demo: train the SVM as a binary "setosa vs. every other species" classifier.
iris = load_iris()
X = iris.data
y = iris.target

# Binary target: +1 for class 0 (setosa), -1 for the remaining two species.
y_binary = np.where(y == 0, 1, -1)

X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then reuse it for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

svm = SVM(lr=0.01, reg=0.01, epochs=1000)
svm.fit(X_train, y_train)

predictions = svm.predict(X_test)

class_names = iris.target_names
# The model only separates class 0 from the rest, so a non-positive prediction
# cannot be attributed to one particular other species; label it generically
# instead of reporting every such sample as a single named species.
positive_label = str(class_names[0])
negative_label = f"not {positive_label}"
predicted_class_names = [positive_label if p == 1 else negative_label for p in predictions]

distinct_classes = np.unique(predicted_class_names)
print("\nDistinct Predicted Classes:")
print(distinct_classes)


Distinct Predicted Classes:
['setosa' 'virginica']


In [None]:
import pandas as pd
import numpy as np

# Load the table; the 'class' column is the target, all other columns are features.
df = pd.read_csv('/content/gnb_diabetes_dataset.csv')
y = df['class'].values
X = df.drop('class', axis=1).values
print(df.head())

# Class priors: relative frequency of each distinct label.
classes, class_counts = np.unique(y, return_counts=True)
prior_probabilities = class_counts / len(y)

# Per-class Gaussian parameters, one mean / variance per feature column
# (np.var with its default ddof, i.e. the population variance).
means = {label: np.mean(X[y == label], axis=0) for label in classes}
variances = {label: np.var(X[y == label], axis=0) for label in classes}
def gaussian_pdf(x, mean, var):
    """Density of a normal distribution with the given mean and variance at ``x``.

    ``var`` is the variance (not the standard deviation). Arguments may be
    scalars or numpy arrays; array arguments are combined element-wise.
    """
    normaliser = np.sqrt(2 * np.pi * var)
    exponent = -(x - mean) ** 2 / (2 * var)
    return (1 / normaliser) * np.exp(exponent)
def gnb_predict(X):
    """Predict a class label for every row of ``X`` with Gaussian naive Bayes.

    Reads the module-level ``classes``, ``prior_probabilities``, ``means`` and
    ``variances``. ``prior_probabilities`` is positional (entry ``k`` belongs
    to ``classes[k]``), while ``means`` / ``variances`` are keyed by label.

    Parameters
    ----------
    X : iterable of 1-D feature vectors

    Returns
    -------
    numpy.ndarray
        One predicted label per row of ``X``.

    Raises
    ------
    ValueError
        If a sample does not have the same number of features as the
        per-class parameter vectors.
    """
    predictions = []
    for sample in X:
        sample = np.asarray(sample, dtype=float)
        log_posteriors = []
        # Look the prior up by position, not by label value: labels are not
        # guaranteed to be 0..K-1.
        for position, class_label in enumerate(classes):
            mean = np.asarray(means[class_label], dtype=float)
            var = np.asarray(variances[class_label], dtype=float)
            if sample.shape != mean.shape:
                raise ValueError(
                    f"expected {mean.shape[0]} feature values, got {sample.size}"
                )
            # Evaluate the Gaussian log-density directly. Taking log() of the
            # density underflows to -inf for samples far from the mean, which
            # would tie all classes and make the argmax arbitrary.
            # NOTE: a feature with zero variance is not handled here.
            log_likelihood = -0.5 * np.sum(
                np.log(2 * np.pi * var) + (sample - mean) ** 2 / var
            )
            log_posteriors.append(np.log(prior_probabilities[position]) + log_likelihood)

        predictions.append(classes[np.argmax(log_posteriors)])
    return np.array(predictions)

# Read one sample from the user as whitespace-separated numbers and classify it.
user_input = input("Enter the feature values separated by spaces: ")
feature_values = [float(token) for token in user_input.split()]
new_data = np.array([feature_values])  # a single-row 2-D array
predictions = gnb_predict(new_data)
print(f"Predictions for new data {new_data}:{predictions}")


   Age  Glucose   BMI  class
0   25       85  22.5      0
1   35       90  24.1      0
2   50      140  30.5      1
3   45      130  28.2      1
4   30      100  26.5      0
Enter the feature values separated by spaces: 52 150 32.15 
Predictions for new data [[ 52.   150.    32.15]]:[1]


In [None]:
import pandas as pd
import math

# Word-count table with per-class count columns 'sports' and 'politics'
# (presumably one row per vocabulary word - confirm against the CSV).
df = pd.read_csv('/content/MNB_dataset.csv')

# The document to classify, tokenised on whitespace.
new_input = input("Enter the words in the new document (space-separated): ")
new_doc = new_input.split()

# Summed counts per class and over both classes.
total_sports, total_politics = (df[label].sum() for label in ('sports', 'politics'))
total_words = total_sports + total_politics

# Class priors: each class's share of the summed counts.
P_sports, P_politics = total_sports / total_words, total_politics / total_words

def calculate_likelihood(word, class_name):
    """Return the add-one (Laplace) smoothed estimate of P(word | class_name).

    Computed from the module-level ``df`` as::

        (count(word, class_name) + 1) / (sum(df[class_name]) + len(df))

    where ``len(df)`` serves as the vocabulary size. A word that does not
    occur in ``df['word']`` gets a count of 0.

    Parameters
    ----------
    word : str
        Token to look up in ``df['word']``.
    class_name : str
        Name of the count column in ``df`` (e.g. ``'sports'``, ``'politics'``).
    """
    matches = df.loc[df['word'] == word, class_name].values
    word_count = matches[0] if len(matches) else 0
    # Take the denominator from the same column as the numerator so that any
    # class column is normalised by its own total.
    class_total = df[class_name].sum()
    return (word_count + 1) / (class_total + len(df))

# Step 6: Calculate P(new_doc | sports) and P(new_doc | politics)
# Accumulate in log space: a product of many small per-word probabilities
# underflows to 0.0 for long documents, which would turn the normalisation
# below into 0/0.
log_likelihood_sports = 0.0
log_likelihood_politics = 0.0

for word in new_doc:
    log_likelihood_sports += math.log(calculate_likelihood(word, 'sports'))
    log_likelihood_politics += math.log(calculate_likelihood(word, 'politics'))

# Plain-probability likelihoods (may underflow to 0.0; informational only).
P_new_doc_given_sports = math.exp(log_likelihood_sports)
P_new_doc_given_politics = math.exp(log_likelihood_politics)

# Unnormalised log-posterior = log prior + log likelihood. The prior enters
# exactly once here; the likelihood accumulators above start at log(1) = 0.
log_prior_sports = math.log(P_sports) if P_sports > 0 else -math.inf
log_prior_politics = math.log(P_politics) if P_politics > 0 else -math.inf
log_joint_sports = log_prior_sports + log_likelihood_sports
log_joint_politics = log_prior_politics + log_likelihood_politics

# Subtract the larger log value before exponentiating so at least one term is
# exactly 1.0 and the normaliser cannot underflow.
log_shift = max(log_joint_sports, log_joint_politics)
posterior_sports = math.exp(log_joint_sports - log_shift)
posterior_politics = math.exp(log_joint_politics - log_shift)

total_posterior = posterior_sports + posterior_politics
posterior_sports /= total_posterior
posterior_politics /= total_posterior

print(f"Posterior probability for sports: {posterior_sports}")
print(f"Posterior probability for politics: {posterior_politics}")

if posterior_sports > posterior_politics:
    print("The document is classified as: sports")
else:
    print("The document is classified as: politics")


Enter the words in the new document (space-separated): election
Posterior probability for sports: 0.2125984251968504
Posterior probability for politics: 0.7874015748031497
The document is classified as: politics
