# General Machine Learning - Novice Level

Pick **ONE** task and complete it.

**Tasks:**
1. Linear Regression
2. Classification Problem
3. Clustering

In [1]:
# Import libraries
import numpy as np

## Task 1: Linear Regression

In [None]:
# Load data

In [None]:
# Build model

In [None]:
# Train

In [None]:
# Visualize results

## Task 2: Classification Problem

In [2]:
# Load dataset

# Build a two-class, two-feature toy dataset.
# The calls into the global NumPy RNG happen in a fixed order
# (seed -> class 0 draw -> class 1 draw -> shuffle) so re-running this
# cell always reproduces the same samples and the same split.
np.random.seed(42)

n = 100  # samples per class
# Class 0: standard-normal cloud shifted to (-1, -1)
X_class0 = np.random.randn(n, 2) - 1.0
# Class 1: wider cloud (scaled by 1.5) shifted to (1, 1)
X_class1 = 1.5 * np.random.randn(n, 2) + 1.0

# Stack features row-wise; labels form a (2n, 1) column of 0.0s then 1.0s
X = np.concatenate([X_class0, X_class1], axis=0)
y = np.concatenate([np.zeros((n, 1)), np.ones((n, 1))], axis=0)

# Apply one shared random permutation so features and labels stay aligned
indices = np.arange(len(X))
np.random.shuffle(indices)
X = X[indices]
y = y[indices]

# First 80% of the shuffled rows train the models, the rest is held out
split = int(0.8 * X.shape[0])
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]


In [3]:
# Classifier 1: logistic regression trained with batch gradient descent (NumPy only)

def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated element-wise.

    Uses a numerically stable formulation: the exponential is only ever
    taken of a non-positive number, so large-magnitude inputs cannot
    overflow (the naive form overflows in exp(-z) for very negative z).

    Args:
        z: Scalar or array-like of real values.

    Returns:
        Value(s) in [0, 1] with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) lies in (0, 1], so it can underflow to 0 but never overflow.
    e = np.exp(-np.abs(z))
    # z >= 0:  1 / (1 + exp(-z))
    # z <  0:  exp(z) / (1 + exp(z))   -- algebraically the same function
    return np.where(z >= 0, 1.0, e) / (1.0 + e)

def compute_loss(y_true, y_pred):
    """Mean binary cross-entropy between labels and predicted probabilities.

    A small constant (1e-9) is added inside each logarithm so that
    predictions of exactly 0 or 1 do not produce log(0).

    Args:
        y_true: Array of 0/1 labels; its first dimension is the sample count.
        y_pred: Array of predicted probabilities, same shape as ``y_true``.

    Returns:
        The summed cross-entropy divided by the number of samples.
    """
    eps = 1e-9
    n_samples = y_true.shape[0]
    positive_term = y_true * np.log(y_pred + eps)
    negative_term = (1 - y_true) * np.log(1 - y_pred + eps)
    return - (1 / n_samples) * np.sum(positive_term + negative_term)

def train_logistic_regression(X, y, lr=0.1, iterations=1000):
    """Fit a binary logistic-regression model with batch gradient descent.

    Weights start at zero and are updated ``iterations`` times using the
    gradient of the mean cross-entropy over the whole training set.

    Args:
        X: Feature matrix of shape (m, n).
        y: Binary labels, either a column of shape (m, 1) or a flat
            vector of shape (m,).
        lr: Learning rate (step size) of each gradient update.
        iterations: Number of full-batch gradient steps.

    Returns:
        Tuple ``(W, b)``: weight column of shape (n, 1) and scalar bias.

    Raises:
        ValueError: If ``X`` and ``y`` hold a different number of samples.
    """
    X = np.asarray(X, dtype=float)
    # Force labels into a column. A flat (m,) vector would broadcast against
    # the (m, 1) predictions into an (m, m) matrix and break the update.
    y = np.asarray(y, dtype=float).reshape(-1, 1)

    m, n = X.shape
    if y.shape[0] != m:
        raise ValueError(
            f"X has {m} samples but y has {y.shape[0]} labels"
        )

    W = np.zeros((n, 1))
    b = 0.0

    for _ in range(iterations):
        # Forward pass: predicted probability for every sample
        y_pred = sigmoid(np.dot(X, W) + b)

        # Gradient of the mean cross-entropy w.r.t. W and b
        error = y_pred - y
        dW = (1/m) * np.dot(X.T, error)
        db = (1/m) * np.sum(error)

        W -= lr * dW
        b -= lr * db

    return W, b

def predict_logistic(X, W, b):
    """Predict hard 0/1 labels from a fitted logistic-regression model.

    Args:
        X: Feature matrix whose columns match the rows of ``W``.
        W: Weight column returned by training.
        b: Bias term returned by training.

    Returns:
        Integer array that is 1 where the predicted probability is at
        least 0.5 and 0 elsewhere.
    """
    logits = np.dot(X, W) + b
    probabilities = sigmoid(logits)
    is_positive = probabilities >= 0.5
    return is_positive.astype(int)

# Fit on the training split, then score on the held-out test split
W1, b1 = train_logistic_regression(X=X_train, y=y_train)
y_pred1 = predict_logistic(X=X_test, W=W1, b=b1)
# Accuracy = fraction of test samples whose predicted label matches the truth
acc1 = (y_pred1 == y_test).mean()
print("Classifier 1 (Logistic Regression) Accuracy:", round(acc1 * 100, 2), "%")


Classifier 1 (Logistic Regression) Accuracy: 85.0 %


In [4]:
# Classifier 2: k-nearest neighbours with Euclidean distance (NumPy only)

def euclidean_distance(x1, x2):
    """Return the straight-line (L2) distance between two points.

    Args:
        x1: First point as a NumPy array.
        x2: Second point, broadcast-compatible with ``x1``.

    Returns:
        Square root of the sum of squared coordinate differences.
    """
    delta = x1 - x2
    squared_sum = np.sum(np.square(delta))
    return np.sqrt(squared_sum)

def predict_knn(X_train, y_train, X_test, k=5):
    """Classify each test point by majority vote among its k nearest neighbours.

    Distances are Euclidean. The vote counts how often each label occurs
    among the k closest training points, so it works for any set of class
    labels, not only 0/1. When several labels tie, the smallest label wins;
    for 0/1 labels this matches rounding the mean label with ``np.round``
    (a 0.5 mean rounds to 0).

    Args:
        X_train: Training features, 2-D array of shape (n_train, n_features).
        y_train: Training labels, shape (n_train,) or (n_train, 1).
        X_test: Test features, 2-D array of shape (n_test, n_features).
        k: Number of neighbours that vote. If it exceeds the number of
            training points, all training points vote.

    Returns:
        Column array of shape (n_test, 1) with one predicted label per
        test point, in the dtype of ``y_train``.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    X_train = np.asarray(X_train, dtype=float)
    X_test = np.asarray(X_test, dtype=float)
    # Flatten labels so a (n_train, 1) column and a flat vector behave the same
    labels = np.asarray(y_train).reshape(-1)

    y_pred = []
    for x in X_test:
        # Distance from this test point to every training point in one call
        distances = np.linalg.norm(X_train - x, axis=1)
        # Indices of the k closest training points
        k_indices = distances.argsort()[:k]
        # Majority vote: np.unique returns labels sorted ascending, and
        # argmax picks the first maximum, so ties go to the smallest label
        classes, votes = np.unique(labels[k_indices], return_counts=True)
        y_pred.append(classes[np.argmax(votes)])
    return np.array(y_pred).reshape(-1, 1)

# k-NN has no training step: classify the test split directly against the
# stored training points, then score against the true labels
y_pred2 = predict_knn(X_train=X_train, y_train=y_train, X_test=X_test, k=5)
# Accuracy = fraction of test samples whose predicted label matches the truth
acc2 = (y_pred2 == y_test).mean()
print("Classifier 2 (k-Nearest Neighbors) Accuracy:", round(acc2 * 100, 2), "%")


Classifier 2 (k-Nearest Neighbors) Accuracy: 87.5 %


In [5]:
# Compare results

# Report both test accuracies side by side
print("\n--- Model Comparison ---")
print(f"Logistic Regression Accuracy: {acc1 * 100:.2f}%")
print(f"KNN Accuracy: {acc2 * 100:.2f}%")

# Pick the verdict first, print once; the strict comparisons are checked in
# this order so that anything that is neither greater nor smaller falls
# through to the "equal" message
if acc1 > acc2:
    verdict = "\nBest Model: Logistic Regression"
elif acc2 > acc1:
    verdict = "\nBest Model: K-Nearest Neighbors"
else:
    verdict = "\nBoth models performed equally well."
print(verdict)



--- Model Comparison ---
Logistic Regression Accuracy: 85.00%
KNN Accuracy: 87.50%

Best Model: K-Nearest Neighbors


## Task 3: Clustering

In [None]:
# Load data

In [None]:
# Apply K-means

In [None]:
# Visualize clusters

In [None]:
# Find optimal K