### LOGISTIC REGRESSION
In this assignment we try to find a relation between average weight of granules and total surface area to see if a material is viable as a catalyst

In [1]:
# Importing libraries here
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Feature mapping
Sometimes the number of features we are given is too small for the model to fit the data well.  
Hence we create new features by taking polynomial products of the existing ones.

In [2]:
# Generates new features by taking products of various powers of the input features (x and y)
# This expands the feature space with polynomial combinations (up to degree 4) of x and y

### TODO 1
def feature_map(points):
    """Expand 2-D points into every polynomial term x^p * y^q with 1 <= p + q <= 4.

    `points` is converted with np.array and indexed as a two-column array
    (column 0 is x, column 1 is y). The result has one row per point and
    14 columns, ordered exactly as listed in `exponents` below.
    """
    pts = np.array(points)
    x, y = pts[:, 0], pts[:, 1]

    # (power of x, power of y) for each output column, in column order
    exponents = (
        (1, 0), (0, 1),
        (2, 0), (0, 2), (1, 1),
        (3, 0), (0, 3), (1, 2), (2, 1),
        (4, 0), (0, 4), (3, 1), (1, 3), (2, 2),
    )

    return np.column_stack([x**p * y**q for p, q in exponents])

### Creating the class for Logistic Regression

In [None]:
# Our class in which we define all functions. (This way our functions are contained)
class LogisticRegression:
    """Binary logistic regression trained by batch gradient descent with L2 regularization.

    Both parameters are ``None`` until ``fit`` has been called:
      weights -- 1-D array holding one coefficient per feature column
      bias    -- scalar intercept
    """

    def __init__(self) -> None:
        # Learned parameters; populated by fit()
        self.weights : np.ndarray | None = None
        self.bias : float | None = None

    def __sigmoid(self, z: np.ndarray) -> np.ndarray:
        """Return the logistic function 1 / (1 + e^-z), applied element-wise."""
        # Clipping keeps np.exp finite for extreme logits (no overflow warnings);
        # the change in the result is far below the 1e-12 epsilon used in the loss.
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def predict_probability(self, X: np.ndarray) -> np.ndarray:
        """Return the predicted probability of the positive class for each row of X.

        X is expected to be (n_samples, n_features); the result is (n_samples,).
        Raises RuntimeError if the model has not been fitted.
        """
        if self.weights is None or self.bias is None:
            raise RuntimeError("Model is not fitted yet; call fit() before predicting.")
        # Samples are rows, so X must be the left operand of the product
        logits = np.dot(X, self.weights) + self.bias
        return self.__sigmoid(logits)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return a boolean array: True where the predicted probability is >= 0.5."""
        return self.predict_probability(X) >= 0.5

    def __loss(self, X: np.ndarray, y: np.ndarray, lambda_reg: float = 0) -> tuple:
        """Return (loss, dw, db) for the current parameters.

        loss -- mean binary cross-entropy plus (lambda_reg / 2m) * ||weights||^2
        dw   -- gradient of the loss with respect to the weights (same shape as weights)
        db   -- gradient of the loss with respect to the bias (float)

        The bias is not regularized.
        """
        y = np.asarray(y, dtype=float)
        m = X.shape[0]
        predictions = self.predict_probability(X)

        # Small epsilon guards against log(0) when a prediction saturates
        eps = 1e-12
        loss = -(1 / m) * (
            np.dot(y, np.log(predictions + eps))
            + np.dot(1 - y, np.log(1 - predictions + eps))
        )

        # Gradient of the cross-entropy through the sigmoid simplifies to (p - y)
        error = predictions - y
        dw = np.dot(X.T, error) / m
        db = float(np.sum(error) / m)

        # L2 regularization
        loss += (lambda_reg / (2 * m)) * np.sum(self.weights ** 2)
        dw += (lambda_reg / m) * self.weights

        return loss, dw, db

    def fit(self, X: np.ndarray, y: np.ndarray, epochs: int = 500,
            learning_rate: float = 0.01, threshold: float = 0.0001,
            lambda_reg: float = 1) -> None:
        """Learn weights and bias by gradient descent on the regularized loss.

        X             -- (n_samples, n_features) feature matrix
        y             -- (n_samples,) labels in {0, 1}
        epochs        -- maximum number of gradient steps
        learning_rate -- step size
        threshold     -- stop early once the loss changes by less than this
                         between two consecutive epochs
        lambda_reg    -- L2 regularization strength

        Raises ValueError if X is not 2-D, is empty, or disagrees with y in length.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features).")
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty dataset.")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples.")

        # Small random start (uses numpy's global RNG, so np.random.seed controls it)
        self.weights = np.random.randn(X.shape[1]) * 0.01
        self.bias = float(np.random.randn() * 0.01)

        previous_loss = None
        for _ in range(epochs):
            loss, dw, db = self.__loss(X, y, lambda_reg)

            self.weights = self.weights - learning_rate * dw
            self.bias = self.bias - learning_rate * db

            # Converged: the loss has effectively stopped changing
            if previous_loss is not None and abs(previous_loss - loss) < threshold:
                break
            previous_loss = loss


In [None]:
# Load the dataset and split it column-wise:
# the first two columns become X and the third becomes y.
df = pd.read_csv('logistic_data.csv')
data = df.to_numpy()
X, y = data[:, :2], data[:, 2]

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the split reproducible
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [3]:
# z-score standardization
### TODO 7
def z_score(X: np.ndarray) -> tuple:
    """Standardize each column of X to zero mean and unit standard deviation.

    Returns a tuple (X_standardized, x_mean, x_std). The mean and standard
    deviation are computed per column (axis 0) so the caller can apply the
    same constants to other data, e.g. ``(X_test - x_mean) / x_std``.
    """
    X = np.asarray(X, dtype=float)
    x_mean = np.mean(X, axis=0)
    x_std = np.std(X, axis=0)
    # A constant column has zero spread; dividing by 1 maps it to 0 instead of NaN/inf
    x_std = np.where(x_std == 0, 1.0, x_std)
    return (X - x_mean) / x_std, x_mean, x_std

In [None]:
# Standardize using statistics from the training split, then reuse the same
# constants on the test split so both are on the same scale.
X_train, x_mean, x_std = z_score(X_train)
X_test = (X_test - x_mean) / x_std

# Expand both splits with feature_map
x_train, x_test = feature_map(X_train), feature_map(X_test)

In [None]:
# Visualizing how the boundary curve looks like
def plot_decision_boundary(X_original, y, model, resolution=500):
    """Scatter the 2-D data points and draw the model's decision boundary over them.

    X_original -- array whose first two columns are the plotted coordinates
    y          -- labels; rows with y == 1 are drawn as "True", y == 0 as "False"
    model      -- object with a predict() method that accepts feature_map output
    resolution -- number of grid samples along each axis
    """
    # Build a grid that extends one unit past the data on every side
    margin = 1
    first, second = X_original[:, 0], X_original[:, 1]
    x_axis = np.linspace(first.min() - margin, first.max() + margin, resolution)
    y_axis = np.linspace(second.min() - margin, second.max() + margin, resolution)
    xx, yy = np.meshgrid(x_axis, y_axis)

    # Evaluate the model at every grid point, in the expanded feature space
    grid_original = np.column_stack((xx.ravel(), yy.ravel()))
    Z = model.predict(feature_map(grid_original)).reshape(xx.shape)

    # Plot the data points, one class at a time
    classes = (
        ("True", 1, "blue", "o"),
        ("False", 0, "red", "x"),
    )
    for label, value, colour, marker in classes:
        subset = X_original[y == value]
        plt.scatter(subset[:, 0], subset[:, 1], label=label, c=colour, marker=marker, s=20)

    # The 0.5 level of the prediction grid is the decision boundary
    plt.contour(xx, yy, Z, levels=[0.5], colors="black", linewidths=2)

    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.title("Decision Boundary and Data Points")
    plt.show()

We plot the decision boundary that the model predicts. This can be used to check for overfitting.  
If the boundary starts looking like an amoeba trying to fit every point, then it is a sign of overfitting.

In [None]:
# Checking accuracy of test model
model = LogisticRegression()
# The keyword arguments were left blank (a SyntaxError). These values are only a
# starting point — tune them for your data.
model.fit(x_train, y_train, epochs=5000, learning_rate=0.1, threshold=1e-6, lambda_reg=0.1)
y_pred = model.predict(x_test)

# Percentage of test samples whose prediction equals the label
accuracy = np.mean(y_pred == y_test) * 100
print(f"Your model has an accuracy of {accuracy}%")

# Plotting the contour and checking for overfitting (try changing the degree in the feature_map function to 10 instead)
# The full dataset is standardized with the training constants before plotting
plot_decision_boundary(((X - x_mean) / x_std), y, model)