In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt




In [None]:
# Read the feature table (X) and the label table (y) from the working directory.
# NOTE(review): read_csv treats the first row of each file as a header by
# default; if these files have no header row, header=None is needed -- confirm.
X, y = (pd.read_csv(csv_path) for csv_path in ("logisticX.csv", "logisticY.csv"))

# Place features and labels side by side in one frame for inspection.
data = pd.concat((X, y), axis="columns")




In [None]:
# Summary statistics for every numeric column.
print(data.describe())

# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
data.info()

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn the per-column min/max from X, then rescale X with the fitted scaler.
scaler = MinMaxScaler().fit(X)
X_scaled = scaler.transform(X)



In [None]:
class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    The model has no intercept term: predictions are ``sigmoid(X @ weights)``.
    Append a constant column to ``X`` if a bias term is required.

    Parameters
    ----------
    learning_rate : float, default=0.1
        Step size applied to every gradient update.
    max_iter : int, default=10
        Number of gradient-descent iterations run by ``gradient_descent``.
    random_state : int or None, default=None
        Seed for the initial weights. ``None`` draws from NumPy's global
        random state.
    """

    # Keeps predicted probabilities away from exactly 0 and 1 so log() is finite.
    _EPS = 1e-15

    def __init__(self, learning_rate=0.1, max_iter=10, random_state=None):
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.random_state = random_state
        self.weights = None  # Set by gradient_descent()

    @staticmethod
    def _as_vector(y, n_samples):
        """Return ``y`` as a flat float array and check it has ``n_samples`` entries.

        A single-column DataFrame or an (m, 1) array would otherwise fail or
        mis-broadcast against the (m,) vector of activations.
        """
        target = np.asarray(y, dtype=float).reshape(-1)
        if target.shape[0] != n_samples:
            raise ValueError(
                f"y has {target.shape[0]} values but X has {n_samples} rows"
            )
        return target

    def sigmoid(self, z):
        """Element-wise logistic function ``1 / (1 + exp(-z))``."""
        # Clipping keeps exp() from overflowing; the result is unchanged to
        # within floating-point precision.
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def cost_function(self, X, y, weights):
        """Mean binary cross-entropy of ``weights`` on ``(X, y)``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like with n_samples values (0 or 1); flattened internally.
        weights : array of shape (n_features,)

        Returns
        -------
        float
        """
        X = np.asarray(X, dtype=float)
        y = self._as_vector(y, X.shape[0])
        a = np.clip(self.sigmoid(np.dot(X, weights)), self._EPS, 1 - self._EPS)
        return float(-np.mean(y * np.log(a) + (1 - y) * np.log(1 - a)))

    def gradient_descent(self, X, y):
        """Fit ``self.weights`` by batch gradient descent.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like with n_samples values (0 or 1); flattened internally.

        Returns
        -------
        list of float
            Cost after each of the ``max_iter`` updates.

        Raises
        ------
        ValueError
            If ``X`` is not a non-empty 2-D array or ``y`` does not match it.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError(
                "X must be a non-empty 2-D array of shape (n_samples, n_features)"
            )
        m, n_features = X.shape
        y = self._as_vector(y, m)

        # Random initial weights in [0, 1).
        if self.random_state is None:
            self.weights = np.random.rand(n_features)
        else:
            self.weights = np.random.default_rng(self.random_state).random(n_features)

        costs = []
        for iteration in range(self.max_iter):
            errors = self.sigmoid(np.dot(X, self.weights)) - y
            # Gradient of the cross-entropy cost is +(1/m) * X^T (a - y);
            # subtracting it moves the weights downhill.
            gradient = np.dot(X.T, errors) / m
            self.weights -= self.learning_rate * gradient
            cost = self.cost_function(X, y, self.weights)
            costs.append(cost)

            if iteration % 100 == 0:  # Print progress updates every 100 iterations
                print(f"Iteration {iteration+1}: Cost = {cost}")

        return costs

    def predict(self, X_new):
        """Return 0/1 class labels for ``X_new`` using a 0.5 probability threshold.

        Raises
        ------
        RuntimeError
            If called before ``gradient_descent``.
        """
        if self.weights is None:
            raise RuntimeError("Model is not trained; call gradient_descent() first")
        a = self.sigmoid(np.dot(np.asarray(X_new, dtype=float), self.weights))
        return np.where(a > 0.5, 1, 0)  # Threshold at 0.5 for binary classification


In [None]:
# Baseline fit with learning rate 0.1; `model` is reused later for the
# decision-boundary plot.
# NOTE(review): max_iter is left at the class default, so the value reported
# below is the cost after that many updates, not necessarily a converged one.
model = LogisticRegression(learning_rate=0.1)
costs = model.gradient_descent(X_scaled, y)

# The last entry of the history is the cost after the final update.
print(f"Final cost value: {costs[-1]}")


In [None]:
# Compare the cost trajectories of learning rates 0.1 and 5 over the first
# 100 updates. max_iter is passed explicitly so that each cost history really
# contains the number of points being plotted.
n_plot_iterations = 100

model_01 = LogisticRegression(learning_rate=0.1, max_iter=n_plot_iterations)
costs_01 = model_01.gradient_descent(X_scaled, y)

# NOTE: despite the "_05" suffix, this model uses learning rate 5.
model_05 = LogisticRegression(learning_rate=5, max_iter=n_plot_iterations)
costs_05 = model_05.gradient_descent(X_scaled, y)

# Derive the x-axis from each history's own length so x and y always match.
plt.plot(range(len(costs_01)), costs_01, label="Learning Rate 0.1")
plt.plot(range(len(costs_05)), costs_05, label="Learning Rate 5")
plt.xlabel("Iteration")
plt.ylabel("Cost Function")
plt.legend()
plt.show()


In [None]:
import matplotlib.colors as colors

# Two classes (0 and 1): one colour per class, with bin edges halfway between
# the labels so each label falls inside its own colour band.
cmap = colors.ListedColormap(['blue', 'orange'])
bounds = [-0.5, 0.5, 1.5]
norm = colors.BoundaryNorm(bounds, cmap.N)

# Evaluate the classifier on a dense grid spanning the first two scaled features.
resolution = 500
X1, X2 = np.meshgrid(np.linspace(X_scaled[:, 0].min(), X_scaled[:, 0].max(), resolution),
                     np.linspace(X_scaled[:, 1].min(), X_scaled[:, 1].max(), resolution))
Z = model.predict(np.c_[X1.ravel(), X2.ravel()])
Z = Z.reshape(X1.shape)

fig, ax = plt.subplots()

# Predicted regions: levels are tied to `bounds` so exactly two bands are drawn,
# and they are semi-transparent so same-coloured data points remain visible.
regions = ax.contourf(X1, X2, Z, levels=bounds, cmap=cmap, norm=norm, alpha=0.3)

# Actual data points coloured by true label; y is flattened because it may be
# a single-column table rather than a 1-D vector.
ax.scatter(X_scaled[:, 0], X_scaled[:, 1], c=np.ravel(y), cmap=cmap, norm=norm,
           edgecolors="k")

ax.set_title("Data Points and Decision Boundary (Learning Rate 0.1)")
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
# Attach the colorbar to the prediction regions explicitly; without a mappable
# it would describe the scatter (true labels) instead of the predicted class.
fig.colorbar(regions, ax=ax, ticks=[0, 1], label="Predicted Class")
fig.tight_layout()
plt.show()


In [None]:
# Train both learning rates for many more updates and plot the full histories.
max_iter = 1000

model_01, model_05 = (
    LogisticRegression(learning_rate=rate, max_iter=max_iter) for rate in (0.1, 5)
)
costs_01 = model_01.gradient_descent(X_scaled, y)
costs_05 = model_05.gradient_descent(X_scaled, y)

# One curve per learning rate, sharing the same iteration axis.
iteration_axis = range(max_iter)
for history, curve_label in (
    (costs_01, "Learning Rate 0.1"),
    (costs_05, "Learning Rate 5"),
):
    plt.plot(iteration_axis, history, label=curve_label)

plt.xlabel("Iteration")
plt.ylabel("Cost Function")
plt.legend()
plt.title("Cost Function vs. Iteration (Max Iterations)")
plt.tight_layout()
plt.show()


In [None]:
from sklearn.metrics import confusion_matrix

# Evaluate the learning-rate-0.1 model on the data it was trained on.
# y is flattened because it may be a single-column table rather than a vector.
y_true = np.ravel(y)
y_pred = model_01.predict(X_scaled)

# Rows are true labels and columns are predicted labels. Fixing labels=[0, 1]
# guarantees a 2x2 matrix laid out as [[TN, FP], [FN, TP]].
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

print("Confusion Matrix:\n", cm)

# Metrics for the positive class (label 1); each ratio falls back to 0.0 when
# its denominator is zero instead of dividing by zero.
tn, fp, fn, tp = cm.ravel()
accuracy = (tp + tn) / len(y_true) if len(y_true) else 0.0
precision = tp / (tp + fp) if (tp + fp) else 0.0  # of predicted 1s, share that are truly 1
recall = tp / (tp + fn) if (tp + fn) else 0.0  # of true 1s, share that were predicted 1
f1_score = (
    2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1_score)


In [None]:
conda install -c anaconda numpy
