In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Define a threshold to classify universities as "good" or "not good"
threshold = 80  # Universities with a score >= 80 are "good"

# Selecting relevant features and target
features = [
    "quality_of_education",
    "alumni_employment",
    "quality_of_faculty",
    "publications",
    "influence",
    "citations",
    "patents",
]
target = "good_university"

# Drop rows with missing values BEFORE building the label.
# `NaN >= threshold` evaluates to False, so labelling first would silently
# mark universities with a missing score as "not good" (0); a dropna on the
# label column could never remove them because the label itself is never NaN.
# .copy() yields an independent frame for the column assignment below.
df = df.dropna(subset=features + ["score"]).copy()
df[target] = (df["score"] >= threshold).astype(int)

# Extracting input and output variables
X = df[features].values
y = df[target].values.reshape(-1, 1)  # column vector: one label per row

# Standardizing features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Adding bias term: a leading column of ones so theta[0] acts as the intercept
X = np.c_[np.ones((X.shape[0], 1)), X]

# Sigmoid function
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    The textbook expression overflows inside ``np.exp`` for large negative
    ``z`` and emits a RuntimeWarning. This version only ever exponentiates
    ``-|z|`` (a non-positive number), so the intermediate stays within
    [0, 1] and cannot overflow, while the returned values are the same.

    Args:
        z: Real scalar or array-like.

    Returns:
        The sigmoid of ``z``, element-wise, with the same shape as ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # exp of a non-positive value: no overflow
    # z >= 0:  1 / (1 + e^-z)
    # z <  0:  e^z / (1 + e^z)   (same function, rewritten to use e^-|z|)
    numerator = np.where(z >= 0, 1.0, decay)
    return numerator / (1.0 + decay)

# Logistic Regression using Gradient Descent
def gradient_descent_logistic(X, y, learning_rate=0.01, iterations=1000):
    """Fit logistic-regression parameters with batch gradient descent.

    Args:
        X: Design matrix of shape (m, n). Any intercept column must already
            be part of ``X``; none is added here.
        y: Binary labels (0/1), shape (m,) or (m, 1).
        learning_rate: Step size applied to the gradient on each update.
        iterations: Number of gradient-descent updates to perform.

    Returns:
        A tuple ``(theta, history)``. ``theta`` is the (n, 1) parameter
        array after the last update. ``history`` is a list with one mean
        cross-entropy loss per iteration, evaluated with the parameters in
        effect *before* that iteration's update.
    """
    m, n = X.shape
    # Force a column vector: a 1-D y would broadcast (m, 1) - (m,) into an
    # (m, m) matrix and silently produce a wrong gradient.
    y = np.asarray(y).reshape(-1, 1)
    theta = np.zeros((n, 1))
    history = []

    for _ in range(iterations):
        logits = X @ theta
        predictions = sigmoid(logits)
        gradients = (1 / m) * X.T @ (predictions - y)

        # Cross-entropy expressed through the logits: log(1 + e^z) - y*z.
        # Algebraically equal to -[y*log(p) + (1-y)*log(1-p)], but it never
        # evaluates log(0), which turns the loss into inf/nan once the
        # predictions saturate at exactly 0 or 1.
        loss = (1 / m) * np.sum(np.logaddexp(0, logits) - y * logits)

        theta -= learning_rate * gradients
        history.append(loss)

    return theta, history

# Fit the model using the default learning rate and iteration count
theta, history = gradient_descent_logistic(X, y)

# Visualise the recorded loss, one point per iteration
plt.plot(history)
plt.title("Loss Reduction Over Iterations")
plt.ylabel("Loss")
plt.xlabel("Iterations")
plt.show()

# Report the learned parameters (header line, then the theta array)
print("Optimized Theta Values:", theta, sep="\n")
