In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# Predictor columns and the column to be predicted
target = "score"
features = [
    "quality_of_education",
    "alumni_employment",
    "quality_of_faculty",
    "publications",
    "influence",
    "citations",
    "patents",
]

# NOTE(review): `df` is not created in this cell — presumably loaded in an
# earlier cell; confirm it exists on a fresh "Restart & Run All".
# Keep only the rows in which every predictor and the target are present.
df = df.dropna(subset=[*features, target])

# Pull the target (as a single-column 2-D array) and the predictors out of the frame
y = df[target].values.reshape(-1, 1)
X = df[features].values

# Standardize the predictors; the fitted scaler stays available as `scaler`
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Prepend a column of ones so the first parameter acts as the bias term
X = np.hstack([np.ones((X.shape[0], 1)), X])

# Gradient Descent Implementation
def gradient_descent(X, y, learning_rate=0.01, iterations=1000):
    """Fit linear-regression parameters with batch gradient descent.

    Starting from an all-zero parameter vector, repeatedly steps against the
    gradient of the half mean squared error
    ``(1 / (2 * m)) * sum((X @ theta - y) ** 2)``.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix. No bias column is added here; include one in ``X``
        if an intercept is wanted.
    y : array-like with m elements
        Targets, given either as a flat vector or as an (m, 1) column.
        They are reshaped to a column internally, so a 1-D target no longer
        broadcasts against the (m, 1) predictions.
    learning_rate : float, default 0.01
        Step size applied to the gradient on every update.
    iterations : int, default 1000
        Number of parameter updates to perform.

    Returns
    -------
    theta : numpy.ndarray of shape (n, 1)
        Parameters after the final update.
    history : list
        Loss evaluated after each update, one entry per iteration.

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional or ``y`` does not hold exactly one
        value per row of ``X``.
    ZeroDivisionError
        If ``X`` has no rows and at least one iteration is requested.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError(f"X must be 2-dimensional, got shape {X.shape}")
    m, n = X.shape

    # Force a column vector: subtracting a flat (m,) target from the (m, 1)
    # predictions would broadcast to an (m, m) matrix.
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    if y.shape[0] != m:
        raise ValueError(
            f"y has {y.shape[0]} values but X has {m} rows"
        )

    theta = np.zeros((n, 1))
    history = []

    # The residual computed for the loss after an update is exactly the one
    # the next gradient needs, so it is carried over instead of recomputed.
    residual = X @ theta - y
    for _ in range(iterations):
        gradients = (1 / m) * (X.T @ residual)
        theta -= learning_rate * gradients
        residual = X @ theta - y
        loss = (1 / (2 * m)) * np.sum(residual ** 2)
        history.append(loss)

    return theta, history

# Fit the parameters using the function's default learning rate and iteration count
theta, history = gradient_descent(X, y)

# Draw the recorded loss values against the iteration index
plt.plot(history)
plt.title("Loss Reduction Over Iterations")
plt.xlabel("Iterations")
plt.ylabel("Loss")
plt.show()

# Report the fitted parameters: heading on one line, values on the next
print("Optimized Theta Values:", theta, sep="\n")


ModuleNotFoundError: No module named 'numpy'