# Lab Practice 4: Cost Function and Gradient Descent Implementation

**Department of Electrical and Computer Engineering**  
**Pak-Austria Fachhochschule: Institute of Applied Sciences & Technology**  
**Subject: Machine Learning**  
**Subject Teacher: Dr. Abid Ali**  
**Lab Supervisor: Miss Sana Saleem**

## Objective
Implement the gradient descent algorithm from scratch to optimize linear regression parameters, and visualize the cost function over iterations.

## Dataset
- **File**: diabetes.csv
- **Features**: All features except Outcome
- **Target Variable**: Outcome (0 or 1)
- **Algorithm**: Custom gradient descent implementation


In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from scipy import stats

print("Libraries imported successfully!")


In [None]:
# Load and preprocess the dataset
url = "diabetes.csv"
columns = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
           'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome']
# Column labels are supplied explicitly via `names`. NOTE(review): if the CSV
# also carries its own header line, that line is presumably read as a data row
# of strings and removed by the coerce + dropna steps below -- confirm against
# the actual file.
df = pd.read_csv(url, names=columns)

# Data preprocessing: force every cell to a number (anything non-numeric
# becomes NaN) and drop the rows that end up incomplete.
df = df.apply(pd.to_numeric, errors='coerce')
df = df.dropna()

# Remove outliers using Z-score.
# Only the feature columns are scored. 'Outcome' is a 0/1 label, not a
# measurement: if the positive class made up less than ~10% of the rows, its
# z-score sqrt((1 - p) / p) would exceed 3 and every positive row would be
# thrown away as an "outlier".
z_scores = np.abs(stats.zscore(df.drop(columns='Outcome')))
df_clean = df[(z_scores < 3).all(axis=1)]

print(f"Dataset shape after cleaning: {df_clean.shape}")

# Prepare features and target
X = df_clean.drop(columns='Outcome')
y = df_clean['Outcome']

# Train-test split (80/20, fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling: the scaler is fitted on the training split only and then
# applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Training set size: {X_train_scaled.shape[0]}")
print(f"Test set size: {X_test_scaled.shape[0]}")
print(f"Number of features: {X_train_scaled.shape[1]}")


In [None]:
# Initialize parameters
learning_rate = 0.01
epochs = 1000
bias = 0.0  # Initial bias

# Seed NumPy's global generator before drawing the starting weights so the
# run is repeatable.
np.random.seed(42)
m, n = X_train_scaled.shape
theta = np.random.randn(n)  # Initial weights, one per feature column

# Report the starting configuration.
print(
    "Initial parameters:",
    f"Theta shape: {theta.shape}",
    f"Initial theta: {theta}",
    f"Initial bias: {bias}",
    f"Learning rate: {learning_rate}",
    f"Number of epochs: {epochs}",
    sep="\n",
)

# Define the cost function (half the mean squared error, i.e. MSE / 2)
def compute_cost(X, y, theta, bias):
    """
    Compute the halved mean squared error of a linear model.

    The prediction is ``np.dot(X, theta) + bias`` and the returned value is
    ``sum((prediction - y) ** 2) / (2 * m)`` with ``m = len(y)``, i.e. half
    of the ordinary MSE.

    Parameters:
        X: feature matrix, one row per sample.
        y: target values, one per row of ``X``.
        theta: weight vector, one entry per column of ``X``.
        bias: scalar intercept.

    Returns:
        The scalar cost.
    """
    m = len(y)
    predictions = np.dot(X, theta) + bias
    cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2)
    return cost


# Define gradient descent function
def gradient_descent(X, y, theta, bias, learning_rate, epochs):
    """
    Fit a linear model with batch gradient descent.

    Every epoch uses all samples to compute the gradient of the cost defined
    by ``compute_cost``, takes one step of size ``learning_rate`` against it,
    and records the cost obtained with the updated parameters. Progress is
    printed every 100 epochs.

    Parameters:
        X: feature matrix, one row per sample (must support ``.T``).
        y: target values, one per row of ``X``.
        theta: initial weight vector. It is copied, so the caller's object is
            never modified.
        bias: initial scalar intercept.
        learning_rate: step size applied to both gradients.
        epochs: number of update steps to perform.

    Returns:
        Tuple ``(theta, bias, cost_history)``: the optimised weights (a new
        float array), the optimised bias, and a list holding the cost after
        each epoch.
    """
    m = len(y)
    # Work on a private float copy: the in-place `-=` below would otherwise
    # overwrite the caller's initial weights, and would fail outright on an
    # integer-typed array.
    theta = np.array(theta, dtype=float)
    bias = float(bias)
    cost_history = []

    for epoch in range(epochs):
        # Prediction error of the current parameters
        errors = np.dot(X, theta) + bias - y

        # Compute gradients
        d_theta = (1 / m) * np.dot(X.T, errors)
        d_bias = (1 / m) * np.sum(errors)

        # Update weights
        theta -= learning_rate * d_theta
        bias -= learning_rate * d_bias

        # Calculate cost (with the updated parameters) and save it for plotting
        cost = compute_cost(X, y, theta, bias)
        cost_history.append(cost)

        # Print progress every 100 epochs
        if epoch % 100 == 0:
            print(f"Epoch {epoch}: Cost = {cost:.6f}")

    return theta, bias, cost_history

# Banner: a blank line, the heading, then a 50-character rule.
print("\nStarting Gradient Descent Optimization...", "=" * 50, sep="\n")


In [None]:
# Run gradient descent on the scaled training data
theta, bias, cost_history = gradient_descent(
    X=X_train_scaled,
    y=y_train,
    theta=theta,
    bias=bias,
    learning_rate=learning_rate,
    epochs=epochs,
)

# Summarise the fitted parameters and the last recorded cost.
print(
    "\nGradient Descent Completed!",
    f"Final theta: {theta}",
    f"Final bias: {bias:.6f}",
    f"Final cost: {cost_history[-1]:.6f}",
    sep="\n",
)

# Make predictions with the fitted linear model on both splits
y_pred_train = np.dot(X_train_scaled, theta) + bias
y_pred_test = np.dot(X_test_scaled, theta) + bias

# Calculate test MSE
mse_test = mean_squared_error(y_test, y_pred_test)
print(f"Test MSE: {mse_test:.6f}")

# Calculate R-squared
def r2_score_custom(y_true, y_pred):
    """
    Compute the coefficient of determination (R²).

    R² is ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of squared
    differences between ``y_true`` and ``y_pred`` and ``SS_tot`` is the sum
    of squared deviations of ``y_true`` from its own mean.

    Parameters:
        y_true: observed target values (array, list or pandas Series).
        y_pred: predicted values, in the same order as ``y_true``.

    Returns:
        The R² score. For a constant ``y_true`` (``SS_tot == 0``) the ratio
        is undefined; 1.0 is returned when the predictions are exact and 0.0
        otherwise. ``nan`` is returned for empty input.
    """
    # Compare strictly by position. Two pandas Series with different indexes
    # would otherwise be aligned by label, turning every residual into NaN
    # and silently producing a "perfect" score.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    if y_true.size == 0:
        return float('nan')

    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)

    if ss_tot == 0:
        # Constant target: avoid dividing by zero.
        return 1.0 if ss_res == 0 else 0.0
    return 1 - (ss_res / ss_tot)

# Score the gradient-descent predictions on the held-out split.
r2_test = r2_score_custom(y_true=y_test, y_pred=y_pred_test)
print(f"Test R²: {r2_test:.6f}")


In [None]:
# Comprehensive visualization
# Twelve diagnostic panels laid out on a 3x4 grid of a single figure. Each
# plt.* call below acts on the subplot selected by the most recent
# plt.subplot(...), so the statement order inside every section matters.
plt.figure(figsize=(20, 15))

# Plot 1: Cost function over epochs
# NOTE: cost_history holds the values returned by compute_cost, which divides
# the squared-error sum by 2*m -- the curve is therefore half of the MSE even
# though the axis label reads 'Cost (MSE)'.
plt.subplot(3, 4, 1)
plt.plot(range(epochs), cost_history, color='blue', linewidth=2)
plt.xlabel('Epochs')
plt.ylabel('Cost (MSE)')
plt.title('Cost Function Over Epochs (Gradient Descent)')
plt.grid(True, alpha=0.3)

# Plot 2: Cost function (log scale)
# Same data as plot 1, drawn with a logarithmic y-axis.
plt.subplot(3, 4, 2)
plt.plot(range(epochs), cost_history, color='red', linewidth=2)
plt.xlabel('Epochs')
plt.ylabel('Cost (MSE) - Log Scale')
plt.title('Cost Function (Log Scale)')
plt.yscale('log')
plt.grid(True, alpha=0.3)

# Plot 3: Actual vs Predicted (Training)
# The red line is the identity line spanning the observed target range.
plt.subplot(3, 4, 3)
plt.scatter(y_train, y_pred_train, color='blue', alpha=0.6)
plt.plot([y_train.min(), y_train.max()], [y_train.min(), y_train.max()], color='red', linewidth=2)
plt.xlabel('Actual Outcome')
plt.ylabel('Predicted Outcome')
plt.title('Training: Actual vs Predicted')
plt.grid(True, alpha=0.3)

# Plot 4: Actual vs Predicted (Test)
# Same layout as plot 3, for the held-out split.
plt.subplot(3, 4, 4)
plt.scatter(y_test, y_pred_test, color='green', alpha=0.6)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], color='red', linewidth=2)
plt.xlabel('Actual Outcome')
plt.ylabel('Predicted Outcome')
plt.title('Test: Actual vs Predicted')
plt.grid(True, alpha=0.3)

# Plot 5: Residuals (Training)
# Residual = actual - predicted, plotted against the prediction; the dashed
# line marks zero error.
plt.subplot(3, 4, 5)
train_residuals = y_train - y_pred_train
plt.scatter(y_pred_train, train_residuals, color='blue', alpha=0.6)
plt.axhline(y=0, color='red', linestyle='--')
plt.xlabel('Predicted Values')
plt.ylabel('Residuals')
plt.title('Training Residuals')
plt.grid(True, alpha=0.3)

# Plot 6: Residuals (Test)
# test_residuals is reused by plot 11 below.
plt.subplot(3, 4, 6)
test_residuals = y_test - y_pred_test
plt.scatter(y_pred_test, test_residuals, color='green', alpha=0.6)
plt.axhline(y=0, color='red', linestyle='--')
plt.xlabel('Predicted Values')
plt.ylabel('Residuals')
plt.title('Test Residuals')
plt.grid(True, alpha=0.3)

# Plot 7: Feature importance (coefficients)
# Pairs each feature name with its fitted weight and orders the rows by
# absolute weight (ascending). Negative weights are drawn red, the rest blue.
# `bars` is not referenced again in this cell.
plt.subplot(3, 4, 7)
feature_importance = pd.DataFrame({
    'Feature': X.columns,
    'Coefficient': theta
}).sort_values('Coefficient', key=abs, ascending=True)
colors = ['red' if x < 0 else 'blue' for x in feature_importance['Coefficient']]
bars = plt.barh(feature_importance['Feature'], feature_importance['Coefficient'], color=colors, alpha=0.7)
plt.xlabel('Coefficient Value')
plt.title('Feature Importance After Gradient Descent')
plt.grid(True, alpha=0.3)

# Plot 8: Learning curve (cost vs iterations)
# Skips the first 100 epochs of cost_history.
# NOTE(review): the title hard-codes "100-1000", which matches the data only
# while `epochs` is 1000.
plt.subplot(3, 4, 8)
plt.plot(range(100, epochs), cost_history[100:], color='purple', linewidth=2)
plt.xlabel('Epochs')
plt.ylabel('Cost (MSE)')
plt.title('Learning Curve (Epochs 100-1000)')
plt.grid(True, alpha=0.3)

# Plot 9: Cost convergence
# np.diff yields the change in cost between consecutive epochs (one element
# fewer than cost_history); negative values mean the cost went down.
plt.subplot(3, 4, 9)
cost_diff = np.diff(cost_history)
plt.plot(range(1, len(cost_diff)+1), cost_diff, color='orange', linewidth=2)
plt.xlabel('Epochs')
plt.ylabel('Cost Change')
plt.title('Cost Convergence Rate')
plt.grid(True, alpha=0.3)

# Plot 10: Prediction distribution
# Histogram of the raw linear-model outputs on the test split.
plt.subplot(3, 4, 10)
plt.hist(y_pred_test, bins=30, alpha=0.7, color='lightblue', edgecolor='black')
plt.xlabel('Predicted Values')
plt.ylabel('Frequency')
plt.title('Distribution of Test Predictions')
plt.grid(True, alpha=0.3)

# Plot 11: Error distribution
# Histogram of the test residuals computed for plot 6.
plt.subplot(3, 4, 11)
plt.hist(test_residuals, bins=30, alpha=0.7, color='lightcoral', edgecolor='black')
plt.xlabel('Residuals')
plt.ylabel('Frequency')
plt.title('Distribution of Test Residuals')
plt.grid(True, alpha=0.3)

# Plot 12: Comparison with sklearn
# Fits sklearn's LinearRegression on the same scaled training data and
# overlays its test predictions on those of the gradient-descent model.
# sklearn_pred is also used after this figure for the numeric comparison.
plt.subplot(3, 4, 12)
from sklearn.linear_model import LinearRegression
sklearn_model = LinearRegression()
sklearn_model.fit(X_train_scaled, y_train)
sklearn_pred = sklearn_model.predict(X_test_scaled)

plt.scatter(y_test, y_pred_test, color='blue', alpha=0.6, label='Gradient Descent')
plt.scatter(y_test, sklearn_pred, color='red', alpha=0.6, label='Sklearn')
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], color='black', linewidth=2)
plt.xlabel('Actual Outcome')
plt.ylabel('Predicted Outcome')
plt.title('Gradient Descent vs Sklearn')
plt.legend()
plt.grid(True, alpha=0.3)

# Adjust subplot spacing, then render the figure.
plt.tight_layout()
plt.show()

# Compare with sklearn results: score the sklearn predictions with the same
# two metrics used for the gradient-descent model.
sklearn_r2 = r2_score_custom(y_test, sklearn_pred)
sklearn_mse = mean_squared_error(y_test, sklearn_pred)

# One line per model, then the absolute gap between them.
print(
    "\nComparison with Sklearn:",
    f"Gradient Descent - MSE: {mse_test:.6f}, R²: {r2_test:.6f}",
    f"Sklearn - MSE: {sklearn_mse:.6f}, R²: {sklearn_r2:.6f}",
    f"Difference - MSE: {abs(mse_test - sklearn_mse):.6f}, R²: {abs(r2_test - sklearn_r2):.6f}",
    sep="\n",
)
