In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

# ---------------------------------------------------
# 1. Polynomial Kernel and Kernel SVM Class
# ---------------------------------------------------
def polynomial_kernel(x1, x2, degree=3, coef0=1):
    """
    Evaluate the polynomial kernel K(a, b) = (a . b + coef0) ** degree.

    For 2-D inputs with one sample per row, entry (i, j) of the result is the
    kernel value between row i of ``x1`` and row j of ``x2``.
    """
    # Pairwise inner products between the rows of x1 and the rows of x2.
    inner_products = np.dot(x1, x2.T)
    # Shift by the constant term, then raise element-wise to the degree.
    shifted = inner_products + coef0
    return shifted ** degree

class KernelSVM:
    """
    Binary classifier using a polynomial kernel, trained with per-sample updates.

    The model keeps one coefficient ``alpha[j]`` per training sample plus a
    bias ``b``.  The decision value for a point ``x`` is::

        sum_j alpha[j] * y[j] * K(x_j, x) + b

    The margin test ``y * decision >= 1`` used during training is written for
    labels encoded as -1 / +1.

    Parameters
    ----------
    lr : float
        Step size applied to every coefficient and bias update.
    lambda_param : float
        Shrinkage factor applied to ``alpha[i]`` when sample ``i`` already
        satisfies the margin.
    n_iters : int
        Number of full passes over the training samples.
    degree, coef0 :
        Forwarded to ``polynomial_kernel``.
    """

    def __init__(self, lr=0.001, lambda_param=0.01, n_iters=100, degree=3, coef0=1):
        self.lr = lr
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.degree = degree
        self.coef0 = coef0
        # Learned state; populated by fit().
        self.alpha = None
        self.b = 0
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """
        Learn ``alpha`` and ``b`` from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of n_samples labels (-1 or +1)

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``X`` and ``y`` disagree on the sample count.
        """
        # Convert up front so DataFrames / lists / Series all index and
        # broadcast like plain arrays in the loop below.
        X = np.asarray(X, dtype=float)
        # Flatten so a column-shaped y cannot broadcast alpha * y to a matrix.
        y = np.asarray(y).ravel()

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features); got {X.ndim} dimension(s)."
            )
        n_samples = X.shape[0]
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels."
            )

        self.X_train = X
        self.y_train = y
        self.alpha = np.zeros(n_samples)
        self.b = 0

        # Gram matrix of the training set, computed once and reused every pass.
        K = polynomial_kernel(X, X, self.degree, self.coef0)

        for _ in range(self.n_iters):
            for i in range(n_samples):
                # Decision value for sample i under the *current* coefficients;
                # earlier updates in this pass are already reflected.
                pred = np.sum(self.alpha * y * K[:, i]) + self.b

                if y[i] * pred >= 1:
                    # Margin satisfied: only shrink this sample's coefficient.
                    self.alpha[i] -= self.lr * (self.lambda_param * self.alpha[i])
                else:
                    # Margin violated: grow the coefficient in proportion to
                    # the shortfall and nudge the bias toward the true label.
                    self.alpha[i] += self.lr * (1 - y[i] * pred)
                    self.b += self.lr * y[i]

    def predict(self, X_test):
        """
        Predict a label for every row of ``X_test``.

        Returns
        -------
        numpy.ndarray of floats, each either -1.0 or 1.0.  A decision value of
        exactly 0 is assigned to the +1 class so that the output never
        contains a third label.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.alpha is None or self.X_train is None:
            raise RuntimeError("KernelSVM.predict() called before fit().")

        X_test = np.asarray(X_test, dtype=float)
        # Rows index training samples, columns index test samples.
        K = polynomial_kernel(self.X_train, X_test, self.degree, self.coef0)
        decision = np.dot(self.alpha * self.y_train, K) + self.b
        # np.sign would emit 0 on the boundary; force a two-class output.
        return np.where(decision >= 0, 1.0, -1.0)

# ---------------------------------------------------
# 2. Load and Prepare Student Data
# ---------------------------------------------------
# Location of the student-performance CSV; adjust for the local machine.
DATA_PATH = r"C:/Users/ROHIT/Onedrive/Desktop/SM/AIML/datasets/student_performance_dataset_20.csv"

# Numeric encoding of the target and the matching display names, kept together
# so the metric reports always line up with the encoding used for training.
CLASS_LABELS = [-1, 1]
CLASS_NAMES = ['Fail (-1)', 'Pass (1)']

try:
    data = pd.read_csv(DATA_PATH)

    feature_columns = ['Study_Hours_per_Week', 'Attendance_Rate', 'Internal_Scores']
    target_column = 'Pass_Fail'

    X = data[feature_columns].values
    # 'Fail' -> -1; every other value in the column is treated as a pass (+1).
    y = np.where(data[target_column] == 'Fail', -1, 1)

    # ---------------------------------------------------
    # 3. Split and Scale Data (Correctly)
    # ---------------------------------------------------

    # Split first so no test-set statistics leak into the scaler.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    scaler = StandardScaler()

    # Fit the scaler ONLY on the training data ...
    X_train_scaled = scaler.fit_transform(X_train)

    # ... and reuse those statistics for the test data.
    X_test_scaled = scaler.transform(X_test)

    # ---------------------------------------------------
    # 4. Train and Evaluate SVM (Polynomial)
    # ---------------------------------------------------
    print("\nTraining SVM (Polynomial Kernel, degree=2) from Scratch...")
    # Use degree=2 for a Polynomial Kernel
    svm_model_20 = KernelSVM(lr=0.001, lambda_param=0.01, n_iters=100, degree=2, coef0=1)

    # Train the model on the SCALED training data
    svm_model_20.fit(X_train_scaled, y_train)

    print("Training complete.")

    # Make predictions on the SCALED test data
    y_pred = svm_model_20.predict(X_test_scaled)

    print("\n--- Evaluation (Problem 20) ---")
    print("Confusion Matrix:")
    # Passing labels explicitly fixes the row/column order and keeps the
    # matrix 2x2 even if a class is absent from the predictions.
    print(confusion_matrix(y_test, y_pred, labels=CLASS_LABELS))

    print("\nClassification Report (Precision, Recall, F1-Score):")
    # Without labels=, target_names is matched against whatever labels happen
    # to appear, which raises if their count differs from len(target_names).
    print(classification_report(y_test, y_pred, labels=CLASS_LABELS, target_names=CLASS_NAMES))

    # ---------------------------------------------------
    # 5. Predict on New Test Cases
    # ---------------------------------------------------
    print("\n--- Test New Predictions ---")

    def predict_student_performance(study_hours, attendance_rate, internal_score):
        """
        Predict "Pass" or "Fail" for one new student.

        The three raw feature values are arranged in the same column order as
        ``feature_columns``, scaled with the scaler fitted on the training
        split, and classified by ``svm_model_20``.
        """
        # The scaler and the model both expect a 2-D array (one row per student).
        new_data = np.array([[study_hours, attendance_rate, internal_score]])

        # Scale with the SAME scaler that was fitted on the training data.
        new_data_scaled = scaler.transform(new_data)

        prediction = svm_model_20.predict(new_data_scaled)

        return "Pass" if prediction[0] == 1 else "Fail"

    # --- TEST YOUR MODEL HERE ---

    # Example 1: A student with poor stats
    sh_1 = 10  # Study Hours
    ar_1 = 55  # Attendance Rate
    is_1 = 60  # Internal Score
    print(f"Prediction for (Hours: {sh_1}, Attend: {ar_1}, Score: {is_1}): {predict_student_performance(sh_1, ar_1, is_1)}")

    # Example 2: A student with great stats
    sh_2 = 35  # Study Hours
    ar_2 = 98  # Attendance Rate
    is_2 = 92  # Internal Score
    print(f"Prediction for (Hours: {sh_2}, Attend: {ar_2}, Score: {is_2}): {predict_student_performance(sh_2, ar_2, is_2)}")

    # Example 3: An average student
    sh_3 = 20  # Study Hours
    ar_3 = 80  # Attendance Rate
    is_3 = 75  # Internal Score
    print(f"Prediction for (Hours: {sh_3}, Attend: {ar_3}, Score: {is_3}): {predict_student_performance(sh_3, ar_3, is_3)}")


except FileNotFoundError:
    print("Error: 'student_performance_dataset_20.csv' not found.")
except KeyError:
    print("Error: CSV must contain 'Study_Hours_per_Week', 'Attendance_Rate', 'Internal_Scores', and 'Pass_Fail' columns.")
except Exception as e:
    # Top-level boundary of the script: report anything unexpected rather
    # than dumping a traceback into the notebook output.
    print(f"An error occurred: {e}")


Training SVM (Polynomial Kernel, degree=2) from Scratch...
Training complete.

--- Evaluation (Problem 20) ---
Confusion Matrix:
[[45 26]
 [ 7 64]]

Classification Report (Precision, Recall, F1-Score):
              precision    recall  f1-score   support

   Fail (-1)       0.87      0.63      0.73        71
    Pass (1)       0.71      0.90      0.80        71

    accuracy                           0.77       142
   macro avg       0.79      0.77      0.76       142
weighted avg       0.79      0.77      0.76       142


--- Test New Predictions ---
Prediction for (Hours: 10, Attend: 55, Score: 60): Fail
Prediction for (Hours: 35, Attend: 98, Score: 92): Pass
Prediction for (Hours: 20, Attend: 80, Score: 75): Pass
