In [76]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error

data = pd.read_csv('student_dropout_prediction.csv')

In [78]:
class CustomLogisticRegression:
    def __init__(self, lr=0.01, epochs=1000):
        self.lr = lr
        self.epochs = epochs
        self.weights = None
    
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        for _ in range(self.epochs):
            linear_model = np.dot(X, self.weights)
            predictions = self.sigmoid(linear_model)
            gradient = np.dot(X.T, (predictions - y)) / n_samples
            self.weights -= self.lr * gradient
    
    def predict(self, X):
        linear_model = np.dot(X, self.weights)
        probabilities = self.sigmoid(linear_model)
        return np.where(probabilities > 0.5, 1, 0)

In [80]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_class_scaled = scaler.fit_transform(X_class)
X_class = scaler.fit_transform(data[['GPA', 'Attendance_Percentage', 'Engagement_Score', 'Previous_Suspensions']].values)
y_class = data['Dropout_Label'].values

logistic_model = CustomLogisticRegression(lr=0.1, epochs=10000)  # Adjusted parameters
logistic_model.fit(X_class, y_class)
class_predictions = logistic_model.predict(X_class)

In [86]:
accuracy = np.sum(class_predictions == y_class) / len(y_class)
print(f"Accuracy (Custom Logistic Regression): {accuracy}")

Accuracy (Custom Logistic Regression): 0.552


In [83]:
X_pred = X_class  
y_pred = class_predictions  
print("\nSample Predicted Inputs and Outputs:")
for i in range(5):  
    print(f"Input: {X_pred[i]} -> Predicted Output: {y_pred[i]}")


Sample Predicted Inputs and Outputs:
Input: [ 1.30244516  0.79937898 -0.03187369  0.05565871] -> Predicted Output: 1
Input: [ 0.84964125  0.05143233 -1.51043226  1.48280516] -> Predicted Output: 1
Input: [-1.72228494 -0.26960727 -1.18347998 -0.65791451] -> Predicted Output: 0
Input: [-0.90723791  1.27646994 -0.70967625  0.05565871] -> Predicted Output: 0
Input: [-1.81284572 -0.30123001  1.69640161  0.76923194] -> Predicted Output: 0
