In [1]:
import pandas as pd
import numpy as np
import warnings
# Suppress all Python warnings globally for the rest of the session.
warnings.filterwarnings('ignore')

# Notebook banner.
print("🚀 CARDIO MODELS FROM SCRATCH (No sklearn)")
print("=" * 50)

🚀 CARDIO MODELS FROM SCRATCH (No sklearn)


In [2]:
# Read the prepared train / test CSV files (train first, then test).
train_df, test_df = (
    pd.read_csv(csv_name)
    for csv_name in ('cardio_train_ml_ready.csv', 'cardio_test_ml_ready.csv')
)

# Split each frame into a feature matrix and its 'target' label vector,
# both as plain NumPy arrays.
X_train, y_train = train_df.drop(columns='target').values, train_df['target'].values
X_test, y_test = test_df.drop(columns='target').values, test_df['target'].values

print(f"Train: {X_train.shape}, Test: {X_test.shape}")

Train: (56000, 14), Test: (14000, 14)


In [3]:
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal their true label.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels. Both are converted to NumPy arrays and
        flattened, so plain Python lists are compared element by element
        (not as whole objects) and an (n, 1) column is not broadcast
        against an (n,) vector.

    Returns
    -------
    float
        Mean of the element-wise equality mask.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    return np.mean(y_true == y_pred)

def precision(y_true, y_pred):
    """Return TP / (TP + FP) for the positive class (label 1).

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels. Both are converted to NumPy arrays and
        flattened so that lists and (n, 1) columns are handled element-wise.
        Only true labels equal to 0 count as false positives.

    Returns
    -------
    float
        The precision, or 0.0 when nothing was predicted positive among the
        counted samples (avoids a division by zero).
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()

    predicted_positive = y_pred == 1
    tp = np.sum((y_true == 1) & predicted_positive)
    fp = np.sum((y_true == 0) & predicted_positive)

    flagged = tp + fp
    return tp / flagged if flagged > 0 else 0.0

def recall(y_true, y_pred):
    """Return TP / (TP + FN) for the positive class (label 1).

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels. Both are converted to NumPy arrays and
        flattened so that lists and (n, 1) columns are handled element-wise.
        Only predictions equal to 0 count as false negatives.

    Returns
    -------
    float
        The recall, or 0.0 when there are no counted positive samples
        (avoids a division by zero).
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()

    actual_positive = y_true == 1
    tp = np.sum(actual_positive & (y_pred == 1))
    fn = np.sum(actual_positive & (y_pred == 0))

    relevant = tp + fn
    return tp / relevant if relevant > 0 else 0.0

# Status message printed after the metric helpers have been defined.
print("✅ Metrics ready!")

✅ Metrics ready!


In [4]:
class LogisticRegressionScratch:
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    lr : float, default 0.01
        Step size applied to every gradient update.
    epochs : int, default 1000
        Number of gradient steps; each step uses the whole training set.

    Attributes
    ----------
    weights : numpy.ndarray or None
        One coefficient per feature; ``None`` until ``fit`` is called.
    bias : float or None
        Intercept term; ``None`` until ``fit`` is called.
    """

    def __init__(self, lr=0.01, epochs=1000):
        self.lr = lr
        self.epochs = epochs
        self.weights = None
        self.bias = None

    def sigmoid(self, z):
        """Logistic function; ``z`` is clipped to [-250, 250] before ``exp``."""
        return 1 / (1 + np.exp(-np.clip(z, -250, 250)))

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Feature matrix; converted to a float array.
        y : array-like holding n_samples labels
            Targets; flattened to 1-D so an (n, 1) column is not broadcast
            against the (n,) prediction vector.

        Raises
        ------
        ValueError
            If ``y`` does not hold exactly one label per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()

        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0.0
        scale = 1 / n_samples

        for _ in range(self.epochs):
            # Prediction error, computed once and shared by both gradients.
            residual = self.sigmoid(np.dot(X, self.weights) + self.bias) - y

            dw = scale * np.dot(X.T, residual)
            db = scale * np.sum(residual)

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict_proba(self, X):
        """Return the sigmoid of the linear score for each row of ``X``."""
        X = np.asarray(X, dtype=float)
        return self.sigmoid(np.dot(X, self.weights) + self.bias)

    def predict(self, X, threshold=0.5):
        """Return 0/1 labels: 1 where the predicted probability >= ``threshold``."""
        return (self.predict_proba(X) >= threshold).astype(int)

# Fit on the training arrays with default hyper-parameters, then predict
# labels for the test features.
lr_model = LogisticRegressionScratch()
lr_model.fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)

# Report each metric on the test labels, three decimals apiece.
print("Logistic Regression Results:")
for metric_label, metric_fn in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
):
    print(f"{metric_label}: {metric_fn(y_test, lr_pred):.3f}")

Logistic Regression Results:
Accuracy: 0.632
Precision: 0.633
Recall: 0.627


In [7]:
# ===== Decision tree from scratch (binary splits on Gini impurity) =====
class DecisionTreeScratch:
    """Decision tree classifier built from binary threshold splits.

    Splits are chosen by minimising the weighted Gini impurity of the two
    children. The fitted tree is stored in ``tree_`` as nested tuples
    ``(feature_index, threshold, left_subtree, right_subtree)``; a leaf is a
    plain ``int`` class label.

    Parameters
    ----------
    max_depth : int, default 5
        Depth at which a node is turned into a leaf regardless of purity.
    """

    def __init__(self, max_depth=5):
        self.max_depth = max_depth

    def gini(self, y):
        """Return the Gini impurity ``1 - sum(p_k ** 2)`` of the labels ``y``."""
        _, counts = np.unique(y, return_counts=True)
        probs = counts / len(y)
        return 1 - np.sum(probs**2)

    def best_split(self, X, y, feature_idx):
        """Find the best threshold for one feature.

        Candidate thresholds are the 10th, 20th, ..., 80th percentiles of the
        feature column (``np.arange(10, 90, 10)`` stops before 90).

        Returns
        -------
        (float, float or None)
            The lowest weighted Gini impurity and its threshold, or
            ``(inf, None)`` when every candidate leaves one side empty.
        """
        column = X[:, feature_idx]
        n_total = len(y)
        best_gini = float('inf')
        best_threshold = None

        # Percentiles are non-decreasing, so np.unique only removes repeated
        # cut points (common for low-cardinality features) without changing
        # the order in which candidates are tried.
        thresholds = np.unique(np.percentile(column, np.arange(10, 90, 10)))
        for threshold in thresholds:
            goes_left = column <= threshold
            left = y[goes_left]
            right = y[~goes_left]

            if len(left) == 0 or len(right) == 0:
                continue

            weighted_gini = (
                (len(left) / n_total) * self.gini(left)
                + (len(right) / n_total) * self.gini(right)
            )

            if weighted_gini < best_gini:
                best_gini = weighted_gini
                best_threshold = threshold

        return best_gini, best_threshold

    def fit(self, X, y):
        """Build the tree from features ``X`` (n_samples, n_features) and labels ``y``.

        Inputs are converted to NumPy arrays so plain lists are accepted.
        Labels must be non-negative integers because leaves are chosen with
        ``np.bincount``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        self.n_features = X.shape[1]
        self.tree_ = self._build_tree(X, y)

    def _leaf(self, y):
        """Return the most frequent label in ``y`` as a plain Python int."""
        return int(np.bincount(y).argmax())

    def _build_tree(self, X, y, depth=0):
        """Recursively grow a subtree; returns a leaf int or a split tuple."""
        if depth == self.max_depth or len(np.unique(y)) == 1:
            return self._leaf(y)

        best_gini = float('inf')
        best_feature = None
        best_threshold = None

        for feature in range(self.n_features):
            gini, threshold = self.best_split(X, y, feature)
            if gini < best_gini:
                best_gini = gini
                best_feature = feature
                best_threshold = threshold

        # No feature offered a threshold with two non-empty sides.
        if best_feature is None:
            return self._leaf(y)

        left_mask = X[:, best_feature] <= best_threshold
        right_mask = ~left_mask

        left = self._build_tree(X[left_mask], y[left_mask], depth + 1)
        right = self._build_tree(X[right_mask], y[right_mask], depth + 1)

        return (best_feature, best_threshold, left, right)

    def predict_one(self, x):
        """Walk the tree for a single sample ``x`` and return its int label."""
        node = self.tree_
        # Internal nodes are tuples; anything else is a leaf label. Testing
        # for the tuple avoids depending on the exact integer type of leaves.
        while isinstance(node, tuple):
            feature, threshold, left, right = node
            node = left if x[feature] <= threshold else right
        return int(node)

    def predict(self, X):
        """Return an array with one predicted label per row of ``X``."""
        return np.array([self.predict_one(x) for x in X])

# Train and test on a fixed-size prefix of each split. The size is named once
# so the training slice, the prediction slice and the scoring slice cannot
# drift apart.
DT_SUBSET_SIZE = 1000

dt_model = DecisionTreeScratch(max_depth=5)
dt_model.fit(X_train[:DT_SUBSET_SIZE], y_train[:DT_SUBSET_SIZE])
dt_pred = dt_model.predict(X_test[:DT_SUBSET_SIZE])

print("\n✅ Decision Tree - Results:")
print(f"Accuracy: {accuracy(y_test[:DT_SUBSET_SIZE], dt_pred):.3f}")



✅ Decision Tree - Results:
Accuracy: 0.733
