In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, accuracy_score

In [None]:
# Read the two raw CSV files from /content/sample_data into DataFrames
customer_df = pd.read_csv("/content/sample_data/Customer Purchasing Behaviors.csv")
iris_df = pd.read_csv("/content/sample_data/Iris.csv")

# Sanity check: report (rows, columns) of each frame
print("Customer data shape:", customer_df.shape)
print("Iris data shape:", iris_df.shape)


Customer data shape: (238, 7)
Iris data shape: (150, 6)


In [None]:
# prepare customer data for linear regression
print("\n--- Linear Regression (Customer Loyalty) ---")

# Drop the identifier column and one-hot encode `region` by rebinding
# customer_df instead of mutating it in place. errors='ignore' and the
# column check keep this cell safe to re-run once the frame is transformed.
customer_df = customer_df.drop(columns=['user_id'], errors='ignore')
if 'region' in customer_df.columns:
    customer_df = pd.get_dummies(customer_df, columns=['region'], drop_first=True)

# Target is loyalty_score; every remaining column is used as a feature
X_cust = customer_df.drop(columns=['loyalty_score'])
y_cust = customer_df['loyalty_score']

X_train_cust, X_test_cust, y_train_cust, y_test_cust = train_test_split(
    X_cust, y_cust, test_size=0.2, random_state=42
)

# normalize features to prevent numerical issues in gradient descent.
# Mean and std come from the training split only and are reused for the
# test split, so no test-set statistics enter the model.
X_train_mean = X_train_cust.mean()
# A column that is constant within the training split has std 0; dividing by
# it would produce NaN/inf features, so such columns are only centred
# (divisor of 1).
X_train_std = X_train_cust.std().replace(0.0, 1.0)
X_train_cust_norm = (X_train_cust - X_train_mean) / X_train_std
X_test_cust_norm = (X_test_cust - X_train_mean) / X_train_std

# Linear Regression from scratch
class SimpleLinearRegression:
    """Linear regression fitted with full-batch gradient descent on the MSE loss.

    Parameters
    ----------
    lr : float
        Step size applied to every gradient update.
    epochs : int
        Number of gradient-descent iterations over the whole training set.

    Attributes
    ----------
    weights : numpy.ndarray or None
        One coefficient per feature; ``None`` until ``fit`` has been called.
    bias : float or None
        Intercept term; ``None`` until ``fit`` has been called.
    """

    def __init__(self, lr=0.01, epochs=1000):
        self.lr = lr
        self.epochs = epochs
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features; anything ``np.asarray`` can convert to float.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Training targets; flattened to one dimension.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, contains no samples, or its row count does
            not match the number of targets.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so a column vector cannot broadcast against the 1-D prediction
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} targets"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0.0

        # Gradient descent
        for _ in range(self.epochs):
            # Prediction error, computed once and shared by both gradients
            residual = np.dot(X, self.weights) + self.bias - y
            dw = (1/n_samples) * np.dot(X.T, residual)
            db = (1/n_samples) * np.sum(residual)

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        """Return ``X @ weights + bias`` for each row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_samples,)

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.weights is None:
            raise RuntimeError("SimpleLinearRegression must be fitted before predict")
        return np.dot(np.asarray(X, dtype=float), self.weights) + self.bias

# Fit the from-scratch regressor on the standardized training features
lin_reg_custom = SimpleLinearRegression(lr=0.01, epochs=5000)
lin_reg_custom.fit(
    X_train_cust_norm.values.astype(float),
    y_train_cust.values.astype(float),
)

# Score the held-out split: RMSE is the square root of the mean squared error
y_pred_custom = lin_reg_custom.predict(X_test_cust_norm.values.astype(float))
mse_custom = mean_squared_error(y_test_cust, y_pred_custom)
rmse_custom = np.sqrt(mse_custom)
print(f"Custom Linear Regression RMSE: {rmse_custom:.4f}")


--- Linear Regression (Customer Loyalty) ---
Custom Linear Regression RMSE: 0.2011


In [None]:
#prepare iris data for logistic regression
print("\n--- Logistic Regression (Iris Classification) ---")

# Rebind instead of dropping in place; errors='ignore' keeps this cell
# re-runnable after 'Id' has already been removed.
iris_df = iris_df.drop(columns=['Id'], errors='ignore')

# Encode the species labels as integer class ids. When the column is already
# numeric (this cell was run before) the previously fitted encoder is kept,
# so label_encoder.classes_ still holds the original labels instead of being
# refitted on the integer codes.
if not pd.api.types.is_numeric_dtype(iris_df['Species']):
    label_encoder = LabelEncoder()
    iris_df['Species'] = label_encoder.fit_transform(iris_df['Species'])

# Target is the encoded Species column; every remaining column is a feature
X_iris = iris_df.drop(columns=['Species'])
y_iris = iris_df['Species']

X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(
    X_iris, y_iris, test_size=0.2, random_state=42
)

# Logistic Regression from scratch (multi-class using One-vs-Rest)
class SimpleLogisticRegression:
    """Multi-class logistic regression using One-vs-Rest binary classifiers.

    One binary logistic model is trained per class with full-batch gradient
    descent; prediction picks the class whose model gives the highest score.

    Parameters
    ----------
    lr : float
        Step size applied to every gradient update.
    epochs : int
        Number of gradient-descent iterations per binary classifier.

    Attributes
    ----------
    weights : dict
        Maps each class label to the weight vector of its binary classifier.
    bias : dict
        Maps each class label to the intercept of its binary classifier.
    classes : numpy.ndarray or None
        Sorted unique labels seen in ``fit``; ``None`` until fitted.
    """

    def __init__(self, lr=0.01, epochs=1000):
        self.lr = lr
        self.epochs = epochs
        self.weights = {}  # one set of weights per class
        self.bias = {}
        self.classes = None

    def sigmoid(self, z):
        """Logistic function 1 / (1 + exp(-z)), with z clipped to [-500, 500]."""
        z = np.clip(z, -500, 500)  # prevent overflow
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        """Train one binary classifier per distinct label in ``y``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features; anything ``np.asarray`` can convert to float.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Class labels; flattened to one dimension.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, contains no samples, or its row count does
            not match the number of labels.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()

        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels"
            )

        self.classes = np.unique(y)
        # Start from empty dicts so a refit never keeps parameters of classes
        # that only existed in an earlier training set
        self.weights = {}
        self.bias = {}

        # Train separate binary classifier for each class
        for cls in self.classes:
            y_binary = (y == cls).astype(int)
            cls_weights = np.zeros(n_features)
            cls_bias = 0.0

            # Gradient descent for this class
            for _ in range(self.epochs):
                # Probability error, computed once and shared by both gradients
                error = self.sigmoid(np.dot(X, cls_weights) + cls_bias) - y_binary
                dw = (1/n_samples) * np.dot(X.T, error)
                db = (1/n_samples) * np.sum(error)
                cls_weights -= self.lr * dw
                cls_bias -= self.lr * db

            self.weights[cls] = cls_weights
            self.bias[cls] = cls_bias

    def predict(self, X):
        """Return the highest-scoring class label for each row of ``X``.

        Ties go to the class that comes first in ``classes``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Predicted labels, with the dtype of ``classes``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If ``X`` is not 2-D.
        """
        if self.classes is None:
            raise RuntimeError("SimpleLogisticRegression must be fitted before predict")
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")

        # Stack per-class parameters so all scores come from one matrix product:
        # column j holds the weights of classes[j]
        weight_matrix = np.column_stack([self.weights[cls] for cls in self.classes])
        intercepts = np.array([self.bias[cls] for cls in self.classes], dtype=float)
        scores = self.sigmoid(np.dot(X, weight_matrix) + intercepts)

        # argmax returns the first maximum, i.e. the earliest class on a tie
        return self.classes[np.argmax(scores, axis=1)]

# Fit the from-scratch One-vs-Rest classifier on the training split
log_reg_custom = SimpleLogisticRegression(lr=0.1, epochs=1000)
log_reg_custom.fit(
    X_train_iris.values.astype(float),
    y_train_iris.values.astype(int),
)

# Classify the held-out split and report the share of correct predictions
y_pred_custom_log = log_reg_custom.predict(X_test_iris.values.astype(float))
acc_custom = accuracy_score(y_test_iris, y_pred_custom_log)
print(f"Custom Logistic Regression Accuracy: {acc_custom:.4f}")


--- Logistic Regression (Iris Classification) ---
Custom Logistic Regression Accuracy: 1.0000
