In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_curve, auc

def logistic_regression_from_csv(csv_file, feature_cols, target_col, n_rows=50):
    """
    Fit and evaluate a logistic regression on the first N rows of a CSV file.

    The selected rows are split 80/20 into train and test sets (fixed
    random_state=42), a LogisticRegression model is fitted on the training
    part, and accuracy, confusion matrix and classification report are
    printed for the test part. With exactly one feature column the fitted
    probability curve is plotted together with the test points; otherwise a
    ROC curve is plotted.

    Parameters:
    csv_file (str): Path to CSV file
    feature_cols (list): List of feature column names
    target_col (str): Target column name (binary classification)
    n_rows (int): Number of rows to use from dataset

    Returns:
    tuple: (fitted model, accuracy, confusion matrix, classification report text)

    Raises:
    KeyError: If a feature or target column is missing from the CSV.
    ValueError: If the selected rows contain fewer than two target classes.
    """

    # Load first N rows. Passing nrows lets pandas stop parsing early instead
    # of reading the whole file and discarding everything after row N.
    if n_rows is None or n_rows >= 0:
        df = pd.read_csv(csv_file, nrows=n_rows)
    else:
        # Negative counts keep DataFrame.head semantics (all but the last |n| rows).
        df = pd.read_csv(csv_file).head(n_rows)

    # Prepare features and target
    X = df[feature_cols]
    y = df[target_col]

    # Fail early with a message that names the column, rather than letting
    # the split or the solver fail later on empty or single-class data.
    n_classes = y.nunique()
    if n_classes < 2:
        raise ValueError(
            f"Target column {target_col!r} must contain at least two classes "
            f"in the first {n_rows} rows; found {n_classes}."
        )

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Create and train logistic regression model
    log_model = LogisticRegression(max_iter=1000)
    log_model.fit(X_train, y_train)

    # Make predictions
    y_pred = log_model.predict(X_test)

    # Calculate metrics
    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    # Print results
    print("="*50)
    print("LOGISTIC REGRESSION RESULTS")
    print("="*50)
    print(f"Dataset: {csv_file}")
    print(f"Using First {n_rows} Rows")
    print(f"Features: {feature_cols}")
    print(f"Target: {target_col}")
    print(f"Accuracy: {acc:.4f}")
    print("\nConfusion Matrix:")
    print(cm)
    print("\nClassification Report:")
    print(report)

    # ---------- Plotting ----------
    if len(feature_cols) == 1:
        # Visualization for single feature
        feature_name = feature_cols[0]
        feature_values = X[feature_name]

        # The model was fitted on a DataFrame, so the evaluation grid is a
        # DataFrame with the same column; a bare ndarray would trigger
        # scikit-learn's "X does not have valid feature names" warning.
        X_range = pd.DataFrame(
            {feature_name: np.linspace(feature_values.min(), feature_values.max(), 100)}
        )
        y_range_prob = log_model.predict_proba(X_range)[:, 1]

        plt.figure(figsize=(10, 6))
        plt.scatter(X_test[feature_name], y_test, color='blue', alpha=0.6, label='Actual Test')
        plt.scatter(X_test[feature_name], y_pred, color='red', alpha=0.6, label='Predicted Test')
        plt.plot(X_range[feature_name], y_range_prob, color='green', linewidth=2, label='Logistic Curve')
        plt.xlabel(feature_name)
        plt.ylabel(f"Probability of {target_col}")
        plt.title(f'Logistic Regression - First {n_rows} Rows')
        plt.legend()
        plt.grid(True)
        plt.show()
    else:
        # ROC Curve for multi-feature; the score is the predicted
        # probability of the second class in log_model.classes_.
        y_prob = log_model.predict_proba(X_test)[:, 1]
        fpr, tpr, _ = roc_curve(y_test, y_prob)
        roc_auc = auc(fpr, tpr)

        plt.figure(figsize=(8, 6))
        plt.plot(fpr, tpr, color='blue', label=f'ROC Curve (AUC = {roc_auc:.2f})')
        plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
        plt.xlabel("False Positive Rate")
        plt.ylabel("True Positive Rate")
        plt.title("ROC Curve - Logistic Regression")
        plt.legend(loc="lower right")
        plt.grid(True)
        plt.show()

    return log_model, acc, cm, report


# Example usage: run the workflow once on the sample manufacturing dataset.
if __name__ == "__main__":
    # Input configuration; the target column should hold binary (0/1) labels.
    csv_file = "manufacturing.csv"
    target_column = "Defect"
    feature_columns = ["Temperature (°C)"]  # list more column names to use several features

    model, acc, cm, report = logistic_regression_from_csv(
        csv_file=csv_file,
        feature_cols=feature_columns,
        target_col=target_column,
        n_rows=50,
    )


---

## Logistic Regression By Defining A Function  

# 📝 Explanation  

- **Imports**: Uses `pandas` & `numpy` for data, `matplotlib` for plots, and `sklearn` for ML tasks.  
- **Function Input**: Takes CSV path, feature columns, target column (binary), and number of rows.  
- **Step 1 – Load Data**: Reads first `n_rows` into DataFrame.  
- **Step 2 – Split Variables**: Separates features (`X`) and target (`y`).  
- **Step 3 – Train-Test Split**: Divides data into 80% training and 20% testing sets.  
- **Step 4 – Model Training**: Fits a `LogisticRegression` model.  
- **Step 5 – Predictions**: Predicts labels (`y_pred`) and probabilities (`y_prob`).  
- **Step 6 – Evaluation**: Computes **Accuracy**, **Confusion Matrix**, and **Classification Report**.  
- **Step 7 – Output**: Prints dataset info, accuracy, confusion matrix, and classification report.  
- **Step 8 – Visualization**:  
  - **1 feature** → Logistic curve + actual vs predicted scatter plot.  
  - **Multiple features** → ROC curve with AUC score.  
- **Step 9 – Return**: Returns trained model, accuracy, confusion matrix, and report.  

This code provides a complete **logistic regression workflow** with evaluation and visualization.  

The function also works with **any CSV file**, as long as you pass its feature column names and a binary target column.  

---
