In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Scikit-learn Imports
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    classification_report, 
    confusion_matrix, 
    accuracy_score, 
    precision_score, 
    recall_score, 
    f1_score
)
from sklearn.model_selection import cross_val_score

In [2]:
class CybersecurityThreatClassifier:
    """
    Wraps a CSV dataset together with the state of a classifier built on it.

    Construction reads the file into ``df``, initialises every derived
    attribute to ``None`` and then immediately calls
    ``self._preprocess_data()``.

    NOTE(review): ``_preprocess_data`` is not defined inside this class body,
    so it has to be provided on the class (or a subclass) for construction to
    succeed.
    """

    def __init__(self, dataset_path):
        """
        Load the dataset and hand over to preprocessing.

        Parameters:
        - dataset_path: Location of the CSV file, passed straight to
          ``pd.read_csv``
        """
        # Raw data is read first, so a bad path fails before any other
        # attribute is created.
        self.df = pd.read_csv(dataset_path)

        # Feature / target holders, raw and transformed, start out empty
        self.X = self.y = None
        self.X_scaled = self.y_encoded = None

        # No estimator exists until training has run
        self.model = None

        # Preprocessing is triggered as part of construction
        self._preprocess_data()

In [8]:
def _preprocess_data(self):
    """
    Comprehensive data preprocessing

    NOTE(review): as shown here this is only a placeholder. The body below
    contains no statements, so calling it changes nothing on ``self``.
    Presumably the real implementation is what fills in ``X_scaled`` /
    ``y_encoded`` (the attributes ``train_model`` reads) -- confirm and
    restore it.

    NOTE(review): the function takes ``self`` but is defined at module level
    in this cell, so ``self._preprocess_data()`` in the class constructor
    cannot find it (this matches the AttributeError recorded at the end of
    the notebook). It needs to live inside the class body.
    """
    # ... (previous code remains the same)

def train_model(self, penalty='l2', C=1.0, multi_class='ovr'):
    """
    Train Logistic Regression Model and report its hold-out performance.

    Splits ``self.X_scaled`` / ``self.y_encoded`` 80/20 with a fixed seed,
    fits a ``LogisticRegression`` using the ``liblinear`` solver, stores it
    on ``self.model``, prints accuracy plus macro-averaged precision, recall
    and F1 and the full classification report, shows the confusion matrix as
    a heatmap, and finally calls ``self._analyze_feature_importance()``.

    Parameters:
    - penalty: Regularization type ('l1', 'l2')
    - C: Inverse of regularization strength
    - multi_class: Multiclass handling strategy, forwarded unchanged to
      ``LogisticRegression``

    Raises:
    - ValueError: if ``self.X_scaled`` or ``self.y_encoded`` is still None,
      i.e. preprocessing has not populated them.

    NOTE(review): this is written as a method (it takes ``self``) but sits at
    module level in this cell; it has to be part of the class body for
    ``classifier.train_model()`` to resolve.
    NOTE(review): newer scikit-learn releases deprecate the ``multi_class``
    argument -- confirm against the installed version.
    """
    # Fail early with an actionable message instead of an opaque error from
    # inside train_test_split when preprocessing never filled these in.
    if self.X_scaled is None or self.y_encoded is None:
        raise ValueError(
            "No preprocessed data available: X_scaled / y_encoded are None. "
            "Run preprocessing before calling train_model()."
        )

    # Split data (fixed seed keeps the hold-out set reproducible)
    X_train, X_test, y_train, y_test = train_test_split(
        self.X_scaled,
        self.y_encoded,
        test_size=0.2,
        random_state=42
    )

    # Initialize and train model
    self.model = LogisticRegression(
        penalty=penalty,
        C=C,
        multi_class=multi_class,
        solver='liblinear',
        random_state=42
    )
    self.model.fit(X_train, y_train)

    # Predictions on the held-out 20%
    y_pred = self.model.predict(X_test)

    # Evaluation metrics
    print("\n--- Model Performance ---")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Precision (Macro):", precision_score(y_test, y_pred, average='macro'))
    print("Recall (Macro):", recall_score(y_test, y_pred, average='macro'))
    print("F1-Score (Macro):", f1_score(y_test, y_pred, average='macro'))

    # Detailed Classification Report
    print("\nDetailed Classification Report:")
    print(classification_report(y_test, y_pred))

    # Visualize Confusion Matrix on an explicit figure/axes pair so the plot
    # does not depend on pyplot's implicit "current figure" state.
    cm = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title('Confusion Matrix - Logistic Regression')
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')
    fig.tight_layout()
    plt.show()

    # Feature Coefficients
    self._analyze_feature_importance()

In [10]:
# Main Execution
def main(dataset_path=r"C:\Users\manas\Downloads\Train_data.csv.zip"):
    """
    Run the end-to-end workflow: build the classifier, train, cross-validate.

    Parameters:
    - dataset_path: Location of the training data handed to
      ``CybersecurityThreatClassifier``. The default is the path this
      notebook was originally run with; pass your own path to run it on
      another machine.
    """
    # Initialize and run classifier
    classifier = CybersecurityThreatClassifier(dataset_path)

    # Train model with the default hyper-parameters
    print("\n--- Training Model with Default Parameters ---")
    classifier.train_model()

    # NOTE(review): cross_validation() is not defined in any cell visible
    # here -- confirm it exists on the class.
    print("\n--- Performing Cross-Validation ---")
    classifier.cross_validation()

# Entry-point guard: call main() only when this code runs as the top-level
# program, not when it is imported as a module.
if __name__ == '__main__':
    main()

AttributeError: 'CybersecurityThreatClassifier' object has no attribute '_preprocess_data'