**Libraries**

In [1]:
# Install necessary libraries (if not already installed)
!pip install numpy matplotlib pandas




In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Load dataset
cancer_data = load_breast_cancer()
df = pd.DataFrame(data=cancer_data.data, columns=cancer_data.feature_names)
df['target'] = cancer_data.target

# Split data
X = df[cancer_data.feature_names]
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train logistic regression
model = LogisticRegression(max_iter=10000, random_state=42)
model.fit(X_train, y_train)

# Predictions and evaluation
y_pred = model.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Feature importance
feature_importance = pd.DataFrame({'Feature': cancer_data.feature_names, 'Coefficient': model.coef_[0]})
print(feature_importance.sort_values(by='Coefficient', ascending=False))

# Initialize and train the model
log_reg = LogisticRegression(max_iter=10000)
log_reg.fit(X_train, y_train)

# Make predictions
y_pred = log_reg.predict(X_test)

# Evaluate the model
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


**With 2 Datasets**

In [19]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Function to train and evaluate logistic regression
def train_and_evaluate(X, y, dataset_name):
    """
    Train and evaluate a logistic regression model.

    Parameters:
        X: DataFrame or ndarray
            Features of the dataset.
        y: Series or ndarray
            Target variable.
        dataset_name: str
            Name of the dataset (for display purposes).
    """
    print(f"\n--- Results for {dataset_name} Dataset ---\n")

    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train logistic regression
    model = LogisticRegression(max_iter=10000, random_state=42)
    model.fit(X_train, y_train)

    # Predictions
    y_pred = model.predict(X_test)

    # Evaluation metrics
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

    return accuracy

# --- Breast Cancer Dataset ---
print("Loading and processing Breast Cancer Dataset...")
# Load breast cancer dataset
cancer_data = load_breast_cancer()
cancer_df = pd.DataFrame(data=cancer_data.data, columns=cancer_data.feature_names)
cancer_df['target'] = cancer_data.target

# Separate features and target
X_cancer = cancer_df[cancer_data.feature_names]
y_cancer = cancer_df['target']

# Train and evaluate
accuracy_cancer = train_and_evaluate(X_cancer, y_cancer, "Breast Cancer")

# --- Smart Watch Dataset ---
print("\nLoading and processing Smart Watch Dataset...")
# Load smart watch dataset (replace with the actual file path if downloaded locally)
smart_watch_file_path = '/content/Smart Watch.csv'  # Replace with your actual file path
smart_watch_df = pd.read_csv(smart_watch_file_path)

# Preprocessing (encode categorical features and clean data)
smart_watch_df.dropna(inplace=True)  # Drop missing values

# One-Hot Encoding for all categorical columns (detecting object types)
categorical_columns = smart_watch_df.select_dtypes(include=['object']).columns.tolist()

# Ensure that 'Display_Type' column is not included as a categorical column for encoding
categorical_columns.remove('Display_Type')  # Remove the target variable column

# Apply One-Hot Encoding
smart_watch_df = pd.get_dummies(smart_watch_df, columns=categorical_columns, drop_first=True)

# Selecting 'Display_Type' as target and encoding it
le = LabelEncoder()
smart_watch_df['Display_Type'] = le.fit_transform(smart_watch_df['Display_Type'])

# Separate features and target
target_column = 'Display_Type'
X_smart_watch = smart_watch_df.drop(columns=[target_column])
y_smart_watch = smart_watch_df[target_column]

# Train and evaluate
accuracy_smart_watch = train_and_evaluate(X_smart_watch, y_smart_watch, "Smart Watch")

# --- Summary ---
print("\n--- Summary ---")
print(f"Breast Cancer Dataset Accuracy: {accuracy_cancer * 100:.2f}%")
print(f"Smart Watch Dataset Accuracy: {accuracy_smart_watch * 100:.2f}%")


Loading and processing Breast Cancer Dataset...

--- Results for Breast Cancer Dataset ---

Accuracy: 95.61%
Confusion Matrix:
[[39  4]
 [ 1 70]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.91      0.94        43
           1       0.95      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114


Loading and processing Smart Watch Dataset...

--- Results for Smart Watch Dataset ---

Accuracy: 66.67%
Confusion Matrix:
[[ 8  0  0  1  0  0  0  0  0]
 [ 2  0  0  0  3  0  0  0  0]
 [ 0  0  4  0  0  0  0  0  0]
 [ 0  0  0  2  2  0  0  0  0]
 [ 2  1  0  0 11  0  0  0  0]
 [ 0  0  0  0  0  5  0  0  0]
 [ 0  1  0  0  1  0  1  0  0]
 [ 0  0  0  0  0  0  0  1  0]
 [ 1  0  0  0  2  0  0  0  0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.62      0.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
