# Logistic regression ML model for the cancer data set

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
import joblib

# Load the raw cancer dataset from disk.
data = pd.read_csv("Cancer_Data.csv")

# The label is the 'diagnosis' column; every remaining column is used as a feature.
# NOTE(review): any non-feature column left in the CSV (e.g. a record id) would also
# be fed to the model -- confirm the file holds only features plus 'diagnosis'.
y = data['diagnosis']
X = data.drop(columns=['diagnosis'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so the test rows never influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a default-configured logistic regression on the standardized training features.
model = LogisticRegression().fit(X_train_scaled, y_train)



# Hard class labels for the held-out rows.
y_pred = model.predict(X_test_scaled)

# Predicted probability of the second class (column index 1) for each held-out row.
y_pred_prob = model.predict_proba(X_test_scaled)[:, 1]

# Per-class precision / recall / F1, reported under human-readable class names.
classification_rep = classification_report(
    y_true=y_test,
    y_pred=y_pred,
    target_names=["benign", "malignant"],
)
print("Classification Report:\n", classification_rep)

# Area under the ROC curve, computed from the probabilities rather than the hard labels.
auc_roc = roc_auc_score(y_true=y_test, y_score=y_pred_prob)
print("AUC-ROC Score:", auc_roc)


Classification Report:
               precision    recall  f1-score   support

      benign       0.97      0.99      0.98        71
   malignant       0.98      0.95      0.96        43

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

AUC-ROC Score: 0.99737962659679


In [5]:
# Persist the fitted model and the fitted scaler with joblib so that predictions
# can be made later without retraining.
model_filename, scaler_filename = "logistic_regression_model.pkl", "feature_scaler.pkl"
for _artifact, _destination in ((model, model_filename), (scaler, scaler_filename)):
    joblib.dump(_artifact, _destination)
print(f"Model saved as '{model_filename}' and scaler saved as '{scaler_filename}'")

Model saved as 'logistic_regression_model.pkl' and scaler saved as 'feature_scaler.pkl'


In [4]:
from sklearn.metrics import accuracy_score
import joblib
import pandas as pd
import numpy as np

# Reload the artifacts written by the save cell (same file names), so that cell must
# have been run first. joblib files are pickle-based: only load files you produced
# yourself, because unpickling untrusted data can execute arbitrary code.
loaded_model = joblib.load("logistic_regression_model.pkl")
loaded_scaler = joblib.load("feature_scaler.pkl")
new_data = pd.read_csv("cancer_practiceData.csv")  # Load your new data as a DataFrame

# Hand the scaler exactly the columns it was fitted on, in the fitted order. Without
# this, an extra column in the CSV (for example a label or id column) or a different
# column order would make the transform fail or scale the wrong features.
expected_features = getattr(loaded_scaler, "feature_names_in_", None)
if expected_features is None:
    # The scaler was fitted without column names, so columns can only be matched
    # by position; pass the frame through unchanged.
    new_features = new_data
else:
    missing_features = [name for name in expected_features if name not in new_data.columns]
    if missing_features:
        raise ValueError(
            f"New data is missing feature columns required by the scaler: {missing_features}"
        )
    new_features = new_data[list(expected_features)]

# Scale the new data using the loaded scaler
new_data_scaled = loaded_scaler.transform(new_features)

# Make predictions using the loaded model
new_predictions = loaded_model.predict(new_data_scaled)

print(new_predictions)






['B']
