In [None]:
# What is Logistic Regression, and how does it differ from Linear Regression?

Logistic Regression is a classification algorithm used when the dependent variable is categorical (binary or multiclass). Unlike Linear Regression, which predicts continuous values, Logistic Regression predicts probabilities of class membership.

Linear Regression is used for regression tasks, and it predicts continuous values.
Logistic Regression is used for classification tasks, and it predicts the probability of a data point belonging to a particular class.
from sklearn.linear_model import LinearRegression, LogisticRegression
import numpy as np

# Toy one-feature dataset: five samples with feature values 1..5.
X = np.arange(1, 6).reshape(-1, 1)
y_reg = np.arange(10, 31, 5)         # continuous target: 10, 15, 20, 25, 30
y_clf = np.array([0, 0, 1, 1, 1])    # binary class labels for the same samples

# Linear regression: predicts a continuous value for the unseen input x = 6.
linear_model = LinearRegression().fit(X, y_reg)
print(f"Linear Regression Prediction: {linear_model.predict([[6]])}")

# Logistic regression: reports class-membership probabilities for x = 6.
logistic_model = LogisticRegression().fit(X, y_clf)
print(f"Logistic Regression Prediction: {logistic_model.predict_proba([[6]])}")


In [None]:
# What is the mathematical equation of Logistic Regression?

The mathematical equation for Logistic Regression is based on the sigmoid function:

$$P(y = 1 \mid x) = \sigma(z) = \frac{1}{1 + e^{-z}}, \quad \text{where } z = w^T x + b$$
Where:

- $P(y = 1 \mid x)$ is the probability that the target class is 1, given the input $x$.
- $w^T x + b$ is the linear combination of input features.
- $\sigma(z)$ is the sigmoid function.
import numpy as np

# Sigmoid function
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    The result is computed from ``exp(-|z|)``, which never exceeds 1, so
    inputs of large magnitude cannot overflow (the naive formula overflows
    inside ``exp(-z)`` when ``z`` is very negative).

    Args:
        z: A real scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an ndarray with the same
        shape as ``z``; every value lies in [0, 1].
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # always in [0, 1], so it cannot overflow
    # For z >= 0, decay == exp(-z) and the usual formula applies directly.
    # For z < 0, decay == exp(z); multiplying numerator and denominator of the
    # usual formula by exp(z) gives the equivalent form decay / (1 + decay).
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    return result[()]  # unwraps a 0-d result to a scalar; arrays pass through


# Sample the curve on 100 evenly spaced points in [-10, 10] for plotting.
z = np.linspace(-10, 10, 100)
sigmoid_values = sigmoid(z)


In [None]:
# Why do we use the Sigmoid function in Logistic Regression?

The sigmoid function transforms the linear output into a probability value between 0 and 1, which is essential for binary classification. The sigmoid curve helps model the probability of the input belonging to class 1 or class 0:

$$\sigma(z) = \frac{1}{1 + e^{-z}}$$

This maps any real-valued number into the (0, 1) range.

import matplotlib.pyplot as plt

# Draw the sigmoid samples (z against sigmoid(z)) on a single labelled axes.
fig, ax = plt.subplots()
ax.plot(z, sigmoid_values)
ax.set_title('Sigmoid Function')
ax.set_xlabel('z')
ax.set_ylabel('sigmoid(z)')
plt.show()


In [None]:
# What is the cost function of Logistic Regression?

The cost function for Logistic Regression is the log loss or binary cross-entropy:

$$\text{Cost}(h(x), y) = -\left[\, y \log(h(x)) + (1 - y) \log(1 - h(x)) \,\right]$$
This function penalizes wrong predictions and helps the model learn the correct parameters.

from sklearn.metrics import log_loss

# Each pair is (true label, predicted probability of the positive class).
labelled_probs = [(0, 0.1), (0, 0.4), (1, 0.35), (1, 0.8)]
y_true = [label for label, _ in labelled_probs]
y_pred = [prob for _, prob in labelled_probs]

# Binary cross-entropy of the predicted probabilities against the labels.
loss = log_loss(y_true, y_pred)
print(f"Log Loss: {loss}")


In [None]:
# What is Regularization in Logistic Regression? Why is it needed?

Regularization adds a penalty to the cost function to prevent the model from overfitting by discouraging large coefficients. The two most common types are:

L1 Regularization (Lasso): Adds the sum of the absolute values of the coefficients as a penalty.
L2 Regularization (Ridge): Adds the sum of the squared values of the coefficients as a penalty.
Need: Regularization prevents overfitting by reducing model complexity, ensuring that the model generalizes well to unseen data.

Regularization Terms:

- L1 Regularization: $\lambda \sum_j |w_j|$
- L2 Regularization: $\lambda \sum_j w_j^2$

# Fit the toy classification data once per penalty type.
# The L1 model is fitted with liblinear; the L2 model keeps the default solver.
logistic_l1 = LogisticRegression(penalty='l1', solver='liblinear').fit(X, y_clf)
logistic_l2 = LogisticRegression(penalty='l2').fit(X, y_clf)

# Show the learned weight under each penalty.
l1_coef = logistic_l1.coef_
l2_coef = logistic_l2.coef_
print(f"L1 Regularization Coefficients: {l1_coef}")
print(f"L2 Regularization Coefficients: {l2_coef}")




In [None]:
# Explain the difference between Lasso, Ridge, and Elastic Net regression.

Lasso Regression (L1): Shrinks some coefficients to exactly zero, useful for feature selection.
Ridge Regression (L2): Penalizes large coefficients but doesn’t set any to zero.
Elastic Net: Combines both L1 and L2 penalties.

from sklearn.linear_model import ElasticNet

# Elastic Net on the toy regression target, mixing the L1 and L2 penalties
# through l1_ratio=0.5 with an overall penalty weight of alpha=1.0.
elastic_net = ElasticNet(l1_ratio=0.5, alpha=1.0).fit(X, y_reg)
enet_coef = elastic_net.coef_
print(f"Elastic Net Coefficients: {enet_coef}")


In [None]:
# What is the impact of the regularization parameter (λ) in Logistic Regression?

The regularization parameter (λ) controls the strength of the regularization:

A small λ allows the model to fit the training data closely, which can lead to overfitting. A large λ penalizes the weights more heavily, which reduces overfitting but can cause underfitting. In scikit-learn the strength is set through `C`, the inverse of λ (C = 1/λ), so a smaller `C` means stronger regularization.

In [None]:
# What are the key assumptions of Logistic Regression?

Linearity: The relationship between the independent variables and the log-odds is linear.
Independence: The observations are independent of each other.
No multicollinearity: Independent variables should not be highly correlated.


In [None]:
# What are some alternatives to Logistic Regression for classification tasks?

Decision Trees
Random Forests
Support Vector Machines (SVM)
k-Nearest Neighbors (KNN)
Gradient Boosting Classifiers (e.g., XGBoost)

In [None]:
# What are Classification Evaluation Metrics?

Accuracy

Precision

Recall

F1-Score

ROC AUC Score

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Hand-made labels and predictions: one positive sample is missed.
y_true = [0, 1, 1, 0, 1]
y_pred = [0, 0, 1, 0, 1]

# Evaluate the four metrics in one pass over the scoring functions.
accuracy, precision, recall, f1 = (
    metric(y_true, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f"Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1-Score: {f1}")


In [None]:
# How does class imbalance affect Logistic Regression?

In the presence of class imbalance, Logistic Regression might be biased towards the majority class. This can be handled by:

Using class weights (class_weight='balanced' in scikit-learn).
Using oversampling or undersampling techniques.

In [None]:
# What is Hyperparameter Tuning in Logistic Regression?

Hyperparameter tuning involves selecting the optimal values for parameters like the regularization strength (λ). This can be done using techniques like Grid Search or Randomized Search.

from sklearn.model_selection import GridSearchCV

# Candidate values for C, scikit-learn's inverse regularization strength.
param_grid = {'C': [0.01, 0.1, 1, 10]}

# The toy data has only five samples (two in class 0, three in class 1), so a
# stratified 5-fold split is impossible and raises a ValueError. Two folds is
# the most the smaller class allows while keeping both classes in every fold.
grid = GridSearchCV(LogisticRegression(), param_grid, cv=2)
grid.fit(X, y_clf)
print(f"Best parameters: {grid.best_params_}")


In [None]:
# What are different solvers in Logistic Regression? Which one should be used?

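liblinear: Good for small datasets; supports L1 and L2 regularization and handles multiclass problems only through one-vs-rest.
lbfgs: The scikit-learn default; a good general-purpose choice with L2 (or no) regularization and native multiclass support.
saga: Suited to large datasets; supports L1, L2, and Elastic Net regularization, and converges fastest on scaled features.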

In [None]:
# How is Logistic Regression extended for multiclass classification?

Logistic Regression is extended to multiclass classification using:

One-vs-Rest (OvR): Fits one classifier per class.
Softmax Regression (Multinomial): Generalizes to multiple classes by predicting probabilities for each class directly.

In [None]:
# What are the advantages and disadvantages of Logistic Regression?

Advantages: Simple, interpretable, probabilistic output, efficient.
Disadvantages: Assumes linearity, not suitable for complex relationships.

In [None]:
# What are some use cases of Logistic Regression?

Predicting whether a customer will churn (binary classification).
Medical diagnosis (e.g., predicting the probability of a disease).
Email classification (spam vs. not spam).


In [None]:
# What is the difference between Softmax Regression and Logistic Regression?

Logistic Regression: Used for binary classification.
Softmax Regression: Used for multiclass classification, where the output probabilities sum up to 1 across all classes.

In [None]:
# How do we choose between One-vs-Rest (OvR) and Softmax for multiclass classification?

OvR: Builds a binary classifier for each class.
Softmax: Handles multiclass directly in a single model, more efficient for multiclass tasks.


In [None]:
# How do we interpret coefficients in Logistic Regression?

The coefficients in Logistic Regression represent the change in the log-odds of the outcome for a unit change in the predictor. The exponentiated coefficient $e^{w_j}$ gives the odds ratio.

  coef = logistic_model.coef_[0]
odds_ratios = np.exp(coef)
print(f"Odds Ratios: {odds_ratios}")


In [None]:
#                                                                          Practical

In [None]:
# Write a Python program that loads a dataset, splits it into training and testing sets, applies Logistic Regression, and prints the model accuracy.

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Fetch the iris data and pull out the feature matrix and the label vector.
data = load_iris()
X = data.data
y = data.target

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Fit the classifier on the training portion only.
model = LogisticRegression().fit(X_train, y_train)

# Score the held-out rows and report the fraction predicted correctly.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")


In [None]:
# Write a Python program to apply L1 regularization (Lasso) on a dataset using LogisticRegression(penalty='l1') and print the model accuracy.

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Iris features and labels as plain arrays.
X, y = load_iris(return_X_y=True)

# 80/20 train/test partition with a fixed seed.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

# L1-penalized logistic regression, fitted with the liblinear solver.
model = LogisticRegression(solver='liblinear', penalty='l1')
model.fit(X_train, y_train)

# Accuracy of the fitted model on the held-out rows.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"L1 Regularization Model Accuracy: {accuracy:.2f}")


In [None]:
# Write a Python program to train Logistic Regression with L2 regularization (Ridge) using LogisticRegression(penalty='l2'). Print model accuracy and coefficients.

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Iris features and labels.
X, y = load_iris(return_X_y=True)

# Reserve a fifth of the samples for evaluation (seeded for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# L2-penalized logistic regression fitted with the lbfgs solver.
model = LogisticRegression(solver='lbfgs', penalty='l2').fit(X_train, y_train)

# Report held-out accuracy followed by the learned weight matrix.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"L2 Regularization Model Accuracy: {accuracy:.2f}")
print(f"Model Coefficients: {model.coef_}")


In [None]:
# Write a Python program to train Logistic Regression with Elastic Net Regularization (penalty='elasticnet').

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score  # used below; make the cell self-contained

# Load dataset
X, y = load_iris(return_X_y=True)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Elastic Net Regularization
# The elasticnet penalty is fitted with the saga solver; l1_ratio=0.5 weights
# the L1 and L2 parts equally. max_iter is raised far above the default
# because saga converges slowly on unscaled features (the solver-comparison
# cell later in this notebook uses the same setting).
model = LogisticRegression(penalty='elasticnet', solver='saga', l1_ratio=0.5, max_iter=5000)
model.fit(X_train, y_train)

# Predict and print accuracy
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Elastic Net Model Accuracy: {accuracy:.2f}")


In [None]:
# Write a Python program to train a Logistic Regression model for multiclass classification using multi_class='ovr'.

from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load dataset
X, y = load_iris(return_X_y=True)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Multiclass Logistic Regression with OvR
# The multi_class='ovr' argument is deprecated in recent scikit-learn
# releases; wrapping the estimator in OneVsRestClassifier is the supported way
# to get the same one-binary-classifier-per-class scheme on every version.
model = OneVsRestClassifier(LogisticRegression())
model.fit(X_train, y_train)

# Predict and print accuracy
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Multiclass (OvR) Model Accuracy: {accuracy:.2f}")


In [None]:
# Write a Python program to apply GridSearchCV to tune the hyperparameters (C and penalty) of Logistic Regression. Print the best parameters and accuracy.

from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Iris features and labels.
X, y = load_iris(return_X_y=True)

# Keep 20% of the samples aside; the search below only sees the training part.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Base estimator whose hyperparameters are tuned.
model = LogisticRegression()

# Search space: four C values crossed with both penalties, all using liblinear.
param_grid = dict(
    C=[0.01, 0.1, 1, 10],
    penalty=['l1', 'l2'],
    solver=['liblinear'],
)

# Exhaustive search with 5-fold cross-validation on the training data.
grid_search = GridSearchCV(model, param_grid, cv=5).fit(X_train, y_train)

# Report the winning combination together with its cross-validated score.
best_params = grid_search.best_params_
accuracy = grid_search.best_score_
print(f"Best Parameters: {best_params}")
print(f"Best Accuracy: {accuracy:.2f}")


In [None]:
# Write a Python program to evaluate Logistic Regression using Stratified K-Fold Cross-Validation. Print the average accuracy.

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.datasets import load_iris

# Full iris dataset; cross-validation does its own splitting.
X, y = load_iris(return_X_y=True)

# Estimator that is re-fitted on every fold.
model = LogisticRegression()

# Five stratified folds, scored with the estimator's default metric.
skf = StratifiedKFold(n_splits=5)
scores = cross_val_score(model, X, y, cv=skf)

# Mean of the per-fold scores.
average_accuracy = scores.mean()
print(f"Average Accuracy: {average_accuracy:.2f}")


In [None]:
# Write a Python program to load a dataset from a CSV file, apply Logistic Regression, and evaluate its accuracy.

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Read the table from disk (replace the placeholder file name with a real path).
df = pd.read_csv('your_dataset.csv')

# Layout assumption: every column but the last is a feature, the last is the target.
feature_frame = df.iloc[:, :-1]
target_series = df.iloc[:, -1]
X = feature_frame.values
y = target_series.values

# 80/20 train/test partition with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Fit on the training rows.
model = LogisticRegression().fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")


In [None]:
# Write a Python program to apply RandomizedSearchCV for tuning hyperparameters (C, penalty, solver) in Logistic Regression. Print the best parameters and accuracy?

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load dataset
X, y = load_iris(return_X_y=True)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the Logistic Regression model
# saga is one of the sampled solvers and converges slowly on unscaled
# features, so the iteration cap is raised well above the default (the same
# value the solver-comparison cell in this notebook uses).
model = LogisticRegression(max_iter=5000)

# Define the hyperparameters grid
# 4 C values x 2 penalties x 2 solvers = 16 combinations, of which n_iter=10
# are sampled below.
param_distributions = {
    'C': [0.01, 0.1, 1, 10],
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear', 'saga']
}

# Apply RandomizedSearchCV
random_search = RandomizedSearchCV(model, param_distributions, n_iter=10, cv=5, random_state=42)
random_search.fit(X_train, y_train)

# Print the best parameters and accuracy
# best_score_ is the cross-validated score of the best candidate, not a
# held-out test score.
best_params = random_search.best_params_
accuracy = random_search.best_score_
print(f"Best Parameters: {best_params}")
print(f"Best Accuracy: {accuracy:.2f}")


In [None]:
# Write a Python program to implement One-vs-One (OvO) Multiclass Logistic Regression and print accuracy.

from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsOneClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Iris features and labels.
X, y = load_iris(return_X_y=True)

# Seeded 80/20 train/test partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Wrap logistic regression in the one-vs-one meta-estimator and fit it.
model = OneVsOneClassifier(LogisticRegression()).fit(X_train, y_train)

# Held-out accuracy of the combined classifier.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"One-vs-One Logistic Regression Accuracy: {accuracy:.2f}")


In [None]:
# Write a Python program to train a Logistic Regression model and visualize the confusion matrix for binary classification.

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import confusion_matrix

# Load dataset
X, y = load_breast_cancer(return_X_y=True)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Logistic Regression model
# The raw breast-cancer features are on very different scales, and the default
# iteration cap stops the solver before it converges (ConvergenceWarning).
# A much larger cap lets the fit finish.
model = LogisticRegression(max_iter=10000)
model.fit(X_train, y_train)

# Predict and visualize confusion matrix
y_pred = model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)  # rows: actual class, columns: predicted class

# Plot confusion matrix
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()


In [None]:
# Write a Python program to train a Logistic Regression model and evaluate its performance using Precision, Recall, and F1-Score.

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

# Load dataset
X, y = load_breast_cancer(return_X_y=True)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Logistic Regression model
# The default iteration cap is too low for the unscaled breast-cancer
# features (the solver stops with a ConvergenceWarning), so raise it.
model = LogisticRegression(max_iter=10000)
model.fit(X_train, y_train)

# Predict and calculate metrics (all computed for the positive class, label 1)
y_pred = model.predict(X_test)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")


In [None]:
# Write a Python program to train a Logistic Regression model on imbalanced data and apply class weights to improve model performance.

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Synthetic binary problem with a 90/10 class split (seeded for repeatability).
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    weights=[0.9, 0.1],
    random_state=42,
)

# Seeded 80/20 train/test partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# class_weight='balanced' is the setting that compensates for the skewed labels.
model = LogisticRegression(class_weight='balanced').fit(X_train, y_train)

# Accuracy on the held-out rows.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy on Imbalanced Data: {accuracy:.2f}")


In [None]:
# Write a Python program to train Logistic Regression on the Titanic dataset, handle missing values, and evaluate performance.

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer

# Load Titanic dataset
df = pd.read_csv('titanic.csv')

# Feature selection and preprocessing
# Rows with missing feature values are kept on purpose so they can be imputed
# below; dropping them up front would leave the imputer with nothing to do.
feature_cols = ['Pclass', 'Sex', 'Age', 'Fare']
df = df[feature_cols + ['Survived']].copy()  # copy: avoid writing into a slice of the raw frame
df = df.dropna(subset=['Survived'])          # rows without a label cannot be used for training
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})

X = df[feature_cols]
y = df['Survived']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Handle missing values
# The column means are learned from the training rows only and then applied
# to both splits, so no information from the test rows leaks into training.
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

# Train Logistic Regression
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict and print accuracy
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy on Titanic Data: {accuracy:.2f}")


In [None]:
# Write a Python program to apply feature scaling (Standardization) before training a Logistic Regression model. Evaluate its accuracy and compare results with and without scaling.

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Iris features and labels.
X, y = load_iris(return_X_y=True)

# Seeded 80/20 train/test partition shared by both models.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Baseline: fit and score on the features exactly as loaded.
model_no_scaling = LogisticRegression().fit(X_train, y_train)
y_pred_no_scaling = model_no_scaling.predict(X_test)
accuracy_no_scaling = accuracy_score(y_test, y_pred_no_scaling)

# Standardization statistics come from the training rows and are then applied
# unchanged to the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Same estimator, fitted and scored on the standardized features.
model_scaled = LogisticRegression().fit(X_train_scaled, y_train)
y_pred_scaled = model_scaled.predict(X_test_scaled)
accuracy_scaled = accuracy_score(y_test, y_pred_scaled)

print(f"Accuracy without Scaling: {accuracy_no_scaling:.2f}")
print(f"Accuracy with Scaling: {accuracy_scaled:.2f}")


In [None]:
# Write a Python program to train Logistic Regression and evaluate its performance using ROC-AUC score.

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

# Load dataset
X, y = load_breast_cancer(return_X_y=True)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Logistic Regression
# The default iteration cap is too low for the unscaled breast-cancer
# features (the solver stops with a ConvergenceWarning), so raise it.
model = LogisticRegression(max_iter=10000)
model.fit(X_train, y_train)

# Predict and evaluate ROC-AUC score
# ROC-AUC ranks samples by score, so it needs the probability of the positive
# class (column 1 of predict_proba) rather than hard 0/1 predictions.
y_prob = model.predict_proba(X_test)[:, 1]
roc_auc = roc_auc_score(y_test, y_prob)
print(f"ROC-AUC Score: {roc_auc:.2f}")


In [None]:
# Write a Python program to train Logistic Regression using a custom regularization setting (C=0.5; in scikit-learn C is the inverse regularization strength, not a learning rate) and evaluate accuracy.

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Iris features and labels.
X, y = load_iris(return_X_y=True)

# Seeded 80/20 train/test partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# C=0.5 replaces the estimator's default C value; everything else is default.
model = LogisticRegression(C=0.5).fit(X_train, y_train)

# Held-out accuracy.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy with C=0.5: {accuracy:.2f}")


In [None]:
# Write a Python program to train Logistic Regression and identify important features based on model coefficients.

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load dataset
X, y = load_iris(return_X_y=True)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features first: coefficient magnitudes are only comparable
# between features when every feature is on the same scale.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Train Logistic Regression model
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# Identify important features based on coefficients
# Iris has three classes, so coef_ holds one row of weights per class. Ranking
# on row 0 alone would describe a single class; averaging the absolute weights
# over all rows gives one importance value per feature for the whole model.
feature_importance = np.abs(model.coef_).mean(axis=0)
important_features = np.argsort(feature_importance)[::-1]  # feature indices, most important first

print(f"Feature Coefficients: {model.coef_}")
print(f"Most Important Features (in order): {important_features}")


In [None]:
# Write a Python program to train Logistic Regression and evaluate its performance using Cohen’s Kappa Score.

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score

# Load dataset
X, y = load_breast_cancer(return_X_y=True)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Logistic Regression model
# The default iteration cap is too low for the unscaled breast-cancer
# features (the solver stops with a ConvergenceWarning), so raise it.
model = LogisticRegression(max_iter=10000)
model.fit(X_train, y_train)

# Predict and evaluate using Cohen's Kappa Score
# Kappa measures agreement between predictions and labels corrected for the
# agreement expected by chance.
y_pred = model.predict(X_test)
kappa_score = cohen_kappa_score(y_test, y_pred)
print(f"Cohen's Kappa Score: {kappa_score:.2f}")


In [None]:
# Write a Python program to train Logistic Regression and visualize the Precision-Recall Curve for binary classification.

import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
# Only precision_recall_curve is needed. The plot_precision_recall_curve
# helper was never used here and no longer exists in current scikit-learn, so
# importing it made the whole cell fail with an ImportError.
from sklearn.metrics import precision_recall_curve

# Load dataset
X, y = load_breast_cancer(return_X_y=True)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Logistic Regression model
# The default iteration cap is too low for the unscaled breast-cancer
# features (the solver stops with a ConvergenceWarning), so raise it.
model = LogisticRegression(max_iter=10000)
model.fit(X_train, y_train)

# Predict and plot Precision-Recall curve
# The curve is traced over decision thresholds, so it needs the probability of
# the positive class (column 1) rather than hard predictions.
y_prob = model.predict_proba(X_test)[:, 1]
precision, recall, _ = precision_recall_curve(y_test, y_prob)

plt.plot(recall, precision, marker='.')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.show()


In [None]:
# Write a Python program to train Logistic Regression with different solvers (liblinear, saga, lbfgs) and compare their accuracy.

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Iris features and labels.
X, y = load_iris(return_X_y=True)

# One seeded 80/20 partition shared by every solver so the scores are comparable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Fit one model per solver, each with the same generous iteration cap, and
# report its held-out accuracy.
solvers = ['liblinear', 'saga', 'lbfgs']
for solver in solvers:
    model = LogisticRegression(max_iter=5000, solver=solver).fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy with {solver} solver: {accuracy:.2f}")


In [None]:
# Write a Python program to train Logistic Regression and evaluate its performance using Matthews Correlation Coefficient (MCC).

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import matthews_corrcoef

# Load dataset
X, y = load_breast_cancer(return_X_y=True)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Logistic Regression model
# The default iteration cap is too low for the unscaled breast-cancer
# features (the solver stops with a ConvergenceWarning), so raise it.
model = LogisticRegression(max_iter=10000)
model.fit(X_train, y_train)

# Predict and evaluate using MCC
# MCC summarizes all four confusion-matrix cells in one value in [-1, 1].
y_pred = model.predict(X_test)
mcc = matthews_corrcoef(y_test, y_pred)
print(f"Matthews Correlation Coefficient (MCC): {mcc:.2f}")


In [None]:
# Write a Python program to train Logistic Regression on both raw and standardized data. Compare their accuracy to see the impact of feature scaling.

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Iris features and labels.
X, y = load_iris(return_X_y=True)

# Seeded 80/20 train/test partition used for both variants.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Standardized copies of both splits; the statistics come from the training rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Variant 1: model trained and evaluated on the raw features.
model_raw = LogisticRegression().fit(X_train, y_train)
y_pred_raw = model_raw.predict(X_test)
accuracy_raw = accuracy_score(y_test, y_pred_raw)

# Variant 2: identical estimator on the standardized features.
model_scaled = LogisticRegression().fit(X_train_scaled, y_train)
y_pred_scaled = model_scaled.predict(X_test_scaled)
accuracy_scaled = accuracy_score(y_test, y_pred_scaled)

print(f"Accuracy on raw data: {accuracy_raw:.2f}")
print(f"Accuracy on standardized data: {accuracy_scaled:.2f}")


In [None]:
# Write a Python program to train Logistic Regression and find the optimal C (regularization strength) using cross-validation.

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score

# Load dataset
X, y = load_iris(return_X_y=True)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Logistic Regression with different C values
# Every candidate is scored with 5-fold cross-validation on the training rows
# only, so the held-out test rows play no part in choosing C.
C_values = [0.01, 0.1, 1, 10, 100]
mean_scores = {}
for C in C_values:
    model = LogisticRegression(C=C)
    scores = cross_val_score(model, X_train, y_train, cv=5)
    mean_scores[C] = scores.mean()
    print(f"Accuracy with C={C}: {mean_scores[C]:.2f}")

# Select the optimum: the C with the highest mean cross-validated accuracy.
# max() keeps the first of any tied candidates, i.e. the smallest tied C.
best_C = max(mean_scores, key=mean_scores.get)
print(f"Optimal C: {best_C} (mean CV accuracy: {mean_scores[best_C]:.2f})")


In [None]:
# Write a Python program to train Logistic Regression, save the trained model using joblib, and load it again to make predictions.

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib

# Iris features and labels.
X, y = load_iris(return_X_y=True)

# Seeded 80/20 train/test partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Fit the model that will be persisted.
model = LogisticRegression().fit(X_train, y_train)

# Round-trip the fitted estimator through a file on disk.
model_path = 'logistic_model.pkl'
joblib.dump(model, model_path)
loaded_model = joblib.load(model_path)

# Predict with the reloaded copy to show that it works after the round trip.
y_pred = loaded_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Loaded Model Accuracy: {accuracy:.2f}")
