In [1]:
# Demonstration of Logistic Regression for a sample training data set stored as a .CSV file. Calculate the accuracy,
# precision, and recall for your dataset.

In [2]:
# Logistic Regression is a statistical method for binary classification that predicts the probability of a binary outcome
# (1/0, Yes/No, True/False) based on one or more independent variables. Unlike linear regression, which predicts a
# continuous outcome, logistic regression transforms its output using the logistic sigmoid function to return a
# probability value between 0 and 1.
# In this demonstration, we train a logistic regression model on a sample dataset. The code shows (commented out) how
# to load such a dataset from a .CSV file; to keep the notebook self-contained, a synthetic dataset generated with
# make_classification is used instead. The dataset includes features (independent variables) and a binary target label
# (dependent variable). After training the model, we evaluate its performance using accuracy, precision, and recall.

In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix

# Step 1: Load the data
# To run on a real dataset, load it from a CSV file and separate the features (X)
# from the target label (y):
# data = pd.read_csv('sample_data.csv')
# X = data.drop('target', axis=1)
# y = data['target']
# Handle missing values (if any) - optional step, applies to the DataFrame input only:
# X = X.fillna(X.mean())
# A synthetic binary-classification dataset stands in for the CSV file here;
# random_state makes the generated data reproducible.
X, y = make_classification(n_samples=500, n_features=10, n_classes=2, random_state=42)

# Step 2: Data Preprocessing
# Split the dataset into training and testing sets BEFORE scaling, so that the
# held-out rows do not contribute to the scaler's mean/variance (data leakage).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Standardize the features: fit the scaler on the training split only, then
# apply the same (already fitted) transformation to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 3: Model Training
model = LogisticRegression()
model.fit(X_train, y_train)

# Step 4: Prediction
y_pred = model.predict(X_test)

# Step 5: Evaluation
# All metrics compare the true test labels against the model's predictions.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
# Confusion matrix of true labels (first argument) versus predicted labels.
conf = confusion_matrix(y_test, y_pred)
print(conf)

# Print the results
print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')

[[47  7]
 [ 5 41]]
Accuracy: 0.88
Precision: 0.85
Recall: 0.89


In [None]:
# OUTPUT:
# The cell above prints the confusion matrix followed by three evaluation metrics. For the run recorded above:
# [[47  7]
#  [ 5 41]]
# Accuracy: 0.88
# Precision: 0.85
# Recall: 0.89
# (Exact values may differ slightly with a different library version or preprocessing.)
# These metrics indicate how well the logistic regression model performed on the test data. A higher value in each
# metric suggests better model performance.