### A) For the following data, train a classifier using Logistic Regression without using any library functions.
x = {1, 2, 3, 4, 5, 6, 7, 8}

y = {0, 0, 0, 0, 1, 1, 1, 1}


## A) Logistic Regression without library functions



###### sigmoid function:
The sigmoid function is defined as:
![%7BB5428A90-DEA0-4E07-93B0-B4E1DA4D3AD4%7D.png](attachment:%7BB5428A90-DEA0-4E07-93B0-B4E1DA4D3AD4%7D.png)

![%7B41E61E12-8F39-4643-AB51-AB5D84B4F675%7D.png](attachment:%7B41E61E12-8F39-4643-AB51-AB5D84B4F675%7D.png)

###### Gradient Descent:
For each data point (xi ,yi)
the weight and bias updates can be calculated as:
    ![%7BE06E1E65-6EC1-4526-9FC1-1021726D8726%7D.png](attachment:%7BE06E1E65-6EC1-4526-9FC1-1021726D8726%7D.png)

![%7BFEF0BEA2-EFF7-44C2-B647-0830781B25E5%7D.png](attachment:%7BFEF0BEA2-EFF7-44C2-B647-0830781B25E5%7D.png)

In [17]:
import numpy as np

# Input data
x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
y = np.array([0, 0, 0, 0, 1, 1, 1, 1])

# Initialize weights and bias
w = 0
b = 0
learning_rate = 0.01
epochs = 10000

# Sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Logistic loss function
def compute_loss(y, y_pred):
    return -np.mean(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))

# Training loop using gradient descent
for i in range(epochs):
    # Linear model
    z = w * x + b
    # Prediction using sigmoid
    y_pred = sigmoid(z)
    
    # Compute gradients
    dw = np.dot(x, (y_pred - y)) / len(x)
    db = np.mean(y_pred - y)
    
    # Update weights and bias
    w -= learning_rate * dw
    b -= learning_rate * db
    
    # Compute and print loss and predictions at specified intervals
    if i in [100, 1000, 10000]:
        loss = compute_loss(y, y_pred)
        y_pred_class = [1 if i > 0.5 else 0 for i in y_pred]
        print(f'Epoch {i}, Loss: {loss}, Predicted Classes: {y_pred_class}')

# Final predictions
y_pred = sigmoid(w * x + b)
y_pred_class = [1 if i > 0.5 else 0 for i in y_pred]
print('Final Predicted Classes:', y_pred_class)


Epoch 100, Loss: 0.586141507461083, Predicted Classes: [1, 1, 1, 1, 1, 1, 1, 1]
Epoch 1000, Loss: 0.4083396766213716, Predicted Classes: [0, 0, 0, 1, 1, 1, 1, 1]
Final Predicted Classes: [0, 0, 0, 0, 1, 1, 1, 1]


In [5]:
# Calculate performance metrics
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Convert lists to numpy arrays
y_pred_class = np.array(y_pred_class)

# Confusion Matrix
cm = confusion_matrix(y, y_pred_class)
print(f"Confusion Matrix:\n{cm}")

# Accuracy
accuracy = accuracy_score(y, y_pred_class)
print(f"Accuracy: {accuracy}")

# Precision
precision = precision_score(y, y_pred_class)
print(f"Precision: {precision}")

# Recall
recall = recall_score(y, y_pred_class)
print(f"Recall: {recall}")

# F1 Score
f1 = f1_score(y, y_pred_class)
print(f"F1 Score: {f1}")


Confusion Matrix:
[[4 0]
 [0 4]]
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0


![%7B626EAA10-5B26-47F1-84AB-455A69C3D5BF%7D.png](attachment:%7B626EAA10-5B26-47F1-84AB-455A69C3D5BF%7D.png)



In [17]:
y_true = np.array([0,0,0,0,1,1,1,1])
y_pred_class = np.array([1, 1, 1, 1, 1, 1, 1, 1])

TP=0
FP=0
FN=0
TN=0

for i in range(len(y_true)):
    if y_true[i]==1 and y_pred_class[i]==1:
        TP += 1
    elif  y_true[i] ==0 and y_pred_class[i]==0:
        TN+=1
    elif  y_true[i] ==0 and y_pred_class[i]==1:
        FP+=1
    elif  y_true[i] ==1 and y_pred_class[i]==0:
        FN+=1
    
# Confusion Matrix
print(f'Confusion Matrix:\nTP: {TP},TN: {TN},FP: {FP},FN: {FN}') 
    
accuracy = (TP+TN) / (TP+TN+FP+FN)
print(f'Accuracy: {accuracy}')


    
                    

Confusion Matrix:
TP: 4,TN: 0,FP: 4,FN: 0
Accuracy: 0.5


In [18]:
y_true = np.array([0,0,0,0,1,1,1,1])
y_pred_class = np.array([0, 0, 0, 1, 1, 1, 1, 1])


TP=0
FP=0
FN=0
TN=0

for i in range(len(y_true)):
    if y_true[i]==1 and y_pred_class[i]==1:
        TP += 1
    elif  y_true[i] ==0 and y_pred_class[i]==0:
        TN+=1
    elif  y_true[i] ==0 and y_pred_class[i]==1:
        FP+=1
    elif  y_true[i] ==1 and y_pred_class[i]==0:
        FN+=1
    
# Confusion Matrix
print(f'Confusion Matrix:\nTP: {TP},TN: {TN},FP: {FP},FN: {FN}') 
    
accuracy = (TP+TN) / (TP+TN+FP+FN)
print(f'Accuracy: {accuracy}')


    
                    

Confusion Matrix:
TP: 4,TN: 3,FP: 1,FN: 0
Accuracy: 0.875


In [21]:
y_true = np.array([0,0,0,0,1,1,1,1])
y_pred_class = np.array([0, 0, 0, 0, 1, 1, 1, 1])

TP=0
FP=0
FN=0
TN=0

for i in range(len(y_true)):
    if y_true[i]==1 and y_pred_class[i]==1:
        TP += 1
    elif  y_true[i] ==0 and y_pred_class[i]==0:
        TN+=1
    elif  y_true[i] ==0 and y_pred_class[i]==1:
        FP+=1
    elif  y_true[i] ==1 and y_pred_class[i]==0:
        FN+=1
    
# Confusion Matrix
print(f'Confusion Matrix:\nTP: {TP},TN: {TN},FP: {FP},FN: {FN}') 
    
accuracy = (TP+TN) / (TP+TN+FP+FN)
print(f'Accuracy: {accuracy}')


    
                    

Confusion Matrix:
TP: 4,TN: 4,FP: 0,FN: 0
Accuracy: 1.0


In [22]:
if TP+FP != 0:
    precision = TP/(TP+FP)
else:
    precision = 0
print(f'Precision: {precision}')


Precision: 1.0


In [23]:
if TP+FN != 0:
    Recall = TP/(TP+FN)
else:
    Recall = 0
print(f'Recall: {Recall}')

Recall: 1.0


In [24]:
if precision+Recall !=0:
    f1_score = 2*(precision*Recall)/(precision+Recall)
else:
    f1_score = 0
    
print(f'F1_score {f1_score}')

F1_score 1.0


In [25]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Define the input data
x = np.array([1, 2, 3, 4, 5, 6, 7, 8]).reshape(-1, 1)
y = np.array([0, 0, 0, 0, 1, 1, 1, 1])

# Create a Logistic Regression model
model = LogisticRegression()

# Train the model
model.fit(x, y)

# Predict the class labels
y_pred = model.predict(x)

# Calculate performance metrics
conf_matrix = confusion_matrix(y, y_pred)
accuracy = accuracy_score(y, y_pred)
precision = precision_score(y, y_pred)
recall = recall_score(y, y_pred)
f1 = f1_score(y, y_pred)

# Print the results
print("Confusion Matrix:")
print(conf_matrix)
print("\nAccuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


Confusion Matrix:
[[4 0]
 [0 4]]

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0


###### B) Implement a Logistic Regression model using the Diabetes dataset from the sklearn datasets module. The objective is to predict whether a patient has diabetes based on various medical attributes.


Implement all the performance metrics for classification on both problems.
1. Confusion Matrix
2. Accuracy
3. Precision
4. Recall
5. F1 Score

##### Below is the implementation of a Logistic Regression model using the Diabetes dataset from the sklearn.datasets module. This model predicts whether a patient has diabetes based on medical attributes, and we will also calculate the performance metrics: Confusion Matrix, Accuracy, Precision, Recall, and F1 Score.


    
Load the Diabetes dataset.

Train a Logistic Regression model.

Evaluate the model using classification metrics.

In [41]:
#  Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


# Load the diabetes dataset from sklearn
data = load_diabetes()


X = data.data  # Features
y = (data.target > np.median(data.target)).astype(int)  # Binary target: 1 if above median, else 0

#  Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#  Train a Logistic Regression model
log_reg = LogisticRegression(max_iter=10000)

# Fit the model to the training data
log_reg.fit(X_train, y_train)


# Make predictions on the test data
y_pred = log_reg.predict(X_test)

# Evaluate performance
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Precision
precision = precision_score(y_test, y_pred)
print(f"Precision: {precision}")

# Recall
recall = recall_score(y_test, y_pred)
print(f"Recall: {recall}")

if precision+Recall !=0:
    f1_score = 2*(precision*Recall)/(precision+Recall)
else:
    f1_score = 0
    
print(f'F1_score {f1_score}')


# Print results
print(f"Accuracy: {accuracy:.4f}")
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(class_report)

print(y_test)
print(y_pred)



Precision: 0.7073170731707317
Recall: 0.725
F1_score 0.7280334728033473
Accuracy: 0.7416
Confusion Matrix:
[[37 12]
 [11 29]]
Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.76      0.76        49
           1       0.71      0.72      0.72        40

    accuracy                           0.74        89
   macro avg       0.74      0.74      0.74        89
weighted avg       0.74      0.74      0.74        89

[1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 1 1 1 1 0 0 1 0 0 1 0 0 0 0 0 0 1 1 1 0
 0 1 0 0 0 1 1 0 0 1 0 0 1 0 1 0 0 1 0 0 1 1 0 0 0 1 0 1 1 1 0 0 1 0 1 1 0
 0 0 1 1 1 1 0 1 0 0 1 0 0 0 1]
[1 1 1 1 1 0 1 1 0 0 0 0 0 1 0 0 1 1 1 1 1 0 0 1 0 1 1 1 0 0 1 0 0 1 1 1 0
 1 1 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 1 0 0 0 1 1 1 0 0 1 1 1 0
 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0]


In [42]:
y_true = y_test
y_pred_class = y_pred_class

TP=0
FP=0
FN=0
TN=0

for i in range(len(y_true)):
    if y_true[i]==1 and y_pred_class[i]==1:
        TP += 1
    elif  y_true[i] ==0 and y_pred_class[i]==0:
        TN+=1
    elif  y_true[i] ==0 and y_pred_class[i]==1:
        FP+=1
    elif  y_true[i] ==1 and y_pred_class[i]==0:
        FN+=1
    
# Confusion Matrix
print(f'Confusion Matrix:\nTP: {TP},TN: {TN},FP: {FP},FN: {FN}') 
    
accuracy = (TP+TN) / (TP+TN+FP+FN)
print(f'Accuracy: {accuracy}')

if TP+FP != 0:
    precision = TP/(TP+FP)
else:
    precision = 0
print(f'Precision: {precision}')


if TP+FN != 0:
    Recall = TP/(TP+FN)
else:
    Recall = 0
print(f'Recall: {Recall}')

if precision+Recall !=0:
    f1_score = 2*(precision*Recall)/(precision+Recall)
else:
    f1_score = 0
    
print(f'F1_score {f1_score}')

Confusion Matrix:
TP: 30,TN: 33,FP: 16,FN: 10
Accuracy: 0.7078651685393258
Precision: 0.6521739130434783
Recall: 0.75
F1_score 0.6976744186046512


###### Standardization helps to scale the features so that they have a mean of 0 and a standard deviation of 1, which can often improve the performance of algorithms like logistic regression.

In [43]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score

# Load the diabetes dataset from sklearn
data = load_diabetes()

X = data.data  # Features
y = (data.target > np.median(data.target)).astype(int)  # Binary target: 1 if above median, else 0

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data (mean=0, std=1)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # Fit the scaler on training data and transform it
X_test = scaler.transform(X_test)        # Use the same scaler to transform test data

# Train a Logistic Regression model
log_reg = LogisticRegression(max_iter=10000)

# Fit the model to the training data
log_reg.fit(X_train, y_train)

# Make predictions on the test data
y_pred = log_reg.predict(X_test)

# Evaluate performance
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Precision
precision = precision_score(y_test, y_pred)
print(f"Precision: {precision:.4f}")

# Recall
recall = recall_score(y_test, y_pred)
print(f"Recall: {recall:.4f}")

# F1 Score (calculated manually if precision + recall is not zero)
if precision + recall != 0:
    f1_score = 2 * (precision * recall) / (precision + recall)
else:
    f1_score = 0

print(f"F1 Score: {f1_score:.4f}")

# Print results
print(f"Accuracy: {accuracy:.4f}")
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(class_report)

# Print test and predicted values
print("True labels (y_test):", y_test)
print("Predicted labels (y_pred):", y_pred)


Precision: 0.6905
Recall: 0.7250
F1 Score: 0.7073
Accuracy: 0.7303
Confusion Matrix:
[[36 13]
 [11 29]]
Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.73      0.75        49
           1       0.69      0.72      0.71        40

    accuracy                           0.73        89
   macro avg       0.73      0.73      0.73        89
weighted avg       0.73      0.73      0.73        89

True labels (y_test): [1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 1 1 1 1 0 0 1 0 0 1 0 0 0 0 0 0 1 1 1 0
 0 1 0 0 0 1 1 0 0 1 0 0 1 0 1 0 0 1 0 0 1 1 0 0 0 1 0 1 1 1 0 0 1 0 1 1 0
 0 0 1 1 1 1 0 1 0 0 1 0 0 0 1]
Predicted labels (y_pred): [0 1 0 1 0 0 1 1 0 0 0 1 0 1 0 0 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 0 0 1 1 1 0
 0 0 0 0 0 1 1 1 0 0 0 0 1 1 0 0 0 1 1 0 1 0 0 1 1 0 0 0 1 1 1 0 0 1 1 1 0
 0 1 1 1 0 1 0 1 0 0 0 0 0 0 1]
