In [4]:
"""
Program to implement Logistic Regression Using Gradient Descent.
Developed by: SUDHARSAN S
RegisterNumber: 2122240403334
"""

# 1. Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 2. Read the placement records from disk and show the first rows
dataset = pd.read_csv("Placement_Data.csv")
print("\n--- Step 1: Original Dataset ---\n")
print(dataset.head())

# 3. Remove the two columns that are not used as model inputs
dataset = dataset.drop(columns=['sl_no', 'salary'])

# 4. Mark every text-valued column as a pandas categorical
categorical_columns = [
    "gender",
    "ssc_b",
    "hsc_b",
    "degree_t",
    "workex",
    "specialisation",
    "status",
    "hsc_s",
]
for column in categorical_columns:
    dataset[column] = dataset[column].astype('category')

print("\n--- Step 2: Dataset Data Types ---\n")
print(dataset.dtypes)

# 5. Swap each categorical value for its integer category code
for column in categorical_columns:
    dataset[column] = dataset[column].cat.codes

print("\n--- Step 3: Encoded Dataset ---\n")
print(dataset.head())

# 6. Split into inputs and target: every column except the last is a
#    feature, and the last remaining column is the label to predict.
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, -1].values

print("\n--- Step 4: Target Variable (Y) ---\n")
print(Y)

# 7. Starting weights: one standard-normal draw per feature column.
#    X is not extended with a constant column, so the model has no
#    separate intercept weight.
theta = np.random.randn(X.shape[1])
y = Y

# 8. Define Sigmoid Function
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    The result is built from exp(-|z|), which is never larger than 1, so
    large-magnitude negative inputs do not overflow ``np.exp`` the way the
    direct formula does.

    Args:
        z: A real scalar or array-like of real numbers.

    Returns:
        A NumPy float scalar when ``z`` is a scalar, otherwise an ndarray
        with the same shape as ``z``; every value lies in [0, 1].
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    # For z >= 0 use 1 / (1 + e^-z); for z < 0 use the algebraically equal
    # e^z / (1 + e^z). Both are written in terms of decay = e^-|z|.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with an empty tuple turns a 0-d array back into a scalar and
    # leaves arrays of higher rank unchanged.
    return result[()]

# 9. Define Loss Function
def loss(theta, X, y):
    """Return the summed binary cross-entropy of the logistic model.

    The value equals -sum(y * log(h) + (1 - y) * log(1 - h)) with
    h = sigmoid(X @ theta). It is evaluated directly from the logits so that
    it stays finite when the sigmoid would round to exactly 0 or 1 (where
    the log-based formula yields inf or nan).

    Args:
        theta: 1-D weight vector with one entry per column of ``X``.
        X: 2-D feature matrix, one row per sample.
        y: 1-D array of 0/1 labels, one per row of ``X``.

    Returns:
        The total (not averaged) cross-entropy over all samples as a float.
    """
    z = np.dot(X, theta)
    y = np.asarray(y)
    # Identities used below:
    #   -log(sigmoid(z))     = log(1 + e^-z) = logaddexp(0, -z)
    #   -log(1 - sigmoid(z)) = log(1 + e^z)  = logaddexp(0,  z)
    return np.sum(y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z))

# 10. Gradient Descent Function
def gradient_descent(theta, X, y, alpha, num_iterations):
    """Fit logistic-regression weights with batch gradient descent.

    Each iteration computes predictions for every row of ``X``, forms the
    gradient X^T (h - y) / m, and moves the weights by ``alpha`` times that
    gradient in the opposite direction.

    Args:
        theta: Initial 1-D weight vector, one entry per column of ``X``.
            It is copied, so the caller's array is left unchanged.
        X: 2-D feature matrix, one row per sample.
        y: 1-D array of 0/1 labels, one per row of ``X``.
        alpha: Learning rate (step size multiplier).
        num_iterations: Number of update steps to perform.

    Returns:
        A new float ndarray holding the weights after the final step.
    """
    # Work on a float copy: the in-place update below would otherwise modify
    # the caller's array and would fail outright for integer-typed weights.
    theta = np.array(theta, dtype=float)
    m = len(y)
    for _ in range(num_iterations):
        h = sigmoid(X.dot(theta))
        gradient = X.T.dot(h - y) / m
        theta -= alpha * gradient
    return theta

# 11. Fit the weights: 1000 gradient-descent steps at a learning rate of 0.01
theta = gradient_descent(theta, X, y, num_iterations=1000, alpha=0.01)

# 12. Prediction Function
def predict(theta, X):
    """Label each row of ``X`` as 1 or 0 using the given weights.

    Args:
        theta: 1-D weight vector, one entry per column of ``X``.
        X: 2-D feature matrix, one row per sample.

    Returns:
        Integer array with 1 where the sigmoid of ``X.dot(theta)`` is at
        least 0.5 and 0 everywhere else.
    """
    is_positive = sigmoid(X.dot(theta)) >= 0.5
    return np.where(is_positive, 1, 0)

# 13. Classify the same rows the weights were fitted on
y_pred = predict(theta, X)

print("\n--- Step 5: Predicted Values (y_pred) ---\n")
print(y_pred)

# 14. Accuracy: share of rows whose predicted label matches the true label
accuracy = (y_pred.flatten() == y).mean()
print("\n--- Step 6: Model Accuracy ---\n")
print("Accuracy:", accuracy)

print("\n--- Step 7: Actual Target Values (Y) ---\n")
print(Y)

# 15. Score two hand-written feature rows, each with its own heading
new_inputs = (
    (
        "\n--- Step 8: Prediction for New Input 1 ---\n",
        [0, 87, 0, 95, 0, 2, 78, 2, 0, 0, 1, 0],
    ),
    (
        "\n--- Step 9: Prediction for New Input 2 ---\n",
        [0, 0, 0, 0, 0, 2, 8, 2, 0, 0, 1, 0],
    ),
)
for heading, row in new_inputs:
    # Wrap the row in an outer list so it is a 1 x n_features matrix.
    xnew = np.array([row])
    y_prednew = predict(theta, xnew)
    print(heading)
    print(y_prednew)



--- Step 1: Original Dataset ---

   sl_no gender  ssc_p    ssc_b  hsc_p    hsc_b     hsc_s  degree_p  \
0      1      M  67.00   Others  91.00   Others  Commerce     58.00   
1      2      M  79.33  Central  78.33   Others   Science     77.48   
2      3      M  65.00  Central  68.00  Central      Arts     64.00   
3      4      M  56.00  Central  52.00  Central   Science     52.00   
4      5      M  85.80  Central  73.60  Central  Commerce     73.30   

    degree_t workex  etest_p specialisation  mba_p      status    salary  
0   Sci&Tech     No     55.0         Mkt&HR  58.80      Placed  270000.0  
1   Sci&Tech    Yes     86.5        Mkt&Fin  66.28      Placed  200000.0  
2  Comm&Mgmt     No     75.0        Mkt&Fin  57.80      Placed  250000.0  
3   Sci&Tech     No     66.0         Mkt&HR  59.43  Not Placed       NaN  
4  Comm&Mgmt     No     96.8        Mkt&Fin  55.50      Placed  425000.0  

--- Step 2: Dataset Data Types ---

gender            category
ssc_p              float