In [1]:
# ==============================
# 1. Import Libraries
# ==============================
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


# ==============================
# 2. Load the Dataset
# ==============================
# The path is relative to the notebook's working directory.
DATASET_PATH = "Placement_Data.csv"
data = pd.read_csv(DATASET_PATH)

# Preview the raw frame; the extra '\n' argument leaves a blank line after it.
print("Original Data (First 5 Rows):")
print(data.head(), '\n')


# ==============================
# 3. Data Cleaning
# ==============================
# `drop` returns a new frame, so the raw `data` loaded above is left untouched.
# NOTE(review): sl_no appears to be a plain row counter, and salary is NaN for
# the "Not Placed" row in the preview, so it presumably is only known after
# placement and would leak the target -- confirm against the full dataset.
data1 = data.drop(columns=["sl_no", "salary"])
print("Data After Dropping 'sl_no' and 'salary':")
print(data1.head(), '\n')

# The two checks below are diagnostics only: nothing is imputed or removed.
missing_per_column = data1.isnull().sum()
print("Missing Values in Each Column:")
print(missing_per_column, '\n')

duplicate_row_count = data1.duplicated().sum()
print("Number of Duplicate Rows:")
print(duplicate_row_count, '\n')


# ==============================
# 4. Encoding Categorical Variables
# ==============================
# Each column gets its own LabelEncoder, stored in `encoders`, so every
# string <-> integer mapping remains available after this step (for example
# encoders["gender"].classes_ or encoders["workex"].transform(["Yes"])).
# Re-fitting one shared encoder column after column would keep only the
# mapping of the last column it was fitted on.
categorical_columns = [
    "gender", "ssc_b", "hsc_b", "hsc_s",
    "degree_t", "workex", "specialisation", "status",
]
encoders = {}
for column in categorical_columns:
    encoders[column] = LabelEncoder()
    # Replaces the string column in `data1` with its integer codes.
    data1[column] = encoders[column].fit_transform(data1[column])

# Keep the original name `le` bound to the encoder fitted on "status",
# which is what it held after the last fit in the previous version.
le = encoders["status"]

print("Data After Label Encoding:")
print(data1.head(), '\n')


# ==============================
# 5. Feature Selection
# ==============================
# Select features by dropping the target by name rather than by position:
# a positional slice (all columns but the last) silently picks the wrong
# columns if "status" is ever not the final column of `data1`.
x = data1.drop(columns=["status"])   # Independent variables
y = data1["status"]                  # Target variable

print("Features (X):")
print(x.head(), '\n')

print("Target (Y):")
print(y.head(), '\n')


# ==============================
# 6. Train-Test Split
# ==============================
# Hold out a fixed fraction of the rows for evaluation; the fixed seed makes
# the split repeatable across runs.
TEST_FRACTION = 0.2
SPLIT_SEED = 0
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)


# ==============================
# 7. Model Training
# ==============================
# `fit` returns the estimator itself, so construction and training can be chained.
lr = LogisticRegression(solver="liblinear").fit(x_train, y_train)


# ==============================
# 8. Model Prediction
# ==============================
# Predicted class codes for the held-out rows.
y_pred = lr.predict(x_test)
print("Predicted Values:")
print(y_pred, '\n')


# ==============================
# 9. Model Evaluation
# ==============================
# Compute every metric from the same (y_test, y_pred) pair first, then report
# them in the order: accuracy, confusion matrix, classification report.
accuracy = accuracy_score(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)
classification_report1 = classification_report(y_test, y_pred)

print("Accuracy Score:", accuracy, '\n')

print("Confusion Matrix:")
print(confusion, '\n')

print("Classification Report:")
print(classification_report1)


# ==============================
# 10. Custom Prediction
# ==============================
# Example input: [gender, ssc_p, ssc_b, hsc_p, hsc_b, hsc_s, degree_p, degree_t, workex, etest_p, specialisation, mba_p]
# Categorical entries must be the label-encoded integers, not the raw strings.
custom_input = [[1, 80, 1, 90, 1, 1, 90, 1, 0, 85, 1, 85]]

# The model was fitted on a DataFrame, so predict on a DataFrame carrying the
# same column names. This avoids scikit-learn's "X does not have valid feature
# names" warning, and pandas raises if the number of values does not match
# the number of training columns.
custom_frame = pd.DataFrame(custom_input, columns=x.columns)
prediction = lr.predict(custom_frame)
print("Custom Prediction (Placement Status):", prediction)

# `le` holds the encoder fitted on "status", so it can turn the predicted
# class code back into the original label.
print("Decoded Placement Status:", le.inverse_transform(prediction))


Original Data (First 5 Rows):
   sl_no gender  ssc_p    ssc_b  hsc_p    hsc_b     hsc_s  degree_p  \
0      1      M  67.00   Others  91.00   Others  Commerce     58.00   
1      2      M  79.33  Central  78.33   Others   Science     77.48   
2      3      M  65.00  Central  68.00  Central      Arts     64.00   
3      4      M  56.00  Central  52.00  Central   Science     52.00   
4      5      M  85.80  Central  73.60  Central  Commerce     73.30   

    degree_t workex  etest_p specialisation  mba_p      status    salary  
0   Sci&Tech     No     55.0         Mkt&HR  58.80      Placed  270000.0  
1   Sci&Tech    Yes     86.5        Mkt&Fin  66.28      Placed  200000.0  
2  Comm&Mgmt     No     75.0        Mkt&Fin  57.80      Placed  250000.0  
3   Sci&Tech     No     66.0         Mkt&HR  59.43  Not Placed       NaN  
4  Comm&Mgmt     No     96.8        Mkt&Fin  55.50      Placed  425000.0   

Data After Dropping 'sl_no' and 'salary':
  gender  ssc_p    ssc_b  hsc_p    hsc_b     hsc_

