In [1]:
import pandas as pd
# Read the placement CSV into a DataFrame, then print its schema summary
# (column names, non-null counts, dtypes, memory usage).
df = pd.read_csv(filepath_or_buffer="student_placement_data.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 11 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   study_hours_per_day      1000 non-null   float64
 1   dsa_hours_per_week       1000 non-null   float64
 2   dsa_problems_solved      1000 non-null   int64  
 3   projects_count           1000 non-null   int64  
 4   internships              1000 non-null   int64  
 5   cgpa                     1000 non-null   float64
 6   mock_interviews          1000 non-null   int64  
 7   communication_score      1000 non-null   float64
 8   resume_score             1000 non-null   float64
 9   hackathons_participated  1000 non-null   int64  
 10  placement_status         1000 non-null   object 
dtypes: float64(5), int64(5), object(1)
memory usage: 86.1+ KB


In [2]:
# ===============================
# 1. Import Required Libraries
# ===============================
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


# ===============================
# 2. Load Dataset
# ===============================
# The path is relative, so the CSV is resolved against the current working
# directory of the kernel.
df = pd.read_csv("student_placement_data.csv")

print("Dataset Loaded Successfully")
print(df.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly: wrapping it in print() would add a stray "None" line.
df.info()


# ===============================
# 3. Encode Target Column
# placement_status: Placed / Not Placed
# ===============================
# Fit the encoder on the label column, then replace the column with its
# integer codes. With the two labels named above the expected mapping is:
#   Not Placed -> 0
#   Placed     -> 1
le = LabelEncoder().fit(df['placement_status'])
df['placement_status'] = le.transform(df['placement_status'])

# Show how many rows fall into each encoded class.
print("\nTarget column encoded")
print(df['placement_status'].value_counts())


# ===============================
# 4. Split Features and Target
# ===============================
# Every column except the label is a feature; the label column is the target.
X = df.drop('placement_status', axis=1)
y = df['placement_status']


# ===============================
# 5. Train-Test Split
# ===============================
# Hold out 20% of the rows for evaluation with a fixed seed for repeatability.
# stratify=y keeps the class ratio of placement_status the same in the train
# and test partitions, so the evaluation is not skewed by an unlucky draw of
# the minority class.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

print("\nData Split Completed")
print("Training samples:", X_train.shape[0])
print("Testing samples:", X_test.shape[0])


# ===============================
# 6. Feature Scaling
# ===============================
# The scaler is fitted on the training split only; the same fitted scaler is
# then applied to both splits, which are overwritten with their scaled values.
scaler = StandardScaler().fit(X_train)

X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

print("\nFeature Scaling Done")


# ===============================
# 7. Train Logistic Regression Model
# ===============================
# Default hyperparameters; fit() returns the estimator, so construction and
# training are chained into a single expression.
model = LogisticRegression().fit(X_train, y_train)

print("\nLogistic Regression Model Trained")


# ===============================
# 8. Model Evaluation
# ===============================
# Predict on the held-out (scaled) test split and compare with the true labels.
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("\nAccuracy:", accuracy)

# Heading and body are emitted by one print call each; sep="\n" puts the body
# on the line after its heading.
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")


# ===============================
# 9. Predict for a New Student
# ===============================
# The scaler was fitted on a DataFrame, so the sample is built as a one-row
# DataFrame keyed by feature name instead of a bare nested list. Selecting
# X.columns puts the values in the training column order and raises a KeyError
# if a feature is missing, rather than silently scaling misaligned values.
new_student = pd.DataFrame([{
    'study_hours_per_day': 6,
    'dsa_hours_per_week': 10,
    'dsa_problems_solved': 250,
    'projects_count': 4,
    'internships': 1,
    'cgpa': 8.5,
    'mock_interviews': 6,
    'communication_score': 7.8,
    'resume_score': 8.2,
    'hackathons_participated': 3
}])[X.columns.tolist()]

# Apply the same scaling the model was trained with before predicting.
new_student_scaled = scaler.transform(new_student)
prediction = model.predict(new_student_scaled)

print("\nNew Student Prediction:")
# Encoded label 1 is the "Placed" class.
if prediction[0] == 1:
    print("üéâ Student is LIKELY TO BE PLACED")
else:
    print("‚ùå Student is NOT LIKELY TO BE PLACED")


Dataset Loaded Successfully
   study_hours_per_day  dsa_hours_per_week  dsa_problems_solved  \
0                  5.2                10.2                  103   
1                  4.3                 8.8                  104   
2                  5.5                 6.2                   84   
3                  6.8                 4.1                   36   
4                  4.1                 8.1                   97   

   projects_count  internships  cgpa  mock_interviews  communication_score  \
0               3            0  9.45                4                  5.0   
1               4            0  7.75                0                  7.9   
2               0            0  7.97                0                  5.1   
3               3            0  8.67                1                  8.3   
4               1            1  7.36                4                  6.0   

   resume_score  hackathons_participated placement_status  
0           4.0                        2



In [3]:
# ===============================
# Student Placement Prediction
# Logistic Regression
# ===============================

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# -------------------------------
# 1. Load Dataset
# -------------------------------
df = pd.read_csv("student_placement_data.csv")

# -------------------------------
# 2. Encode Target Variable
# -------------------------------
# Explicit label -> integer mapping for the target column.
encoded_status = df['placement_status'].map({
    'Placed': 1,
    'Not Placed': 0
})

# Series.map produces NaN for any value that is not a key of the mapping
# (different casing, stray whitespace, missing values). Stop here with the
# offending labels listed instead of failing later inside the split or fit.
unmapped_labels = df.loc[encoded_status.isna(), 'placement_status'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        f"Unexpected placement_status values: {unmapped_labels.tolist()}"
    )

df['placement_status'] = encoded_status.astype('int64')

# -------------------------------
# 3. Split Features & Target
# -------------------------------
# Target is the encoded label column; features are all remaining columns.
y = df['placement_status']
X = df.drop(columns='placement_status')

# -------------------------------
# 4. Train-Test Split
# -------------------------------
# 80/20 split with a fixed seed, stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42, test_size=0.2
)

# -------------------------------
# 5. Feature Scaling
# -------------------------------
# Fit on the training split only, then transform both splits with that fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# -------------------------------
# 6. Train Logistic Regression
# -------------------------------
# Default hyperparameters, trained on the scaled training features.
model = LogisticRegression().fit(X_train_scaled, y_train)

# -------------------------------
# 7. Model Evaluation
# -------------------------------
# Score the model on the scaled held-out split.
y_pred = model.predict(X_test_scaled)

print("‚úÖ Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
# Heading and report share one print call; sep="\n" starts the report on a
# new line after the heading.
print("\nüìä Classification Report:\n", classification_report(y_test, y_pred), sep="\n")

# -------------------------------
# 8. USER INPUT PREDICTION
# -------------------------------
# One-row frame, given column-wise: each feature name maps to a single value.
user_input = pd.DataFrame({
    'study_hours_per_day': [6],
    'dsa_hours_per_week': [12],
    'dsa_problems_solved': [250],
    'projects_count': [4],
    'internships': [1],
    'cgpa': [8.2],
    'mock_interviews': [5],
    'communication_score': [7.5],
    'resume_score': [8.0],
    'hackathons_participated': [2]
})

# Reuse the scaler fitted on the training split.
user_input_scaled = scaler.transform(user_input)

# Hard class prediction, plus the probability in column 1 of predict_proba
# for the single input row (the column for encoded label 1).
prediction = model.predict(user_input_scaled)
probability = model.predict_proba(user_input_scaled)[0, 1]

# -------------------------------
# 9. Output Result
# -------------------------------
print("\nüéØ Prediction Result:")

# Encoded label 1 means "Placed". The message for this branch is kept on a
# single line: a quoted string split across two lines is an unterminated
# literal and stops the whole cell from parsing.
if prediction[0] == 1:
    print("üéâ Student is LIKELY TO BE PLACED")
else:
    print("‚ùå Student is NOT LIKELY TO BE PLACED")

# probability is a fraction in [0, 1]; show it as a percentage with 2 decimals.
print(f"üìà Placement Probability: {probability * 100:.2f}%")


‚úÖ Accuracy: 0.755

üìä Classification Report:

              precision    recall  f1-score   support

           0       0.64      0.32      0.42        57
           1       0.77      0.93      0.84       143

    accuracy                           0.76       200
   macro avg       0.71      0.62      0.63       200
weighted avg       0.74      0.76      0.72       200


üéØ Prediction Result:
üéâ Student is LIKELY TO BE PLACED
üìà Placement Probability: 98.17%
