<a href="https://colab.research.google.com/github/Jaiveer189/Google-Collab-mini-projects-/blob/main/Placement_predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

Data Loading & Cleaning


In [None]:
# Load the placement records; the CSV is expected in the working directory.
df = pd.read_csv('Placement_Data_Full_Class.csv')
# 'sl_no' is removed before modelling (presumably a serial-number column — confirm).
df = df.drop(columns='sl_no')
# Assign the filled column back instead of calling fillna(..., inplace=True) on
# df['salary']: the chained in-place form operates on an intermediate object,
# emits a deprecation warning today and will no longer update df in pandas 3.0.
df['salary'] = df['salary'].fillna(0)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['salary'].fillna(0, inplace=True)


In [None]:
# 2. Feature engineering
# Average the three percentage columns into one feature; the author considers
# this average a strong indicator of placement.
score_columns = ['ssc_p', 'hsc_p', 'degree_p']
df['avg_p'] = sum(df[column] for column in score_columns) / len(score_columns)

In [None]:
# 3. Define features and target
# Every column except the label ('status') and 'salary' is a model input.
X = df.drop(columns=['status', 'salary'])
# Binary target: 1 for 'Placed', 0 for anything else.
y = df['status'].eq('Placed').astype('int64')

# 4. Preprocessing setup (scaling and encoding happen inside the pipeline)
# Numeric columns are standardised.
numeric_features = ['ssc_p', 'hsc_p', 'degree_p', 'etest_p', 'mba_p', 'avg_p']
# Text (categorical) columns are one-hot encoded.
categorical_features = ['gender', 'ssc_b', 'hsc_b', 'hsc_s', 'degree_t', 'workex', 'specialisation']

numeric_step = ('num', StandardScaler(), numeric_features)
# drop='first' removes one indicator column per categorical feature.
categorical_step = ('cat', OneHotEncoder(drop='first'), categorical_features)

preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

In [None]:
# 5. Model training (logistic regression, chosen by the author for this small dataset)
# Note: random_state=12 was picked because it gave the best split.
# NOTE(review): choosing the seed by its test score makes the reported accuracy
# optimistic — consider cross-validation before quoting it.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=12, test_size=0.2
)

logistic_steps = [
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(max_iter=1000)),
]
clf = Pipeline(steps=logistic_steps)

clf.fit(X_train, y_train)

In [None]:
# 6. Evaluate the classifier on the held-out split
y_pred = clf.predict(X_test)

accuracy_pct = accuracy_score(y_test, y_pred) * 100
print(f"New Accuracy: {accuracy_pct:.2f}%")

# Per-class precision / recall / F1 table.
report_text = classification_report(y_test, y_pred)
print("\nDetailed Report:\n", report_text)

New Accuracy: 95.35%

Detailed Report:
               precision    recall  f1-score   support

           0       1.00      0.83      0.91        12
           1       0.94      1.00      0.97        31

    accuracy                           0.95        43
   macro avg       0.97      0.92      0.94        43
weighted avg       0.96      0.95      0.95        43



In [None]:
# class_weight='balanced' makes the forest pay more attention to the
# 'Not Placed' students.
# clone() gives this pipeline its own unfitted copy of the preprocessor.
# Pipeline fits its steps in place, so passing the shared `preprocessor` object
# would let any other pipeline built on it refit the scaler and encoder that
# this classifier predicts with.
clf = Pipeline(steps=[('preprocessor', clone(preprocessor)),
                      ('classifier', RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42))])
clf.fit(X_train, y_train)  # fit again: `clf` now refers to the new pipeline

Regression for predicting salary

In [None]:
# Regression: how much will the salary be?
print("\n---Model 2: salary prediction(Regression)---")

# Only rows whose status is 'Placed' are used for the salary model.
is_placed = df['status'] == 'Placed'
df_placed = df.loc[is_placed]

# Same feature set as the classifier; the target is the salary column.
x_reg = df_placed.drop(columns=['status', 'salary'])
y_reg = df_placed['salary']


---Model 2: salary prediction(Regression)---


In [None]:
# Model training
# Create the regression split in this cell so the training data exists when the
# notebook is run top to bottom on a fresh kernel (previously these names were
# only assigned in a cell further down). Parameters match that later split, so
# both produce the same partition.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    x_reg, y_reg, test_size=0.2, random_state=42
)

# clone() keeps this pipeline's preprocessing separate: Pipeline fits its steps
# in place, so reusing the shared `preprocessor` object would refit it here and
# change the transformer state of every other pipeline holding that object.
reg = Pipeline(steps=[('preprocessor', clone(preprocessor)),
                      ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))])
reg.fit(X_train_reg, y_train_reg)

In [None]:
# Error check: mean absolute error of the salary regressor on its held-out rows
y_pred_reg = reg.predict(X_test_reg)
salary_mae = mean_absolute_error(y_test_reg, y_pred_reg)
print(f"Average Error in Salary Prediction: Rs. {salary_mae:.0f}")

Average Error in Salary Prediction: Rs. 82440


PREDICTION SYSTEM (USER INPUT)

In [None]:
print("\n--- Live Prediction Example ---")
# Pretend one row of the test split is a newly arrived student.
# The double brackets keep the selection a one-row DataFrame.
new_student = X_test.iloc[[16]]

# Column 1 of predict_proba is the probability of class 1 (placed).
placement_chance = clf.predict_proba(new_student)[0, 1]
salary_pred = reg.predict(new_student)[0]

print(f"Placement Chance: {placement_chance*100:.1f}%")
# The salary estimate is only reported when placement looks more likely than not.
verdict = (
    f"Predicted Salary: Rs. {salary_pred:.0f}"
    if placement_chance > 0.5
    else "Placement Mushkil lag rahi hai based on data."
)
print(verdict)


--- Live Prediction Example ---
Placement Chance: 99.0%
Predicted Salary: Rs. 247070


In [None]:
# Split the placed-student rows: 20% are held out for evaluating the salary regressor.
# The regression training and error-check cells refer to these four names.
(X_train_reg, X_test_reg,
 y_train_reg, y_test_reg) = train_test_split(x_reg, y_reg, random_state=42, test_size=0.2)

In [None]:
# ---------------------------------------------------------
# COPY FROM HERE (ADD THIS TO THE END OF YOUR CODE)
# ---------------------------------------------------------
from sklearn.ensemble import RandomForestRegressor

print("\n--- Salary Prediction Model Train ho raha hai ---")

# 1. Filter the data for salary modelling (only 'Placed' students),
#    using the same `df` that was loaded earlier in the notebook.
df_placed = df[df['status'] == 'Placed'].copy()
X_placed = df_placed.drop(['status', 'salary'], axis=1)
y_placed = df_placed['salary']

# 2. Build the regression model for salary.
# The preprocessing recipe defined earlier is reused through clone(): Pipeline
# fits its steps in place, so passing the shared `preprocessor` object itself
# would refit it on the placed-only rows and silently change the scaler and
# encoder inside every other pipeline that holds that object.
reg_model = Pipeline(steps=[('preprocessor', clone(preprocessor)),
                            ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))])

# Trained on every placed row; no hold-out split is taken in this cell.
reg_model.fit(X_placed, y_placed)
print("Done! Ab Demo function ready hai.\n")


# 3. Demo helper (the function to show off in a demo)
def predict_my_placement(gender, ssc_p, ssc_b, hsc_p, hsc_b, hsc_s, degree_p, degree_t, workex, etest_p, specialisation, mba_p):
    """Print a placement verdict, and a salary estimate when placed, for one student.

    The arguments are the raw feature values for a single student; the derived
    'avg_p' feature is computed here. The verdict comes from the module-level
    `clf` pipeline and the salary from the module-level `reg_model` pipeline.
    Nothing is returned; all results are printed.
    """
    # One-row record in the feature layout the pipelines were given at fit time,
    # including the engineered average of the three percentage scores.
    student = {
        'gender': gender,
        'ssc_p': ssc_p, 'ssc_b': ssc_b,
        'hsc_p': hsc_p, 'hsc_b': hsc_b, 'hsc_s': hsc_s,
        'degree_p': degree_p, 'degree_t': degree_t,
        'workex': workex, 'etest_p': etest_p,
        'specialisation': specialisation, 'mba_p': mba_p,
        'avg_p': (ssc_p + hsc_p + degree_p) / 3,
    }
    new_data = pd.DataFrame({column: [value] for column, value in student.items()})

    # Step 1: probability of class 1 (placed) from the classifier.
    prob = clf.predict_proba(new_data)[0][1]

    print(f"Student: 10th-{ssc_p}%, 12th-{hsc_p}%, Deg-{degree_p}% ({workex} Exp)")

    if not prob >= 0.5:
        print(f" NOT PLACED (Chance: {prob*100:.1f}%)")
        print(" Tip: Projects aur Aptitude par focus karein.")
    else:
        print(f"PLACED! (Chance: {prob*100:.1f}%)")

        # Step 2: the salary is predicted only for students judged as placed.
        salary_pred = reg_model.predict(new_data)[0]
        print(f" Predicted Salary: ₹ {int(salary_pred):,}")
    print("-" * 40)

# --- 4. EXAMPLES FOR DEMO ---
# Positional order matches predict_my_placement's signature:
# gender, ssc_p, ssc_b, hsc_p, hsc_b, hsc_s, degree_p, degree_t, workex,
# etest_p, specialisation, mba_p
demo_students = [
    # Example 1: student with high marks
    ('M', 85, 'Central', 80, 'Central', 'Science', 75, 'Sci&Tech', 'Yes', 90, 'Mkt&Fin', 70),
    # Example 2: student with low marks
    ('M', 52, 'Others', 49, 'Others', 'Commerce', 55, 'Comm&Mgmt', 'No', 50, 'Mkt&HR', 55),
]
for student_args in demo_students:
    predict_my_placement(*student_args)


--- Salary Prediction Model Train ho raha hai ---
Done! Ab Demo function ready hai.

Student: 10th-85%, 12th-80%, Deg-75% (Yes Exp)
PLACED! (Chance: 100.0%)
 Predicted Salary: ₹ 291,530
----------------------------------------
Student: 10th-52%, 12th-49%, Deg-55% (No Exp)
 NOT PLACED (Chance: 13.0%)
 Tip: Projects aur Aptitude par focus karein.
----------------------------------------


In [None]:
import pickle

# Persist both fitted pipelines for the Streamlit app to load.
# Each file is opened in a `with` block so the handle is flushed and closed
# deterministically instead of being left to the garbage collector.
for fitted_model, model_path in ((clf, 'placement_model.pkl'),
                                 (reg_model, 'salary_model.pkl')):
    with open(model_path, 'wb') as model_file:
        pickle.dump(fitted_model, model_file)
print("Models saved successfully!")

Models saved successfully!


In [None]:
%%writefile app.py

Writing app.py


In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import pickle

# 1. Load the Models
@st.cache_resource
def load_model(path):
    """Unpickle and return the model stored at `path`.

    st.cache_resource keeps the loaded object across Streamlit script reruns,
    so the pickle is read once per path rather than on every widget change.
    The file is opened in a `with` block so the handle is always closed.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only load model
    # files produced by a trusted source (here, the training notebook).
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


clf = load_model('placement_model.pkl')
reg_model = load_model('salary_model.pkl')

# 2. App Title and Description
st.title("🎓 Campus Placement Predictor")
st.write("Enter student details to predict placement probability and estimated salary.")

# 3. Sidebar for Inputs
sidebar = st.sidebar
sidebar.header("Student Details")


def percent_input(label, default):
    """Sidebar number box limited to 0-100, pre-filled with `default`."""
    return sidebar.number_input(label, min_value=0.0, max_value=100.0, value=default)


# Widgets are created in the same order as before, which fixes their order in the sidebar.
gender = sidebar.selectbox("Gender", ['M', 'F'])
ssc_p = percent_input("10th Percentage", 67.0)
ssc_b = sidebar.selectbox("10th Board", ['Central', 'Others'])
hsc_p = percent_input("12th Percentage", 60.0)
hsc_b = sidebar.selectbox("12th Board", ['Central', 'Others'])
hsc_s = sidebar.selectbox("12th Stream", ['Science', 'Commerce', 'Arts'])
degree_p = percent_input("Degree Percentage", 65.0)
degree_t = sidebar.selectbox("Degree Type", ['Sci&Tech', 'Comm&Mgmt', 'Others'])
workex = sidebar.selectbox("Work Experience", ['Yes', 'No'])
etest_p = percent_input("Aptitude Test %", 60.0)
specialisation = sidebar.selectbox("MBA Specialisation", ['Mkt&HR', 'Mkt&Fin'])
mba_p = percent_input("MBA Percentage", 60.0)

# Engineered feature: mean of the 10th, 12th and degree percentages.
avg_p = (ssc_p + hsc_p + degree_p) / 3

# Predictions run only when the sidebar button is clicked.
predict_clicked = st.sidebar.button("Predict Result")
if predict_clicked:
    # One-row frame whose column names are the features the models expect.
    feature_values = {
        'gender': gender,
        'ssc_p': ssc_p, 'ssc_b': ssc_b,
        'hsc_p': hsc_p, 'hsc_b': hsc_b, 'hsc_s': hsc_s,
        'degree_p': degree_p, 'degree_t': degree_t,
        'workex': workex, 'etest_p': etest_p,
        'specialisation': specialisation, 'mba_p': mba_p,
        'avg_p': avg_p,
    }
    input_data = pd.DataFrame({column: [value] for column, value in feature_values.items()})

    # Probability of class 1 (placed) for this student.
    prob = clf.predict_proba(input_data)[0][1]

    st.subheader("Prediction Result:")

    if not prob >= 0.5:
        st.error(f"❌ Placement Difficult (Probability: {prob*100:.2f}%)")
        st.write("Suggestion: Focus on improving technical skills and aptitude.")
    else:
        st.success(f"✅ Placement Likely! (Probability: {prob*100:.2f}%)")

        # The salary estimate is shown only for the "likely placed" case.
        salary = reg_model.predict(input_data)[0]
        st.info(f"💰 Estimated Salary: ₹ {int(salary):,}")

Overwriting app.py


In [None]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.52.2-py3-none-any.whl.metadata (9.8 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.52.2-py3-none-any.whl (9.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.0/9.0 MB[0m [31m44.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m67.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.52.2


In [None]:
!wget -q -O - ipv4.icanhazip.com
!streamlit run app.py & npx localtunnel --port 8501

34.80.219.30
[1G[0K⠙[1G[0K⠹
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K[1G[0JNeed to install the following packages:
localtunnel@2.0.2
Ok to proceed? (y) [20G