In [1]:
import numpy as np
import pandas as pd


In [2]:
# Load the placement dataset from a CSV file, using a path relative to the
# notebook's working directory.
data = pd.read_csv("./collegePlace.csv")

In [3]:
# Show the frame as loaded (bare last expression -> rich display).
data

Unnamed: 0,Age,Gender,Stream,Internships,CGPA,Hostel,HistoryOfBacklogs,PlacedOrNot
0,22,Male,Electronics And Communication,1,8,1,1,1
1,21,Female,Computer Science,0,7,1,1,1
2,22,Female,Information Technology,1,6,0,0,1
3,21,Male,Information Technology,0,8,0,1,1
4,22,Male,Mechanical,0,8,1,0,1
...,...,...,...,...,...,...,...,...
2961,23,Male,Information Technology,0,7,0,0,0
2962,23,Male,Mechanical,1,7,1,0,0
2963,22,Male,Information Technology,1,7,0,0,0
2964,22,Male,Computer Science,1,7,0,0,0


In [4]:
from sklearn.model_selection import train_test_split


In [5]:
from sklearn.preprocessing import LabelEncoder

In [6]:
# Single LabelEncoder instance used below to turn text columns into integer codes.
label = LabelEncoder()

In [7]:
# Replace every text (object-dtype) column with integer codes, in place.
#
# `label` is refit on each column, so once the loop ends it only remembers the
# classes of the last column it processed. Record each column's classes as we
# go so the codes can still be mapped back to the original text later:
# code k in `data[column]` corresponds to `label_classes[column][k]`.
label_classes = {}
for column in data.select_dtypes(include="object").columns:
    data[column] = label.fit_transform(data[column])
    label_classes[column] = label.classes_.copy()
    

In [8]:
# Show the frame again after the object columns were replaced by integer codes.
data

Unnamed: 0,Age,Gender,Stream,Internships,CGPA,Hostel,HistoryOfBacklogs,PlacedOrNot
0,22,1,3,1,8,1,1,1
1,21,0,1,0,7,1,1,1
2,22,0,4,1,6,0,0,1
3,21,1,4,0,8,0,1,1
4,22,1,5,0,8,1,0,1
...,...,...,...,...,...,...,...,...
2961,23,1,4,0,7,0,0,0
2962,23,1,5,1,7,1,0,0
2963,22,1,4,1,7,0,0,0
2964,22,1,1,1,7,0,0,0


In [9]:
# Feature matrix: every column except the target.
# The target is dropped by name rather than by position so the feature set
# does not silently change if the column order of the CSV ever does.
X = data.drop(columns="PlacedOrNot")

In [10]:
# Show the feature matrix to confirm the target column is no longer present.
X

Unnamed: 0,Age,Gender,Stream,Internships,CGPA,Hostel,HistoryOfBacklogs
0,22,1,3,1,8,1,1
1,21,0,1,0,7,1,1
2,22,0,4,1,6,0,0
3,21,1,4,0,8,0,1
4,22,1,5,0,8,1,0
...,...,...,...,...,...,...,...
2961,23,1,4,0,7,0,0
2962,23,1,5,1,7,1,0
2963,22,1,4,1,7,0,0
2964,22,1,1,1,7,0,0


In [11]:
# Target vector: the `PlacedOrNot` column.
# Selected by name rather than by position so it keeps pointing at the right
# column even if the column order of the CSV changes.
y = data["PlacedOrNot"]

In [12]:
# Print the target Series to check its values, length and dtype.
print(y)

0       1
1       1
2       1
3       1
4       1
       ..
2961    0
2962    0
2963    0
2964    0
2965    1
Name: PlacedOrNot, Length: 2966, dtype: int64


In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from one run to the next.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [14]:
# (rows, columns) of the training feature matrix.
x_train.shape

(2372, 7)

In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report


In [34]:
# Candidate classifiers as (display name, unfitted estimator) pairs.
# Order matters: later cells match each entry with its report by position.
models = [
    ("Logistic Regression", LogisticRegression(C=1, solver="liblinear")),
    ("Random Forest", RandomForestClassifier(n_estimators=10, random_state=40)),
    ("Decision Tree", DecisionTreeClassifier(max_depth=5, random_state=40)),
]

# --------------------------------------------------

In [35]:
# Fit each candidate on the training split and keep its classification report
# (as a nested dict) for the held-out split. `reports[k]` belongs to `models[k]`.
reports = []

for model_name, model in models:
    # scikit-learn's fit() returns the estimator itself, so predict() can be chained.
    y_pred = model.fit(x_train, y_train).predict(x_test)
    report = classification_report(
        y_true=y_test,
        y_pred=y_pred,
        output_dict=True,
    )
    reports.append(report)

In [36]:
# Show the collected classification-report dicts, in the same order as `models`.
reports

[{'0': {'precision': 0.7148014440433214,
   'recall': 0.7071428571428572,
   'f1-score': 0.7109515260323159,
   'support': 280.0},
  '1': {'precision': 0.7413249211356467,
   'recall': 0.7484076433121019,
   'f1-score': 0.7448494453248812,
   'support': 314.0},
  'accuracy': 0.7289562289562289,
  'macro avg': {'precision': 0.7280631825894841,
   'recall': 0.7277752502274795,
   'f1-score': 0.7279004856785986,
   'support': 594.0},
  'weighted avg': {'precision': 0.7288222720012173,
   'recall': 0.7289562289562289,
   'f1-score': 0.7288706281499346,
   'support': 594.0}},
 {'0': {'precision': 0.8403908794788274,
   'recall': 0.9214285714285714,
   'f1-score': 0.879045996592845,
   'support': 280.0},
  '1': {'precision': 0.9233449477351916,
   'recall': 0.8439490445859873,
   'f1-score': 0.8818635607321131,
   'support': 314.0},
  'accuracy': 0.8804713804713805,
  'macro avg': {'precision': 0.8818679136070096,
   'recall': 0.8826888080072793,
   'f1-score': 0.880454778662479,
   'support

In [37]:
import mlflow

In [38]:
# Log one MLflow run per trained model: its name, test accuracy and the fitted
# estimator itself.
#
# The tracking URI must be set BEFORE the experiment is selected: the
# experiment is looked up (or created) in whichever store is active at the
# time of the call, so selecting it first would resolve it against the default
# store instead of the server the runs are sent to.
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000/")  # tracking server that receives the runs
# NOTE: the experiment name contains a typo ("Anaylsis"). It is kept as-is on
# purpose so new runs stay grouped with the runs already logged under it.
mlflow.set_experiment("Placement Anaylsis")

# Guard against stale notebook state (e.g. `models` edited without re-running
# the evaluation cell), which would pair models with the wrong reports.
assert len(models) == len(reports), "re-run the evaluation cell: models/reports are out of sync"

for (model_name, model), report in zip(models, reports):
    with mlflow.start_run(run_name=model_name):
        mlflow.log_param('model_name', model_name)
        mlflow.log_metric('accuracy', report['accuracy'])

        # Store the fitted estimator as a run artifact named "model".
        mlflow.sklearn.log_model(model, "model")



🏃 View run Logistic Regression at: http://127.0.0.1:5000/#/experiments/847679343800338307/runs/d61126abff5542eeb77aa3d75a7009ad
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/847679343800338307




🏃 View run Random Forest at: http://127.0.0.1:5000/#/experiments/847679343800338307/runs/ab240591cda940fe96b85fa14eee1144
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/847679343800338307




🏃 View run Decision Tree at: http://127.0.0.1:5000/#/experiments/847679343800338307/runs/92f826fd8bf141449a8f4cb41ec87ae5
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/847679343800338307
