In [58]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, mean_squared_error, r2_score

In [42]:
# Load the dataset from 'student.csv' (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer='student.csv')

In [43]:
# Preview the first five rows to check which columns were loaded.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,cgpa,iq,placement
0,0,6.8,123.0,1
1,1,5.9,106.0,0
2,2,5.3,121.0,0
3,3,7.4,132.0,1
4,4,5.8,142.0,0


In [45]:
# Remove the 'Unnamed: 0' column (in the preview it appears to simply repeat
# the row index, so it carries no information for the models).
#
# `df` is rebound instead of mutated with `inplace=True`, and
# `errors='ignore'` is passed, so this cell is idempotent: running it a second
# time, when the column is already gone, is a no-op rather than a KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [46]:
# Preview the first five rows again to confirm which columns remain.
df.head(n=5)

Unnamed: 0,cgpa,iq,placement
0,6.8,123.0,1
1,5.9,106.0,0
2,5.3,121.0,0
3,7.4,132.0,1
4,5.8,142.0,0


In [48]:
# Separate the label column from the predictors.
y = df['placement']
X = df.drop(columns=['placement'])

In [49]:
# Count missing values per column.
df.isna().sum(axis=0)

cgpa         0
iq           0
placement    0
dtype: int64

In [50]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_tr, X_te, y_tr, y_te = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Classification Models

In [52]:
# Decision tree classifier, fitted on the training split.
# `fit` returns the estimator itself, so construction and training are chained.
dtc = DecisionTreeClassifier(
    random_state=42,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
).fit(X_tr, y_tr)

# Prediction on the held-out split
dtc_predict = dtc.predict(X_te)

# Metrics: compute each one first, then report them
dtc_accuracy = accuracy_score(y_te, dtc_predict)
dtc_confusion = confusion_matrix(y_te, dtc_predict)
dtc_report = classification_report(y_te, dtc_predict)

print("Accuracy Score", dtc_accuracy)
print("Confusion Matrix\n", dtc_confusion)
print("Classification Report\n", dtc_report)

Accuracy Score 0.9
Confusion Matrix
 [[10  0]
 [ 2  8]]
Classification Report
               precision    recall  f1-score   support

           0       0.83      1.00      0.91        10
           1       1.00      0.80      0.89        10

    accuracy                           0.90        20
   macro avg       0.92      0.90      0.90        20
weighted avg       0.92      0.90      0.90        20



In [53]:
# Random forest classifier (100 trees), fitted on the training split.
# `fit` returns the estimator itself, so construction and training are chained.
rfc = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
).fit(X_tr, y_tr)

# Prediction on the held-out split
rfc_predict = rfc.predict(X_te)

# Metrics: compute each one first, then report them
rfc_accuracy = accuracy_score(y_te, rfc_predict)
rfc_confusion = confusion_matrix(y_te, rfc_predict)
rfc_report = classification_report(y_te, rfc_predict)

print("Accuracy Score", rfc_accuracy)
print("Confusion Matrix\n", rfc_confusion)
print("Classification Report\n", rfc_report)

Accuracy Score 0.9
Confusion Matrix
 [[10  0]
 [ 2  8]]
Classification Report
               precision    recall  f1-score   support

           0       0.83      1.00      0.91        10
           1       1.00      0.80      0.89        10

    accuracy                           0.90        20
   macro avg       0.92      0.90      0.90        20
weighted avg       0.92      0.90      0.90        20



In [54]:
# Logistic regression with the library's default settings, fitted on the
# training split. `fit` returns the estimator itself, so the calls are chained.
lr = LogisticRegression().fit(X_tr, y_tr)

# Prediction on the held-out split
lr_predict = lr.predict(X_te)

# Metrics: compute each one first, then report them
lr_accuracy = accuracy_score(y_te, lr_predict)
lr_confusion = confusion_matrix(y_te, lr_predict)
lr_report = classification_report(y_te, lr_predict)

print("Accuracy Score", lr_accuracy)
print("Confusion Matrix\n", lr_confusion)
print("Classification Report\n", lr_report)

Accuracy Score 0.85
Confusion Matrix
 [[9 1]
 [2 8]]
Classification Report
               precision    recall  f1-score   support

           0       0.82      0.90      0.86        10
           1       0.89      0.80      0.84        10

    accuracy                           0.85        20
   macro avg       0.85      0.85      0.85        20
weighted avg       0.85      0.85      0.85        20



### Regression Models

In [56]:
# Decision tree regressor, fitted on the same training split as the classifiers.
# NOTE(review): `placement` looks like a 0/1 label in the data preview, so the
# regression metrics below describe the fit to a binary target - confirm that
# this is intended.
dtr = DecisionTreeRegressor(
    random_state=42,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
).fit(X_tr, y_tr)

# Prediction on the held-out split
dtr_predict = dtr.predict(X_te)

# Metrics: the MSE is computed once and reused to derive the RMSE
dtr_mse = mean_squared_error(y_te, dtr_predict)
print("Mean Squared Error", dtr_mse)
print("Root Mean Squared Error", np.sqrt(dtr_mse))
print("R2 Score", r2_score(y_te, dtr_predict))


Mean Squared Error 0.1
Root Mean Squared Error 0.31622776601683794
R2 Score 0.6


In [57]:
# Random forest regressor (100 trees), fitted on the same training split as
# the classifiers.
# NOTE(review): `placement` looks like a 0/1 label in the data preview, so the
# regression metrics below describe the fit to a binary target - confirm that
# this is intended.
rfr = RandomForestRegressor(
    random_state=42,
    n_estimators=100,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
).fit(X_tr, y_tr)

# Prediction on the held-out split
rfr_predict = rfr.predict(X_te)

# Metrics: the MSE is computed once and reused to derive the RMSE
rfr_mse = mean_squared_error(y_te, rfr_predict)
print("Mean Squared Error", rfr_mse)
print("Root Mean Squared Error", np.sqrt(rfr_mse))
print("R2 Score", r2_score(y_te, rfr_predict))

Mean Squared Error 0.11614499999999998
Root Mean Squared Error 0.3408005281686048
R2 Score 0.53542


In [59]:
# Ordinary linear regression with the library's default settings, fitted on
# the same training split as the classifiers.
# NOTE(review): `placement` looks like a 0/1 label in the data preview, so the
# regression metrics below describe the fit to a binary target - confirm that
# this is intended.
lir = LinearRegression().fit(X_tr, y_tr)

# Prediction on the held-out split
lir_predict = lir.predict(X_te)

# Metrics: the MSE is computed once and reused to derive the RMSE
lir_mse = mean_squared_error(y_te, lir_predict)
print("Mean Squared Error", lir_mse)
print("Root Mean Squared Error", np.sqrt(lir_mse))
print("R2 Score", r2_score(y_te, lir_predict))

Mean Squared Error 0.0892934453473857
Root Mean Squared Error 0.29882008859409986
R2 Score 0.6428262186104572
