In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, model.predict_proba(X_test)[:,1]))
df = pd.read_csv("diabetes.csv")
df.head()
X = df.drop("Outcome", axis=1)
y = df["Outcome"]
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, model.predict_proba(X_test)[:,1]))
from sklearn.tree import DecisionTreeClassifier

model1 = DecisionTreeClassifier(random_state=42)
model1.fit(X_train, y_train)

y_pred1 = model1.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred1))
model2 = DecisionTreeClassifier(max_depth=3, random_state=42)
model2.fit(X_train, y_train)

y_pred2 = model2.predict(X_test)
print("Restricted Accuracy:", accuracy_score(y_test, y_pred2))
importance = pd.DataFrame(
    model1.feature_importances_,
    index=X.columns,
    columns=["Importance"]
)

print(importance.sort_values(by="Importance", ascending=False))
df = pd.read_csv("titanic.csv")


Accuracy: 0.9649122807017544
Confusion Matrix:
 [[40  3]
 [ 1 70]]
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

ROC AUC: 0.9952505732066819
Accuracy: 0.7532467532467533
[[79 20]
 [18 37]]
              precision    recall  f1-score   support

           0       0.81      0.80      0.81        99
           1       0.65      0.67      0.66        55

    accuracy                           0.75       154
   macro avg       0.73      0.74      0.73       154
weighted avg       0.76      0.75      0.75       154

ROC AUC: 0.8146923783287419
Accuracy: 0.7467532467532467
Restricted Accuracy: 0.7597402597402597
                          Importance
Glucose                     0.314225
BMI     

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score

# Load dataset
df = pd.read_csv("diabetes.csv")

# Replace zero values
cols = ['Glucose','BloodPressure','SkinThickness','Insulin','BMI']
df[cols] = df[cols].replace(0, np.nan)
df.fillna(df.mean(), inplace=True)

# Features & target
X = df.drop("Outcome", axis=1)
y = df["Outcome"]

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train model
lr = LogisticRegression()
lr.fit(X_train, y_train)

# Prediction
y_pred = lr.predict(X_test)

# Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
print("ROC-AUC:", roc_auc_score(y_test, lr.predict_proba(X_test)[:, 1]))

# Coefficients
coeff = pd.DataFrame(lr.coef_[0], X.columns, columns=["Coefficient"])
print(coeff)

Accuracy: 0.7532467532467533
[[82 17]
 [21 34]]
              precision    recall  f1-score   support

           0       0.80      0.83      0.81        99
           1       0.67      0.62      0.64        55

    accuracy                           0.75       154
   macro avg       0.73      0.72      0.73       154
weighted avg       0.75      0.75      0.75       154

ROC-AUC: 0.8242424242424242
                          Coefficient
Pregnancies                  0.224953
Glucose                      1.083654
BloodPressure               -0.145489
SkinThickness                0.068611
Insulin                     -0.096974
BMI                          0.679456
DiabetesPedigreeFunction     0.199995
Age                          0.394776


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score
from xgboost import XGBClassifier

# Load dataset
df = pd.read_csv("titanic.csv")

# Drop unnecessary columns
df.drop(['Name','Ticket','Cabin'], axis=1, inplace=True)

# Handle missing values
df.fillna(df.mean(numeric_only=True), inplace=True)

# Convert categorical to numeric
df = pd.get_dummies(df, drop_first=True)

# Features & target
X = df.drop("Survived", axis=1)
y = df["Survived"]

# Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train XGBoost
xgb = XGBClassifier(n_estimators=100, max_depth=3, learning_rate=0.1)
xgb.fit(X_train, y_train)

# Prediction
y_pred = xgb.predict(X_test)

# Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
print("ROC-AUC:", roc_auc_score(y_test, xgb.predict_proba(X_test)[:, 1]))

# Feature importance
print("Feature Importance:", xgb.feature_importances_)

Accuracy: 0.8044692737430168
[[92 13]
 [22 52]]
              precision    recall  f1-score   support

           0       0.81      0.88      0.84       105
           1       0.80      0.70      0.75        74

    accuracy                           0.80       179
   macro avg       0.80      0.79      0.79       179
weighted avg       0.80      0.80      0.80       179

ROC-AUC: 0.8664736164736164
Feature Importance: [0.03128337 0.1732136  0.04473273 0.05488632 0.01882255 0.04726624
 0.53441495 0.0373336  0.05804675]


In [4]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Normal Decision Tree
dt1 = DecisionTreeClassifier(random_state=42)
dt1.fit(X_train, y_train)
y_pred1 = dt1.predict(X_test)

print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred1))

# Restricted Depth Tree
dt2 = DecisionTreeClassifier(max_depth=3, random_state=42)
dt2.fit(X_train, y_train)
y_pred2 = dt2.predict(X_test)

print("Restricted Tree Accuracy:", accuracy_score(y_test, y_pred2))

# Feature Importance
importance = pd.DataFrame(
    dt1.feature_importances_,
    index=X.columns,
    columns=["Importance"]
)
print(importance.sort_values(by="Importance", ascending=False))

Decision Tree Accuracy: 0.7541899441340782
Restricted Tree Accuracy: 0.7988826815642458
             Importance
Sex_male       0.293493
PassengerId    0.235031
Fare           0.170791
Age            0.118433
Pclass         0.111070
SibSp          0.032547
Parch          0.019863
Embarked_S     0.016207
Embarked_Q     0.002564
