In [393]:
import numpy as np

import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier

from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import VotingClassifier

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score



In [395]:
df = pd.read_csv("APMM10-clf-dataset11.csv")
df.head()

Unnamed: 0,X1,X2,y
0,0.854774,0.243674,0
1,0.751766,-0.199042,1
2,0.435498,0.388439,0
3,0.321976,-0.71864,1
4,0.768068,-0.391255,1


In [397]:
X = df[['X1','X2']]
y =df['y']

In [399]:
X_train, X_test, y_train, y_test = train_test_split(
    X,y,test_size=0.25, random_state=42, stratify=y)
knn3 = KNeighborsClassifier(n_neighbors=3)
knn4 = KNeighborsClassifier(n_neighbors=4)
knn5 = KNeighborsClassifier(n_neighbors=5)
dt3 = DecisionTreeClassifier(max_depth=3, random_state=42)
dt4 = DecisionTreeClassifier(max_depth=4, random_state=42)

voting_clf = VotingClassifier(
    estimators=[
    ("knn3", knn3),
    ("knn4", knn4),
    ("knn5", knn5),
    ("dt3", dt3),
    ("dt4", dt4)
    ],
    voting='hard'
)

voting_clf.fit(X_train, y_train)

y_pred = voting_clf.predict(X_test)

acc = accuracy_score(y_test,y_pred)

print(acc)

0.96


In [401]:
from sklearn.metrics import f1_score

F1 = f1_score(y_test, y_pred,)
print(f"{F1:.3f}")  

0.961


In [403]:
models = {
    'KNN-3': knn3,
    'KNN-4': knn4,
    'KNN-5': knn5,
    'DT-3': dt3,
    'DT-4': dt4,
    'Voting': voting_clf
}

# Store precision scores
precision_scores = {}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Average='binary' assumes a binary classification with labels 0 and 1
    precision = precision_score(y_test, y_pred)
    precision_scores[name] = precision
    print(f"{name} precision: {str(precision)[:5]}")  # print first 3 decimal digits

# Find model with best precision
best_model = max(precision_scores, key=precision_scores.get)
print(f"\nBest precision: {best_model} - {str(precision_scores[best_model])[:5]}")




KNN-3 precision: 0.924
KNN-4 precision: 0.953
KNN-5 precision: 0.938
DT-3 precision: 0.921
DT-4 precision: 0.921
Voting precision: 0.938

Best precision: KNN-4 - 0.953


In [419]:
knn3 = KNeighborsClassifier(n_neighbors=3)
knn4 = KNeighborsClassifier(n_neighbors=4)
knn5 = KNeighborsClassifier(n_neighbors=5)
dt3 = DecisionTreeClassifier(max_depth=3, random_state=42)
dt4 = DecisionTreeClassifier(max_depth=4, random_state=42)

# Train and evaluate each model
models = {
    'KNN (k=3)': knn3,
    'KNN (k=4)': knn4,
    'KNN (k=5)': knn5,
    'DT (depth=3)': dt3,
    'DT (depth=4)': dt4
}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    precision = precision_score(y_test, y_pred)
    print(f"{name}: Weighted Precision = {precision:.3f}")

# Voting Classifier (majority voting)
voting_model = VotingClassifier(
    estimators=[
        ('knn3', knn3),
        ('knn4', knn4),
        ('knn5', knn5),
        ('dt3', dt3),
        ('dt4', dt4)
    ],
    voting='hard'
)

voting_model.fit(X_train, y_train)
y_pred_voting = voting_model.predict(X_test)
precision_voting = precision_score(y_test, y_pred_voting)
print(f"VotingClassifier Ensemble: Weighted Precision = {precision_voting:.3f}")

KNN (k=3): Weighted Precision = 0.924
KNN (k=4): Weighted Precision = 0.953
KNN (k=5): Weighted Precision = 0.938
DT (depth=3): Weighted Precision = 0.922
DT (depth=4): Weighted Precision = 0.922
VotingClassifier Ensemble: Weighted Precision = 0.938


In [421]:
recall_scores = {}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    recall_scores[name] = recall_score(y_test, y_pred)

# Voting classifier
voting = VotingClassifier(estimators=[
    ('knn3', knn3), ('knn4', knn4), ('knn5', knn5), ('dt3', dt3), ('dt4', dt4)
], voting='hard')

voting.fit(X_train, y_train)
y_pred_vote = voting.predict(X_test)
recall_scores["Voting"] = recall_score(y_test, y_pred_vote)

# Print all recall scores (3 decimal places)
for model, score in recall_scores.items():
    print(f"{model}: {score:.5f}")

# Find the best
best_model = max(recall_scores, key=recall_scores.get)
print(f"\n✅ Best Recall Score: {best_model} with {recall_scores[best_model]:.5f}")


KNN (k=3): 0.98387
KNN (k=4): 0.98387
KNN (k=5): 0.98387
DT (depth=3): 0.95161
DT (depth=4): 0.95161
Voting: 0.98387

✅ Best Recall Score: KNN (k=3) with 0.98387


In [62]:
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.metrics import mean_squared_error

In [423]:
df = pd.read_csv("APMM10-reg-dataset13.csv")
df.head()


Unnamed: 0,X1,X2,X3,X4,X5,y
0,-0.778877,0.227707,-0.21725,0.117715,-0.416022,353.628836
1,-0.984763,-1.000755,0.128196,0.588786,-1.975529,152.872646
2,-0.729922,0.609208,1.186181,-0.594182,-0.627713,350.769222
3,0.777258,-1.014609,-0.970274,0.524534,-1.580636,236.262018
4,2.046403,-0.406842,-0.736017,0.204004,-0.038934,433.694223


In [425]:
X = df.iloc[:, :-1]
y = df.iloc[:,-1]

In [427]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42)

lr = LinearRegression()
knn = KNeighborsRegressor(n_neighbors=5)
dt = DecisionTreeRegressor(max_depth=5, random_state=42)

voting_reg = VotingRegressor(estimators=[
    ("lr",lr),
    ('knn', knn),
    ('dt', dt)
])

voting_reg.fit(X_train, y_train)
y_pred = voting_reg.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"{rmse:.3f}")
    

59.028


In [429]:
lr.fit(X_train, y_train)
knn.fit(X_train, y_train)
dt.fit(X_train, y_train)

# Voting Regressor
voting = VotingRegressor(estimators=[
    ('lr', lr), ('knn', knn), ('dt', dt)
])
voting.fit(X_train, y_train)

# Predictions
preds = {
    'LinearRegression': lr.predict(X_test),
    'KNeighborsRegressor': knn.predict(X_test),
    'DecisionTreeRegressor': dt.predict(X_test),
    'VotingRegressor': voting.predict(X_test)
}

# RMSE
for name, pred in preds.items():
    rmse = np.sqrt(mean_squared_error(y_test, pred))
    print(f"{name}: {rmse:.4f}")

LinearRegression: 54.8731
KNeighborsRegressor: 67.7482
DecisionTreeRegressor: 70.5027
VotingRegressor: 59.0282


In [74]:
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay

In [431]:
df = pd.read_csv("APMM10-clf-dataset14.csv")

In [433]:
df.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,y
0,-0.42486,-0.579713,-1.337395,0.362592,0.117978,-1.414848,0
1,0.540444,0.844824,1.151952,-1.20107,0.341755,-0.841155,0
2,1.871295,-3.570698,-1.304408,1.673602,-0.026235,3.03547,0
3,0.23472,-3.1485,-2.922666,1.387281,1.525623,2.64561,0
4,-1.774487,1.968075,0.657095,0.739276,-1.782984,-0.678737,0


In [435]:
X = df.iloc[:, :-1]
y = df.iloc[:,-1]

In [437]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3,stratify=y, random_state=42)

In [451]:
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Predict and calculate accuracy
y_pred = rf.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"Accuracy: {acc:.4f}")

Accuracy: 0.8600


In [455]:
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Step 5: Predict and calculate weighted F1 score
y_pred = model.predict(X_test)
f1 = f1_score(y_test, y_pred, average='weighted')

# Step 6: Print the result
print(f"Weighted F1 Score: {f1:.4f}")


Weighted F1 Score: 0.8531


In [467]:
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
cm = confusion_matrix(y_test, y_pred)
tp_label_0 = cm[0][0]  # True Positives for label 0

# Print results
print("Accuracy:", acc)
print("Weighted F1 Score:", f1)
print("True Positives for label 0:", tp_label_0)
print(cm)
FN_0 = cm[0][1]+cm[0][2]
print(f'false nagetive is: {FN_0}')


Accuracy: 0.86
Weighted F1 Score: 0.8531489759024178
True Positives for label 0: 208
[[208  10   2]
 [ 13  34   1]
 [ 14   2  16]]
false nagetive is: 12


In [204]:
from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import AdaBoostClassifier

#Build a boosting classifier ensemble using the 
#AdaBoostClassifier() method. Use a DecisionTreeClassifier with max_depth=1 
#and random_state=42 as the “stump”. Initialize the AdaBoostClassifier() 
# with 60 estimators, a learning_rate of 1.0, and a random_state of 42. 

#Perform a train-test split using a 70-30 split and ensure stratify=yes.

In [479]:
df = pd.read_csv('APMM10-clf-dataset17.csv')
df.head()

Unnamed: 0,X1,X2,X3,X4,y
0,-1.194848,1.138331,-1.482079,1.025358,0
1,0.290131,0.317681,-0.996519,0.477009,0
2,-0.522737,-0.121434,0.765888,-0.308384,0
3,1.738138,-0.188065,-1.195371,0.302161,0
4,1.828884,-0.289261,-1.049151,0.206271,0


In [481]:
X = df.drop('y', axis=1)

In [483]:
y = df['y']

In [485]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y)

In [487]:
stump = DecisionTreeClassifier(max_depth=1, random_state=42)

model = AdaBoostClassifier(
    estimator=stump, 
    n_estimators=60, 
    learning_rate=1.0,
    random_state=42
)

model.fit(X_train, y_train)
y_pred =  model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"accuracy:{accuracy:.3f}")
f1 = f1_score(y_test,y_pred,average='weighted')
print(f"Weighted F1:{f1:.3f}")


accuracy:0.953
Weighted F1:0.951


In [493]:
stump.fit(X_train, y_train)
y_pred_stump = stump.predict(X_test)
recall = recall_score(y_test, y_pred_stump, average='weighted')
print(f"Weighted stump recall: {recall:.3f}")
precision = precision_score(y_test, y_pred_stump, average='weighted')
print(f"weighted stump precision:{precision:.3f}")

Weighted stump recall: 0.923
weighted stump precision:0.927


In [255]:
print("Unique in y_test:", np.unique(y_test))
print("Unique in y_pred:", np.unique(y_pred))


Unique in y_test: [0 1]
Unique in y_pred: [0 1]


In [499]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier
df =pd.read_csv('APMM10-clf-dataset18.csv')
X = df.drop('y', axis=1)
y = df['y']
df.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,y
0,0.700765,-1.958677,0.22434,-0.14538,-1.309757,-0.401478,0
1,-0.20196,0.749327,0.849612,0.656523,1.458497,1.04978,1
2,2.01172,-1.810551,0.43477,0.615294,0.947045,0.241003,1
3,1.580108,-2.191164,0.159463,0.613825,0.306174,0.428269,0
4,-1.232211,2.800755,0.765878,0.617917,2.319216,1.2943,1


In [501]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42)

estimators = [
    ('lr', LogisticRegression(max_iter=1000)),
    ('dt3', DecisionTreeClassifier(max_depth=3, random_state=42)),
    ('dt5', DecisionTreeClassifier(max_depth=5, random_state=42)),
    ('knn5', KNeighborsClassifier(n_neighbors=5)),
    ('knn7', KNeighborsClassifier(n_neighbors=7))
]

In [503]:
stacking_model = StackingClassifier(
    estimators = estimators, 
    final_estimator=LogisticRegression(),
    cv=5
)
stacking_model.fit(X_train, y_train)
y_pred = stacking_model.predict(X_test)

acc=accuracy_score(y_test, y_pred)
f1=f1_score(y_test, y_pred, average='weighted')
ps=precision_score(y_test, y_pred, average='weighted')
recall=recall_score(y_test, y_pred, average='weighted')

print(f"Stacking Accuracy:{acc:.3f}" )
print(f"Stacking F1 (Weighted):{f1:.3f}" )
print(f"Stacking Precision (Weighted):{ps:.3f}")
print(f"Stacking Recall (Weighted):{recall:.3f}")

Stacking Accuracy:0.940
Stacking F1 (Weighted):0.940
Stacking Precision (Weighted):0.940
Stacking Recall (Weighted):0.940


In [505]:
models = {
    'LogisticRegression': LogisticRegression(max_iter=1000),
    'DecisionTree (depth=3)': DecisionTreeClassifier(max_depth=3, random_state=42),
    'DecisionTree (depth=5)': DecisionTreeClassifier(max_depth=5, random_state=42),
    'KNN (k=5)': KNeighborsClassifier(n_neighbors=5),
    'KNN (k=7)': KNeighborsClassifier(n_neighbors=7)
}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    precision = precision_score(y_test, y_pred, average='weighted')
    print(f"{name}: Weighted Precision = {precision:.3f}")


LogisticRegression: Weighted Precision = 0.900
DecisionTree (depth=3): Weighted Precision = 0.924
DecisionTree (depth=5): Weighted Precision = 0.930
KNN (k=5): Weighted Precision = 0.940
KNN (k=7): Weighted Precision = 0.944


In [507]:
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    recall = recall_score(y_test, y_pred, average="weighted")
    print(f"{name}: weighted recall ={recall:.3f}")
    

LogisticRegression: weighted recall =0.900
DecisionTree (depth=3): weighted recall =0.923
DecisionTree (depth=5): weighted recall =0.930
KNN (k=5): weighted recall =0.940
KNN (k=7): weighted recall =0.943


In [None]:
0.96
0.961
KNeighborsClassifier() K=4
4. K=3, K=4, K=5, VotingClassifier
59.028
6. decisionTreeRegressor()
0.86
0.853
208
12
0.953
0.951
0.927
0.923
0.940
0.940
logisticRegression()
KneighborsClassifier k=7