In [1]:
import sklearn.preprocessing
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler

import pandas as pd
from prepare import telco_pipeline, telco_prep, telco_modeling, telco_modelingv2
from model import get_tree, get_forest, get_logreg, get_knn, get_forest_test


In [2]:
train_X, val_X, test_X, train_y, val_y, test_y = telco_modelingv2()

In [8]:
train_X.head()

Unnamed: 0,tenure,monthly_charges,gender,senior_citizen,partner,dependents,phone_service,multiple_lines,internet_service,internet_dsl,...,contract,contract_type_one_year,contract_type_two_year,payment_method_bank_transfer,payment_method_credit_card,payment_method_electronic_check,payment_method_mailed_check,payment_type_automatic,payment_type_manual,paperless_billing
0,14,76.45,True,False,False,False,True,False,True,False,...,False,False,False,False,False,True,False,False,True,False
1,5,70.0,True,False,False,False,True,False,True,True,...,True,True,False,False,False,False,True,False,True,True
2,35,75.2,True,False,True,False,True,True,True,False,...,False,False,False,False,False,True,False,False,True,True
3,58,86.1,True,False,True,False,True,True,True,True,...,True,False,True,False,False,True,False,False,True,True
4,2,49.6,False,False,False,False,True,False,True,True,...,False,False,False,False,False,False,True,False,True,True


In [4]:
get_tree(train_X, val_X, train_y, val_y)

Decision Tree Model Accuracy on Training Data: 0.8006085192697768
Decision Tree Model Accuracy on Validation Data: 0.8058712121212122


In [5]:
#Loop to 
seed = 42
train_acc = []
val_acc = []
depth = []

for i in range(1, 5):
    
    clf = DecisionTreeClassifier(max_depth = i, random_state = seed)
    
    clf.fit(train_X, train_y)
    
    depth.append(i)
    
    train_acc.append(clf.score(train_X, train_y))
    
    val_acc.append(clf.score(val_X, val_y))
    
trees = pd.DataFrame({'max_depth': depth,
                      'train_acc': train_acc,
                      'val_acc': val_acc})

trees

Unnamed: 0,max_depth,train_acc,val_acc
0,1,0.734686,0.734848
1,2,0.762475,0.773674
2,3,0.792292,0.796402
3,4,0.794929,0.800189


In [9]:
test_X.head()

Unnamed: 0,tenure,monthly_charges,gender,senior_citizen,partner,dependents,phone_service,multiple_lines,internet_service,internet_dsl,...,contract,contract_type_one_year,contract_type_two_year,payment_method_bank_transfer,payment_method_credit_card,payment_method_electronic_check,payment_method_mailed_check,payment_type_automatic,payment_type_manual,paperless_billing
0,5,104.1,True,False,False,False,True,True,True,False,...,False,False,False,False,False,False,True,False,True,False
1,38,20.05,False,False,False,False,True,False,False,False,...,True,True,False,False,False,False,True,False,True,False
2,4,48.25,False,False,False,False,False,False,True,True,...,False,False,False,False,False,False,True,False,True,False
3,21,64.85,False,False,False,True,True,False,True,True,...,True,True,False,False,False,False,True,False,True,False
4,36,40.65,False,False,True,True,False,False,True,True,...,True,False,True,False,True,False,False,True,False,False


In [11]:
test_y.head()

1628     True
3829    False
833     False
2956    False
5446    False
Name: churn, dtype: bool

In [13]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

def get_forest_test(train_X, test_X, train_y, test_y):
    '''get random forest accuracy on train and validate data'''
    
    # create model object and fit it to training data
    rf = RandomForestClassifier(max_depth=7, min_samples_leaf=10, random_state=42)
    rf.fit(train_X, train_y)

    # Predict churn labels and probabilities on test data
    y_pred = rf.predict(test_X)
    y_proba = rf.predict_proba(test_X)[:, 1]  # Probabilities of churn

    # Extract customer IDs from the index of test_X
    customer_ids = test_X.index

    # Create DataFrame
    result_df = pd.DataFrame({
        'customer_id': customer_ids,
        'probability_of_churn': y_proba,
        'prediction_of_churn': y_pred
    })

    # Convert boolean predictions to 0 and 1
    result_df['prediction_of_churn'] = result_df['prediction_of_churn'].astype(int)

    # Write DataFrame to CSV
    result_df.to_csv('churn_predictions.csv', index=False)

# Assuming train_X, test_X, train_y, and test_y are available
get_forest_test(train_X, test_X, train_y, test_y)


In [12]:
train_X.shape, val_X.shape, test_X.shape, train_y.shape, val_y.shape, test_y.shape

((4930, 23), (1056, 23), (1057, 23), (4930,), (1056,), (1057,))

In [None]:
#Test a combination of depths and min leaf settings. 
seed = 42
train_acc = []
val_acc = []
depth = []
leaf = []

# Extract the target column as a Series
train_y_array = train_y
val_y_array = val_y

for max_depth in range(10, 0, -1):  # Decreasing depth from 10 to 1
    for min_samples_leaf in range(1, 10):  # Increasing min_samples_leaf from 1 to 5
        rf = RandomForestClassifier(max_depth=max_depth, min_samples_leaf=min_samples_leaf, random_state=seed)
        rf.fit(train_X, train_y)
        
        depth.append(max_depth)
        leaf.append(min_samples_leaf)
        
        train_acc.append(rf.score(train_X, train_y))
        val_acc.append(rf.score(val_X, val_y))

In [None]:
#Print the results
trees = pd.DataFrame({'depth': depth,
                      'train_acc': train_acc,
                      'val_acc': val_acc,
                      'leaf': leaf})

trees.sort_values(by=['val_acc', 'train_acc', 'depth', 'leaf'], ascending=[False, False, True, True]).head()

Unnamed: 0,depth,train_acc,val_acc,leaf
8,10,0.822921,0.813447,9
35,7,0.81643,0.811553,9
37,6,0.810751,0.810606,2
38,6,0.810345,0.810606,3
22,8,0.822718,0.809659,5


In [25]:
def get_forest_test(train_X, test_X, train_y, test_y):
    '''get random forest accuracy on train and validate data'''

    # create model object and fit it to training data
    rf = RandomForestClassifier(max_depth=10, min_samples_leaf= 9, random_state=42)
    rf.fit(train_X,train_y)

    # print result
    print(f"Accuracy of Random Forest on train is {rf.score(train_X, train_y)}")
    print(f"Accuracy of Random Forest on validate is {rf.score(test_X, test_y)}")

In [24]:
get_forest_test(train_X, test_X, train_y, test_y)

Accuracy of Random Forest on train is 0.8115618661257606
Accuracy of Random Forest on validate is 0.78240302743614


In [None]:

seed = 42

best_score = 0
best_C = 0
best_train_score = 0

for C in [0.001, 0.01, 0.1, 1, 10, 100]:
    logreg = LogisticRegression(random_state=seed, C=C, max_iter=1000)
    logreg.fit(train_X, train_y)
    
    train_score = logreg.score(train_X, train_y)
    val_score = logreg.score(val_X, val_y)
    
    print(f"C = {C:.3f}, Train Score = {train_score:.4f}, Validation Score = {val_score:.4f}")
    
    if val_score > best_score:
        best_score = val_score
        best_C = C
        best_train_score = train_score

print(f"\nBest C = {best_C:.3f}, Best Train Score = {best_train_score:.4f}, Best Validation Score = {best_score:.4f}")

C = 0.001, Train Score = 0.7933, Validation Score = 0.7983
C = 0.010, Train Score = 0.8004, Validation Score = 0.7973
C = 0.100, Train Score = 0.8041, Validation Score = 0.8002
C = 1.000, Train Score = 0.8049, Validation Score = 0.8011
C = 10.000, Train Score = 0.8049, Validation Score = 0.8011
C = 100.000, Train Score = 0.8049, Validation Score = 0.8011

Best C = 1.000, Best Train Score = 0.8049, Best Validation Score = 0.8011


In [None]:
seed = 42

best_score = 0
best_C = 0
best_train_score = 0
best_penalty = ''
best_solver = ''
best_class_weight = ''
best_multi_class = ''

penalties = ['l1', 'l2']
solvers = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
class_weights = [None, 'balanced']
multi_classes = ['ovr', 'multinomial']

for C in [0.001, 0.01, 0.1, 1, 10, 100]:
    for penalty in penalties:
        for solver in solvers:
            for class_weight in class_weights:
                for multi_class in multi_classes:
                    try:
                        logreg = LogisticRegression(
                            random_state=seed, C=C, penalty=penalty,
                            solver=solver, class_weight=class_weight,
                            multi_class=multi_class, max_iter=500
                        )
                        logreg.fit(train_X, train_y)
                        
                        train_score = logreg.score(train_X, train_y)
                        val_score = logreg.score(val_X, val_y)
                        
                        print(f"C = {C:.3f}, Penalty = {penalty}, Solver = {solver}, Class Weight = {class_weight}, Multi-Class = {multi_class}")
                        print(f"Train Score = {train_score:.4f}, Validation Score = {val_score:.4f}")
                        
                        if val_score > best_score:
                            best_score = val_score
                            best_C = C
                            best_train_score = train_score
                            best_penalty = penalty
                            best_solver = solver
                            best_class_weight = class_weight
                            best_multi_class = multi_class
                    except ValueError as e:
                        print("Error:", e)
                        continue

print("\nBest Hyperparameters:")
print(f"Best C = {best_C:.3f}")
print(f"Best Penalty = {best_penalty}")
print(f"Best Solver = {best_solver}")
print(f"Best Class Weight = {best_class_weight}")
print(f"Best Multi-Class = {best_multi_class}")
print(f"Best Train Score = {best_train_score:.4f}")
print(f"Best Validation Score = {best_score:.4f}")


Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
C = 0.001, Penalty = l1, Solver = liblinear, Class Weight = None, Multi-Class = ovr
Train Score = 0.7621, Validation Score = 0.7746
Error: Solver liblinear does not support a multinomial backend.
C = 0.001, Penalty = l1, Solver = liblinear, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.6874, Validation Score = 0.6932
Error: Solver liblinear does not support a m



C = 0.001, Penalty = l2, Solver = saga, Class Weight = None, Multi-Class = ovr
Train Score = 0.7933, Validation Score = 0.7983
C = 0.001, Penalty = l2, Solver = saga, Class Weight = None, Multi-Class = multinomial
Train Score = 0.7945, Validation Score = 0.7983
C = 0.001, Penalty = l2, Solver = saga, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7422, Validation Score = 0.7386
C = 0.001, Penalty = l2, Solver = saga, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7483, Validation Score = 0.7519
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penal



C = 0.010, Penalty = l1, Solver = saga, Class Weight = None, Multi-Class = ovr
Train Score = 0.7897, Validation Score = 0.7945
C = 0.010, Penalty = l1, Solver = saga, Class Weight = None, Multi-Class = multinomial
Train Score = 0.7897, Validation Score = 0.7936
C = 0.010, Penalty = l1, Solver = saga, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7424, Validation Score = 0.7443
C = 0.010, Penalty = l1, Solver = saga, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7428, Validation Score = 0.7443
C = 0.010, Penalty = l2, Solver = newton-cg, Class Weight = None, Multi-Class = ovr
Train Score = 0.8002, Validation Score = 0.7973
C = 0.010, Penalty = l2, Solver = newton-cg, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8010, Validation Score = 0.8002
C = 0.010, Penalty = l2, Solver = newton-cg, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7552, Validation Score = 0.7500
C = 0.010, Penalty = l2, Solver = newton-cg, Class Weight = 



C = 0.010, Penalty = l2, Solver = sag, Class Weight = None, Multi-Class = ovr
Train Score = 0.8004, Validation Score = 0.7973




C = 0.010, Penalty = l2, Solver = sag, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8010, Validation Score = 0.8011
C = 0.010, Penalty = l2, Solver = sag, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7550, Validation Score = 0.7500
C = 0.010, Penalty = l2, Solver = sag, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7544, Validation Score = 0.7491




C = 0.010, Penalty = l2, Solver = saga, Class Weight = None, Multi-Class = ovr
Train Score = 0.7992, Validation Score = 0.8002




C = 0.010, Penalty = l2, Solver = saga, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8016, Validation Score = 0.8002
C = 0.010, Penalty = l2, Solver = saga, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7550, Validation Score = 0.7500
C = 0.010, Penalty = l2, Solver = saga, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7540, Validation Score = 0.7491
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none



C = 0.100, Penalty = l1, Solver = saga, Class Weight = None, Multi-Class = ovr
Train Score = 0.8063, Validation Score = 0.8011




C = 0.100, Penalty = l1, Solver = saga, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8067, Validation Score = 0.8011




C = 0.100, Penalty = l1, Solver = saga, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7513, Validation Score = 0.7538




C = 0.100, Penalty = l1, Solver = saga, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7517, Validation Score = 0.7528
C = 0.100, Penalty = l2, Solver = newton-cg, Class Weight = None, Multi-Class = ovr
Train Score = 0.8041, Validation Score = 0.8002
C = 0.100, Penalty = l2, Solver = newton-cg, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8053, Validation Score = 0.8011
C = 0.100, Penalty = l2, Solver = newton-cg, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7501, Validation Score = 0.7509
C = 0.100, Penalty = l2, Solver = newton-cg, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7501, Validation Score = 0.7519
C = 0.100, Penalty = l2, Solver = lbfgs, Class Weight = None, Multi-Class = ovr
Train Score = 0.8041, Validation Score = 0.8002
C = 0.100, Penalty = l2, Solver = lbfgs, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8051, Validation Score = 0.8021
C = 0.100, Penalty = l2, Solver = lbfgs, Clas



C = 0.100, Penalty = l2, Solver = sag, Class Weight = None, Multi-Class = ovr
Train Score = 0.8043, Validation Score = 0.8002




C = 0.100, Penalty = l2, Solver = sag, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8055, Validation Score = 0.8021
C = 0.100, Penalty = l2, Solver = sag, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7499, Validation Score = 0.7500
C = 0.100, Penalty = l2, Solver = sag, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7497, Validation Score = 0.7509




C = 0.100, Penalty = l2, Solver = saga, Class Weight = None, Multi-Class = ovr
Train Score = 0.8043, Validation Score = 0.8002




C = 0.100, Penalty = l2, Solver = saga, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8055, Validation Score = 0.8021




C = 0.100, Penalty = l2, Solver = saga, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7499, Validation Score = 0.7509
C = 0.100, Penalty = l2, Solver = saga, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7495, Validation Score = 0.7509
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
C = 1.000, Penalty = l1, Solver = liblinear, Class Weight = None, Multi-Class = ovr
Train Score = 0.8051, 



C = 1.000, Penalty = l1, Solver = saga, Class Weight = None, Multi-Class = ovr
Train Score = 0.8045, Validation Score = 0.8011




C = 1.000, Penalty = l1, Solver = saga, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8051, Validation Score = 0.8011




C = 1.000, Penalty = l1, Solver = saga, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7505, Validation Score = 0.7509




C = 1.000, Penalty = l1, Solver = saga, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7495, Validation Score = 0.7519
C = 1.000, Penalty = l2, Solver = newton-cg, Class Weight = None, Multi-Class = ovr
Train Score = 0.8049, Validation Score = 0.8011
C = 1.000, Penalty = l2, Solver = newton-cg, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8049, Validation Score = 0.8011
C = 1.000, Penalty = l2, Solver = newton-cg, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7497, Validation Score = 0.7528
C = 1.000, Penalty = l2, Solver = newton-cg, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7495, Validation Score = 0.7528
C = 1.000, Penalty = l2, Solver = lbfgs, Class Weight = None, Multi-Class = ovr
Train Score = 0.8049, Validation Score = 0.8011
C = 1.000, Penalty = l2, Solver = lbfgs, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8051, Validation Score = 0.8021
C = 1.000, Penalty = l2, Solver = lbfgs, Clas



C = 1.000, Penalty = l2, Solver = sag, Class Weight = None, Multi-Class = ovr
Train Score = 0.8045, Validation Score = 0.8021
C = 1.000, Penalty = l2, Solver = sag, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8049, Validation Score = 0.8011




C = 1.000, Penalty = l2, Solver = sag, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7493, Validation Score = 0.7519
C = 1.000, Penalty = l2, Solver = sag, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7495, Validation Score = 0.7528




C = 1.000, Penalty = l2, Solver = saga, Class Weight = None, Multi-Class = ovr
Train Score = 0.8047, Validation Score = 0.8011




C = 1.000, Penalty = l2, Solver = saga, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8049, Validation Score = 0.8021




C = 1.000, Penalty = l2, Solver = saga, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7507, Validation Score = 0.7509




C = 1.000, Penalty = l2, Solver = saga, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7495, Validation Score = 0.7528
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
C = 10.000, Penalty = l1, Solver = liblinear, Class Weight = None, Multi-Class = ovr
Train Score = 0.8047, Validation Score = 0.8011
Error: Solver liblinear does not support a multinomial backend.
C = 10.000, Penalty = l1, Solver = libli



C = 10.000, Penalty = l1, Solver = saga, Class Weight = None, Multi-Class = ovr
Train Score = 0.8045, Validation Score = 0.8011




C = 10.000, Penalty = l1, Solver = saga, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8047, Validation Score = 0.8011




C = 10.000, Penalty = l1, Solver = saga, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7503, Validation Score = 0.7509




C = 10.000, Penalty = l1, Solver = saga, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7495, Validation Score = 0.7528
C = 10.000, Penalty = l2, Solver = newton-cg, Class Weight = None, Multi-Class = ovr
Train Score = 0.8047, Validation Score = 0.8011
C = 10.000, Penalty = l2, Solver = newton-cg, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8047, Validation Score = 0.8011
C = 10.000, Penalty = l2, Solver = newton-cg, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7495, Validation Score = 0.7528
C = 10.000, Penalty = l2, Solver = newton-cg, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7495, Validation Score = 0.7528
C = 10.000, Penalty = l2, Solver = lbfgs, Class Weight = None, Multi-Class = ovr
Train Score = 0.8049, Validation Score = 0.8011
C = 10.000, Penalty = l2, Solver = lbfgs, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8047, Validation Score = 0.8011
C = 10.000, Penalty = l2, Solver = lbf



C = 10.000, Penalty = l2, Solver = sag, Class Weight = None, Multi-Class = ovr
Train Score = 0.8047, Validation Score = 0.8011
C = 10.000, Penalty = l2, Solver = sag, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8047, Validation Score = 0.8011




C = 10.000, Penalty = l2, Solver = sag, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7495, Validation Score = 0.7528
C = 10.000, Penalty = l2, Solver = sag, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7493, Validation Score = 0.7528




C = 10.000, Penalty = l2, Solver = saga, Class Weight = None, Multi-Class = ovr
Train Score = 0.8045, Validation Score = 0.8011




C = 10.000, Penalty = l2, Solver = saga, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8047, Validation Score = 0.8011




C = 10.000, Penalty = l2, Solver = saga, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7505, Validation Score = 0.7509




C = 10.000, Penalty = l2, Solver = saga, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7493, Validation Score = 0.7528
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
Error: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.
C = 100.000, Penalty = l1, Solver = liblinear, Class Weight = None, Multi-Class = ovr
Train Score = 0.8047, Validation Score = 0.8011
Error: Solver liblinear does not support a multinomial backend.
C = 100.000, Penalty = l1, Solver = li



C = 100.000, Penalty = l1, Solver = saga, Class Weight = None, Multi-Class = ovr
Train Score = 0.8045, Validation Score = 0.8011




C = 100.000, Penalty = l1, Solver = saga, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8047, Validation Score = 0.8011




C = 100.000, Penalty = l1, Solver = saga, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7505, Validation Score = 0.7509




C = 100.000, Penalty = l1, Solver = saga, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7493, Validation Score = 0.7528
C = 100.000, Penalty = l2, Solver = newton-cg, Class Weight = None, Multi-Class = ovr
Train Score = 0.8047, Validation Score = 0.8011
C = 100.000, Penalty = l2, Solver = newton-cg, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8047, Validation Score = 0.8011
C = 100.000, Penalty = l2, Solver = newton-cg, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7495, Validation Score = 0.7528
C = 100.000, Penalty = l2, Solver = newton-cg, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7495, Validation Score = 0.7528
C = 100.000, Penalty = l2, Solver = lbfgs, Class Weight = None, Multi-Class = ovr
Train Score = 0.8049, Validation Score = 0.8011
C = 100.000, Penalty = l2, Solver = lbfgs, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8049, Validation Score = 0.8011
C = 100.000, Penalty = l2, Solv



C = 100.000, Penalty = l2, Solver = sag, Class Weight = None, Multi-Class = ovr
Train Score = 0.8047, Validation Score = 0.8011
C = 100.000, Penalty = l2, Solver = sag, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8047, Validation Score = 0.8011




C = 100.000, Penalty = l2, Solver = sag, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7491, Validation Score = 0.7528
C = 100.000, Penalty = l2, Solver = sag, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7493, Validation Score = 0.7528




C = 100.000, Penalty = l2, Solver = saga, Class Weight = None, Multi-Class = ovr
Train Score = 0.8045, Validation Score = 0.8011




C = 100.000, Penalty = l2, Solver = saga, Class Weight = None, Multi-Class = multinomial
Train Score = 0.8047, Validation Score = 0.8011




C = 100.000, Penalty = l2, Solver = saga, Class Weight = balanced, Multi-Class = ovr
Train Score = 0.7505, Validation Score = 0.7509
C = 100.000, Penalty = l2, Solver = saga, Class Weight = balanced, Multi-Class = multinomial
Train Score = 0.7491, Validation Score = 0.7528

Best Hyperparameters:
Best C = 0.100
Best Penalty = l1
Best Solver = liblinear
Best Class Weight = None
Best Multi-Class = ovr
Best Train Score = 0.8069
Best Validation Score = 0.8030




In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

def get_knn(train_X, val_X, train_y, val_y):
    best_val_accuracy = 0.0
    results = []

    for n_neighbors in range(1, 21):
        for weights in ['uniform', 'distance']:
            for p in [1, 2]:
                knn_model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, p=p)
                knn_model.fit(train_X, train_y)

                train_predictions = knn_model.predict(train_X)
                val_predictions = knn_model.predict(val_X)

                train_accuracy = accuracy_score(train_y, train_predictions)
                val_accuracy = accuracy_score(val_y, val_predictions)

                results.append((n_neighbors, weights, p, train_accuracy, val_accuracy))

                if val_accuracy > best_val_accuracy:
                    best_val_accuracy = val_accuracy

    results.sort(key=lambda x: x[4], reverse=True)

    for result in results:
        n_neighbors, weights, p, train_accuracy, val_accuracy = result
        print(f"n_neighbors: {n_neighbors}, weights: {weights}, p: {p}")
        print(f"Train Accuracy: {train_accuracy:.4f}, Validation Accuracy: {val_accuracy:.4f}\n")

# Assuming train_X, val_X, train_y, and val_y are available
get_knn(train_X, val_X, train_y, val_y)


AttributeError: 'Flags' object has no attribute 'c_contiguous'