In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load datasets
# NOTE(review): both paths point at the Kaggle input mount and will not resolve
# outside that environment.
train_data = pd.read_csv('/kaggle/input/fds-assignment/trainfile.csv')
test_data = pd.read_csv('/kaggle/input/fds-assignment/testfile.csv')

# Handle missing values
# Impute each column with its most frequent value, learned from the training
# set only and applied to both frames (the same fit-on-train / apply-to-test
# pattern used by the encoder and scaler below). The result is assigned back
# instead of calling fillna(inplace=True) on a column selection: that form is
# chained assignment and does not update the parent DataFrame under pandas
# Copy-on-Write.
impute_columns = ['OwnHouse', 'HasCrCard']
impute_values = {column: train_data[column].mode()[0] for column in impute_columns}
train_data = train_data.fillna(value=impute_values)
test_data = test_data.fillna(value=impute_values)

# Encode categorical variables
# The single encoder is re-fit on every column; within each loop pass the test
# column is transformed with the mapping just learned from the train column.
# LabelEncoder.transform raises ValueError for a test value not seen in train.
label_encoder = LabelEncoder()
categorical_columns = ['Geography', 'Gender', 'Occupation']
for column in categorical_columns:
    train_data[column] = label_encoder.fit_transform(train_data[column])
    test_data[column] = label_encoder.transform(test_data[column])

# Scale numerical features
# Mean/variance come from the training frame; the test frame reuses them.
scaler = StandardScaler()
numerical_columns = ['CreditScore', 'Age', 'Tenure', 'Balance', 'EstimatedSalary']
train_data[numerical_columns] = scaler.fit_transform(train_data[numerical_columns])
test_data[numerical_columns] = scaler.transform(test_data[numerical_columns])

# Prepare data for modeling
# Identifier columns and the target are removed from the feature matrix.
# X_val / y_val are the 20% hold-out produced by the seeded split.
X = train_data.drop(columns=['row ID', 'CustomerId', 'Surname', 'Exited'])
y = train_data['Exited']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Prepare test features
# Same identifier columns dropped so the test matrix lines up with X.
test_features = test_data.drop(columns=['row ID', 'CustomerId', 'Surname'])



In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Iterations 1-10
# Each tuple is (n_neighbors, weights, p) for one submission file; the position
# in the list (1-based) is the iteration number used in the output filename.
# p is the Minkowski power parameter: p=2 is Euclidean, p=1 is Manhattan.
knn_configs = [
    (5, 'uniform', 2),     # Iteration 1
    (3, 'distance', 2),    # Iteration 2
    (10, 'uniform', 1),    # Iteration 3
    (7, 'distance', 1),    # Iteration 4
    (15, 'uniform', 2),    # Iteration 5
    (8, 'distance', 1),    # Iteration 6
    (4, 'uniform', 2),     # Iteration 7
    (6, 'distance', 2),    # Iteration 8
    (12, 'uniform', 1),    # Iteration 9
    (9, 'distance', 1),    # Iteration 10
]

for iteration, (n_neighbors, weights, p) in enumerate(knn_configs, start=1):
    knn_model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, p=p)
    knn_model.fit(X_train, y_train)

    # Report mean accuracy on the hold-out split so the configurations can be
    # compared before choosing which submission file to upload.
    val_accuracy = knn_model.score(X_val, y_val)
    print(
        f"Iteration {iteration}: n_neighbors={n_neighbors}, weights={weights}, "
        f"p={p}, validation accuracy={val_accuracy:.4f}"
    )

    # Predict the test set and write one submission file per configuration.
    test_predictions = knn_model.predict(test_features)
    submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
    submission.to_csv(f'knn_submission{iteration}.csv', index=False)


In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Iterations 11-30
# Each tuple is (n_neighbors, weights, p) for one submission file; numbering
# continues from 11 so the filenames follow on from knn_submission10.csv.
# p is the Minkowski power parameter: p=2 is Euclidean, p=1 is Manhattan.
knn_configs = [
    (11, 'uniform', 2),    # Iteration 11
    (13, 'distance', 1),   # Iteration 12
    (15, 'uniform', 1),    # Iteration 13
    (14, 'distance', 2),   # Iteration 14
    (16, 'uniform', 2),    # Iteration 15
    (12, 'distance', 1),   # Iteration 16
    (18, 'uniform', 1),    # Iteration 17
    (20, 'distance', 2),   # Iteration 18
    (19, 'uniform', 2),    # Iteration 19
    (17, 'distance', 1),   # Iteration 20
    (25, 'uniform', 2),    # Iteration 21
    (22, 'distance', 1),   # Iteration 22
    (23, 'uniform', 1),    # Iteration 23
    (24, 'distance', 2),   # Iteration 24
    (26, 'uniform', 2),    # Iteration 25
    (30, 'distance', 1),   # Iteration 26
    (28, 'uniform', 1),    # Iteration 27
    (27, 'distance', 2),   # Iteration 28
    (29, 'uniform', 2),    # Iteration 29
    (31, 'distance', 1),   # Iteration 30
]

for iteration, (n_neighbors, weights, p) in enumerate(knn_configs, start=11):
    knn_model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, p=p)
    knn_model.fit(X_train, y_train)

    # Report mean accuracy on the hold-out split so the configurations can be
    # compared before choosing which submission file to upload.
    val_accuracy = knn_model.score(X_val, y_val)
    print(
        f"Iteration {iteration}: n_neighbors={n_neighbors}, weights={weights}, "
        f"p={p}, validation accuracy={val_accuracy:.4f}"
    )

    # Predict the test set and write one submission file per configuration.
    test_predictions = knn_model.predict(test_features)
    submission = pd.DataFrame({'row ID': test_data['row ID'], 'Exited': test_predictions})
    submission.to_csv(f'knn_submission{iteration}.csv', index=False)
