In [8]:
import os

import numpy as np
import pandas as pd

# Display floats with a fixed 10 decimal places in pandas output.
pd.set_option('display.float_format', '{:.10f}'.format)

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

from ModelPipeline import create_pipeline
import joblib

%matplotlib inline

In [2]:
# Load the shared dataset; the path is relative to the notebook's working directory.
df_ml = pd.read_csv('../Shared Data.csv')

In [3]:
# Features: every column except `rowid` (presumably a row identifier -- verify)
# and the target column itself.
X = df_ml.drop(columns=['rowid', 'koi_disposition'])
# Target: the disposition label to be predicted.
y = df_ml['koi_disposition']

# Hold out 25% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    stratify=y,
    random_state=42,
)


In [5]:
# 1. Create the model object (k=7 was chosen)
k = 7
knn_model = KNeighborsClassifier(n_neighbors=k)
# create_pipeline comes from the project's ModelPipeline module; presumably it
# wraps the estimator together with preprocessing steps -- TODO confirm there.
model_pipeline = create_pipeline(knn_model)

# 2. Fit the pipeline on the training split
model_pipeline.fit(X_train, y_train)

# 3. Prediction on the test set is performed in the next cell

In [6]:
# Predict labels for the held-out test split, then score them against the truth.
y_pred = model_pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Headline numbers: the chosen neighbour count and the overall test accuracy
# (formatted to four decimal places), one per line.
print(
    f"Seçilen Komşu Sayısı (k): {k}",
    f"Test Veri Seti Doğruluğu: {accuracy:.4f}",
    sep="\n",
)

# Per-class precision / recall / F1, under a heading preceded by a blank line.
print("\nSınıflandırma Raporu:")
print(classification_report(y_test, y_pred))

Seçilen Komşu Sayısı (k): 7
Test Veri Seti Doğruluğu: 0.8386

Sınıflandırma Raporu:
                precision    recall  f1-score   support

     CANDIDATE       0.66      0.53      0.59       495
     CONFIRMED       0.82      0.93      0.87       686
FALSE POSITIVE       0.91      0.91      0.91      1210

      accuracy                           0.84      2391
     macro avg       0.80      0.79      0.79      2391
  weighted avg       0.83      0.84      0.83      2391



In [9]:
# Persist the fitted pipeline so it can be reloaded later without retraining.
# joblib.dump does not create missing parent folders, so make sure the output
# directory exists first; otherwise a fresh checkout (where the folder may be
# absent) fails with FileNotFoundError.
model_path = '../Trained Models/KNN Model.joblib'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
# Keep the dump as the cell's last expression so the list of written files is displayed.
joblib.dump(model_pipeline, model_path)

['../Trained Models/KNN Model.joblib']