## Importing the dataset

In [1]:
import pandas as pd
# Load the survey table from CSV and preview its first five rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column -- presumably a row
# index that was written out when the CSV was saved; confirm before treating
# it as data.
data = pd.read_csv(filepath_or_buffer="/Skydb.csv")
data.head(n=5)

Unnamed: 0,objid,ra,dec,u,g,r,i,z,run,rerun,camcol,field,specobjid,class,redshift,plate,mjd,fiberid
0,1.23767e+18,47.372545,0.820621,18.69254,17.13867,16.55555,16.34662,16.17639,4849,301,5,771,8.16863e+18,STAR,0.000115,7255,56597,832
1,1.23767e+18,116.303083,42.45598,18.47633,17.30546,17.24116,17.3278,17.37114,6573,301,6,220,9.33395e+18,STAR,-9.3e-05,8290,57364,868
2,1.23767e+18,172.756623,-8.785698,16.47714,15.31072,15.55971,15.72207,15.82471,5973,301,1,13,3.22121e+18,STAR,0.000165,2861,54583,42
3,1.23767e+18,201.224207,28.77129,18.63561,16.88346,16.09825,15.70987,15.43491,4649,301,3,121,2.25406e+18,GALAXY,0.058155,2002,53471,35
4,1.23767e+18,212.817222,26.625225,18.88325,17.87948,17.47037,17.17441,17.05235,4649,301,3,191,2.39031e+18,GALAXY,0.07221,2123,53793,74


## Creating the KNN model

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

# Step 1: Remove columns that should not be used as features.
# Besides the catalogue identifiers, 'Unnamed: 0' is dropped as well: in the
# data.head() preview it counts 0, 1, 2, ... and appears to be a row index
# saved into the CSV. Left in place it would become a feature and contribute
# to the KNN distance.
# errors='ignore' keeps this cell re-runnable: `data` is rebound here, so on a
# second execution the columns are already gone.
data = data.drop(
    columns=['Unnamed: 0', 'objid', 'specobjid', 'plate', 'mjd', 'fiberid'],
    errors='ignore',
)

# Step 2: Split data into features (X) and target (y)
X = data.drop(columns='class')
y = data['class']

# Step 3: Split data into training and testing sets
# 20% of the rows are held out; random_state pins the shuffle so the split is
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Scale the data
# The scaler is fitted on the training rows only and then applied to the test
# rows, so no test-set statistics leak into the preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 5: Fit the KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_scaled, y_train)

KNeighborsClassifier()

## Checking the accuracy of the KNN Model

In [3]:
# Predict a class for every row of the scaled test set.
y_pred = knn.predict(X_test_scaled)

# Count correct predictions with one element-wise comparison instead of a
# Python-level index loop over a list copy of the labels.
count = int((y_pred == y_test.to_numpy()).sum())

# Accuracy as a percentage of the test rows.
print("Accuracy : ", count / len(y_test) * 100)


Accuracy :  86.87


## Creating the confusion matrix

In [4]:
from sklearn.metrics import confusion_matrix
# Tabulate true labels against predicted labels for the test set.
# Per the scikit-learn documentation, rows correspond to the true class and
# columns to the predicted class, with classes in sorted label order.
cmatrix = confusion_matrix(y_true=y_test, y_pred=y_pred)

# Heading and matrix on separate lines.
print("Confusion Matrix:", cmatrix, sep="\n")

Confusion Matrix:
[[9165   16 1088]
 [ 164 1897   46]
 [1308    4 6312]]
