In [57]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler


In [58]:
# Load the star catalogue from the notebook's working directory.
df = pd.read_csv("Stars.csv")

# info() prints its summary directly. head() is kept as the final expression
# because a notebook only renders the value of a cell's last line; placed
# mid-cell, its result would be silently discarded.
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 240 entries, 0 to 239
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Temperature     240 non-null    int64  
 1   L               240 non-null    float64
 2   R               240 non-null    float64
 3   A_M             240 non-null    float64
 4   Color           240 non-null    object 
 5   Spectral_Class  240 non-null    object 
 6   Type            240 non-null    int64  
dtypes: float64(3), int64(2), object(2)
memory usage: 13.3+ KB


In [None]:
# Label-encode the two text columns in place so the classifier receives numbers.
# Each column gets its own fitted encoder, stored in `label_encoders`, so the
# string -> integer mapping of BOTH columns stays available afterwards (e.g. via
# label_encoders['Color'].classes_ or .transform([...]) for new samples).
# Refitting a single shared encoder would overwrite the Color mapping as soon as
# Spectral_Class is fitted.
label_encoders = {}
for column in ['Color', 'Spectral_Class']:
    encoder = LabelEncoder()
    df[column] = encoder.fit_transform(df[column])
    label_encoders[column] = encoder
# After the loop `encoder` is the Spectral_Class encoder, matching the state the
# previous single-encoder version left behind.
print(df.head())

   Temperature         L       R    A_M  Color  Spectral_Class  Type
0         3068  0.002400  0.1700  16.12      8               5     0
1         3042  0.000500  0.1542  16.60      8               5     0
2         2600  0.000300  0.1020  18.70      8               5     0
3         2800  0.000200  0.1600  16.65      8               5     0
4         1939  0.000138  0.1030  20.06      8               5     0


In [60]:
# Separate the target ('Type') from the predictor columns.
y = df['Type']
X = df.drop(columns=['Type'])
print(X.head())

   Temperature         L       R    A_M  Color  Spectral_Class
0         3068  0.002400  0.1700  16.12      8               5
1         3042  0.000500  0.1542  16.60      8               5
2         2600  0.000300  0.1020  18.70      8               5
3         2800  0.000200  0.1600  16.65      8               5
4         1939  0.000138  0.1030  20.06      8               5


In [73]:
# Split BEFORE scaling: the scaler's mean/std must be learned from the training
# rows only, otherwise statistics of the held-out rows leak into training and
# the test score is optimistically biased.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # fit on training rows only
X_test = scaler.transform(X_test_raw)        # reuse the training statistics

# Scaled copy of the full feature matrix, kept for any cell that still expects
# `X_scaled`; it is NOT used for fitting or evaluation.
X_scaled = scaler.transform(X)


In [62]:
# Build a 3-nearest-neighbours classifier and train it on the training split.
# fit() returns the estimator itself, so construction and training are chained.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
print("Model Trained Succesfully")


Model Trained Succesfully


In [63]:

# Score the fitted classifier on the held-out split.
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

# One print call; sep="\n" puts each piece on its own line, as separate
# print() calls would.
print(
    f"Model Accuracy: {accuracy * 100:.2f}%",
    "\n--- Classification Report ---",
    report,
    sep="\n",
)

Model Accuracy: 93.75%

--- Classification Report ---
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       0.88      1.00      0.93         7
           2       1.00      1.00      1.00         6
           3       0.88      0.88      0.88         8
           4       0.88      0.88      0.88         8
           5       1.00      0.91      0.95        11

    accuracy                           0.94        48
   macro avg       0.94      0.94      0.94        48
weighted avg       0.94      0.94      0.94        48



In [68]:
# One sample in the column order of X:
# Temperature, L, R, A_M, Color, Spectral_Class.
# NOTE(review): 4 and 5 are presumably the label-encoded Color / Spectral_Class
# codes for this star -- confirm against the fitted encoders.
betelgeuse_features = np.array([[3500, 126000, 887, -6.0, 4, 5]])

# The classifier was trained on standardized features, so a new sample must go
# through the same fitted scaler first; feeding raw magnitudes (e.g. L = 126000)
# makes the neighbour distances meaningless. Wrapping the row in a DataFrame
# with X's column names lets the scaler match features by name.
betelgeuse_scaled = scaler.transform(
    pd.DataFrame(betelgeuse_features, columns=X.columns)
)
predicted_type = knn.predict(betelgeuse_scaled)
print(f"The model predicts Betelgeuse is Star Type: {predicted_type[0]}")


The model predicts Betelgeuse is Star Type: 5


In [71]:
# One sample in the column order of X:
# Temperature, L, R, A_M, Color, Spectral_Class.
# NOTE(review): 2 and 0 are presumably the label-encoded Color / Spectral_Class
# codes for this star -- confirm against the fitted encoders.
sirius_features = np.array([[9940, 25.4, 1.7, 1.42, 2, 0]])

# Standardize with the already-fitted scaler. The row is wrapped in a DataFrame
# carrying X's column names so features are matched by name; recent
# scikit-learn versions warn when a scaler fitted on named columns is given a
# bare array.
sirius_scaled = scaler.transform(
    pd.DataFrame(sirius_features, columns=X.columns)
)

predicted_type = knn.predict(sirius_scaled)
print(predicted_type[0])

3




In [72]:
# Human-readable names for the integer classes in the 'Type' column.
# NOTE(review): confirm the names for 0 and 1 against the dataset's own
# documentation before relying on them.
star_type_mapping = {
    0: 'Brown Dwarf',
    1: 'Red Dwarf',
    2: 'White Dwarf',
    3: 'Main Sequence',
    4: 'Supergiant',
    5: 'Hypergiant'
}

# `predicted_type` is whatever the most recently executed prediction cell left
# behind. In top-to-bottom order that is the Sirius prediction from the cell
# directly above, so the message names Sirius (it previously said Betelgeuse).
predicted_number = predicted_type[0]
predicted_name = star_type_mapping[predicted_number]

print(f"The model's raw output is: {predicted_number}")
print(f"The model predicts Sirius is a: {predicted_name}")

The model's raw output is: 3
The model predicts Betelgeuse is a: Main Sequence
