In [1]:
#importing the libraries
import csv
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

warnings.filterwarnings('ignore')

In [2]:
# Location of the star dataset. The original local path stays the default;
# set the STAR_DATA_CSV environment variable to load the file from elsewhere
# without editing the notebook.
star_data_path = os.environ.get(
    'STAR_DATA_CSV',
    r'C:\Users\souna\OneDrive\Desktop\api\star_data.csv',
)
star_data = pd.read_csv(star_data_path)

# Star type codes used in the dataset:
#Brown Dwarf -> Star Type = 0
#Red Dwarf -> Star Type = 1
#White Dwarf-> Star Type = 2
#Main Sequence -> Star Type = 3
#Supergiant -> Star Type = 4
#Hypergiant -> Star Type = 5

# Preview the first rows.
star_data.head()


Unnamed: 0,Temperature (K),Luminosity(L/Lo),Radius(R/Ro),Absolute magnitude(Mv),Star type,Star color,Spectral Class
0,3068,0.0024,0.17,16.12,0,Red,M
1,3042,0.0005,0.1542,16.6,0,Red,M
2,2600,0.0003,0.102,18.7,0,Red,M
3,2800,0.0002,0.16,16.65,0,Red,M
4,1939,0.000138,0.103,20.06,0,Red,M


In [3]:
# Count how many rows belong to each star type.
star_data.loc[:, 'Star type'].value_counts()

Star type
0    40
1    40
2    40
3    40
4    40
5    40
Name: count, dtype: int64

In [4]:
# Summarise the frame: column names, non-null counts, dtypes and memory usage.

star_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 240 entries, 0 to 239
Data columns (total 7 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Temperature (K)         240 non-null    int64  
 1   Luminosity(L/Lo)        240 non-null    float64
 2   Radius(R/Ro)            240 non-null    float64
 3   Absolute magnitude(Mv)  240 non-null    float64
 4   Star type               240 non-null    int64  
 5   Star color              240 non-null    object 
 6   Spectral Class          240 non-null    object 
dtypes: float64(3), int64(2), object(2)
memory usage: 13.3+ KB


In [5]:
# Replace the numeric star-type codes with readable class names.
#Brown Dwarf -> Star Type = 0
#Red Dwarf -> Star Type = 1
#White Dwarf-> Star Type = 2
#Main Sequence -> Star Type = 3
#Supergiant -> Star Type = 4
#Hypergiant -> Star Type = 5
star_type = {
    0 : 'Brown Dwarf',
    1 : 'Red Dwarf',
    2 : 'White Dwarf',
    3 : 'Main Sequence',
    4 : 'Supergiant',
    5 : 'Hypergiant',
}

# Only map while the column still holds the numeric codes. Re-running this cell
# after the mapping would look the names up in `star_type`, find nothing, and
# overwrite every label with NaN.
if pd.api.types.is_numeric_dtype(star_data['Star type']):
    star_data['Star type'] = star_data['Star type'].map(star_type)

In [6]:
# Split the frame into the target (y) and the input features (X).

# Target: the star-type label, selected by name so that a change in column
# order cannot silently pick a different column.
y = star_data['Star type']

# Features: the numeric columns only. The target is dropped explicitly so it
# can never leak into X, even if 'Star type' still holds its numeric codes.
X = star_data.drop(columns=['Star type']).select_dtypes(include=np.number)
y

0      Brown Dwarf
1      Brown Dwarf
2      Brown Dwarf
3      Brown Dwarf
4      Brown Dwarf
          ...     
235     Hypergiant
236     Hypergiant
237     Hypergiant
238     Hypergiant
239     Hypergiant
Name: Star type, Length: 240, dtype: object

In [7]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# reproducible between runs.

from sklearn.model_selection import train_test_split as tts

X_train, X_test, y_train, y_test = tts(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [8]:
# Build the modelling pipeline: standardise the features, then fit a
# multinomial logistic regression on the scaled values.

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

# `multi_class` is intentionally not passed: it is deprecated since
# scikit-learn 1.5 and scheduled for removal. With a non-liblinear solver and
# more than two classes the default already fits a multinomial model, so the
# trained classifier is the same.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression(solver='newton-cg')),
])


In [9]:
# Fit the scaler and the classifier on the training split.
pipe.fit(X=X_train, y=y_train)

In [10]:
# Predict the star type for every row of the held-out test split.
y_preds = pipe.predict(X=X_test)

In [11]:
# True labels of the test split as a plain NumPy array, so they can be
# compared position by position with the predictions.
y_actual = y_test.to_numpy()

In [12]:
# Find the positions where the prediction differs from the true label and
# print each mismatching pair.
incorrect_preds = np.flatnonzero(y_preds != y_actual)

for predicted, actual in zip(y_preds[incorrect_preds], y_actual[incorrect_preds]):
    print(f"Predicted: {predicted}, Actual: {actual}")

Predicted: Main Sequence, Actual: Supergiant
Predicted: Brown Dwarf, Actual: Red Dwarf
Predicted: Main Sequence, Actual: Supergiant
Predicted: Red Dwarf, Actual: Main Sequence


In [13]:
# Persist the fitted pipeline (scaler + classifier) to disk with pickle.
from pickle import dump

with open('pipeline.pk1', 'wb') as file:
    dump(pipe, file)


In [14]:
# Read the pickled pipeline back from disk to check that it round-trips.
# Only unpickle files you created yourself: loading a pickle can execute code.
from pickle import load

with open('pipeline.pk1', 'rb') as file:
    pipeline_test = load(file)

In [15]:
# Show the second row of the test split as an example of the input features.
X_test.iloc[1]

Temperature (K)           3607.000
Luminosity(L/Lo)             0.022
Radius(R/Ro)                 0.380
Absolute magnitude(Mv)      10.120
Name: 71, dtype: float64

In [16]:
# Names of the feature columns, in the order the pipeline was trained on.
features = list(X_test.columns)

In [17]:
# Score one hand-written observation with the reloaded pipeline.
# The values are listed in the same order as `features`.
sample_values = [2637, 0.00073, 0.127, 17.22]
test_data = pd.DataFrame([sample_values], columns=features)
output = pipeline_test.predict(test_data)
print(output)

['Brown Dwarf']
