Know the dataset-

obj_ID: Unique identifier for each astronomical object.

alpha: Right ascension, which is the celestial equivalent of longitude.

delta: Declination, which is the celestial equivalent of latitude.

u: Ultraviolet magnitude, a measure of brightness in the ultraviolet range.

g: Green magnitude, a measure of brightness in the green range.

r: Red magnitude, a measure of brightness in the red range.

i: Near-infrared magnitude, a measure of brightness in the near-infrared range.

z: Infrared magnitude, a measure of brightness in the infrared range.

run_ID: Identifier for the specific observational run.

rerun_ID: Identifier for the reprocessing version of the data.

cam_col: Camera column, indicating which CCD column the data was taken from.

field_ID: Identifier for the field within the run.

spec_obj_ID: Unique identifier for the spectroscopic object.

class: Classification of the object (e.g., star, galaxy).

redshift: Measure of how much the wavelength of the light has been stretched by the 
expansion of the universe.

plate: Identifier for the spectroscopic plate.

MJD: Modified Julian Date, which is a standard astronomical timekeeping format.

fiber_ID: Identifier for the specific optical fiber used in the spectroscopic 
observation.

In [1]:

from pandas import  *
from numpy import *
from seaborn import *
from matplotlib.pyplot import *
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Flatten,Conv2D,MaxPooling2D,LSTM
from tensorflow.keras.callbacks import EarlyStopping
from  lightgbm import *
from catboost import CatBoostClassifier, CatBoostRegressor
from xgboost import *


In [2]:
df=read_csv('star_classification.csv')

In [3]:
df.columns

Index(['obj_ID', 'alpha', 'delta', 'u', 'g', 'r', 'i', 'z', 'run_ID',
       'rerun_ID', 'cam_col', 'field_ID', 'spec_obj_ID', 'class', 'redshift',
       'plate', 'MJD', 'fiber_ID'],
      dtype='object')

In [4]:
df.describe()

Unnamed: 0,obj_ID,alpha,delta,u,g,r,i,z,run_ID,rerun_ID,cam_col,field_ID,spec_obj_ID,redshift,plate,MJD,fiber_ID
count,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
mean,1.237665e+18,177.629117,24.135305,21.980468,20.531387,19.645762,19.084854,18.66881,4481.36606,301.0,3.51161,186.13052,5.783882e+18,0.576661,5137.00966,55588.6475,449.31274
std,8438560000000.0,96.502241,19.644665,31.769291,31.750292,1.85476,1.757895,31.728152,1964.764593,0.0,1.586912,149.011073,3.324016e+18,0.730707,2952.303351,1808.484233,272.498404
min,1.237646e+18,0.005528,-18.785328,-9999.0,-9999.0,9.82207,9.469903,-9999.0,109.0,301.0,1.0,11.0,2.995191e+17,-0.009971,266.0,51608.0,1.0
25%,1.237659e+18,127.518222,5.146771,20.352353,18.96523,18.135828,17.732285,17.460677,3187.0,301.0,2.0,82.0,2.844138e+18,0.054517,2526.0,54234.0,221.0
50%,1.237663e+18,180.9007,23.645922,22.179135,21.099835,20.12529,19.405145,19.004595,4188.0,301.0,4.0,146.0,5.614883e+18,0.424173,4987.0,55868.5,433.0
75%,1.237668e+18,233.895005,39.90155,23.68744,22.123767,21.044785,20.396495,19.92112,5326.0,301.0,5.0,241.0,8.332144e+18,0.704154,7400.25,56777.0,645.0
max,1.237681e+18,359.99981,83.000519,32.78139,31.60224,29.57186,32.14147,29.38374,8162.0,301.0,6.0,989.0,1.412694e+19,7.011245,12547.0,58932.0,1000.0


In [5]:
df.isnull().sum()

obj_ID         0
alpha          0
delta          0
u              0
g              0
r              0
i              0
z              0
run_ID         0
rerun_ID       0
cam_col        0
field_ID       0
spec_obj_ID    0
class          0
redshift       0
plate          0
MJD            0
fiber_ID       0
dtype: int64

In [6]:
x=df[['alpha', 'delta', 'u', 'g', 'r', 'i', 'z', 'redshift',
       'plate', 'MJD']]

In [7]:
x.head()

Unnamed: 0,alpha,delta,u,g,r,i,z,redshift,plate,MJD
0,135.689107,32.494632,23.87882,22.2753,20.39501,19.16573,18.79371,0.634794,5812,56354
1,144.826101,31.274185,24.77759,22.83188,22.58444,21.16812,21.61427,0.779136,10445,58158
2,142.18879,35.582444,25.26307,22.66389,20.60976,19.34857,18.94827,0.644195,4576,55592
3,338.741038,-0.402828,22.13682,23.77656,21.61162,20.50454,19.2501,0.932346,9149,58039
4,345.282593,21.183866,19.43718,17.58028,16.49747,15.97711,15.54461,0.116123,6121,56187


 scaling the data would be better

In [8]:
'''scaler=StandardScaler()
x=scaler.fit_transform(x)'''

'scaler=StandardScaler()\nx=scaler.fit_transform(x)'

In [9]:
x

Unnamed: 0,alpha,delta,u,g,r,i,z,redshift,plate,MJD
0,135.689107,32.494632,23.87882,22.27530,20.39501,19.16573,18.79371,0.634794,5812,56354
1,144.826101,31.274185,24.77759,22.83188,22.58444,21.16812,21.61427,0.779136,10445,58158
2,142.188790,35.582444,25.26307,22.66389,20.60976,19.34857,18.94827,0.644195,4576,55592
3,338.741038,-0.402828,22.13682,23.77656,21.61162,20.50454,19.25010,0.932346,9149,58039
4,345.282593,21.183866,19.43718,17.58028,16.49747,15.97711,15.54461,0.116123,6121,56187
...,...,...,...,...,...,...,...,...,...,...
99995,39.620709,-2.594074,22.16759,22.97586,21.90404,21.30548,20.73569,0.000000,9374,57749
99996,29.493819,19.798874,22.69118,22.38628,20.45003,19.75759,19.41526,0.404895,7626,56934
99997,224.587407,15.700707,21.16916,19.26997,18.20428,17.69034,17.35221,0.143366,2764,54535
99998,212.268621,46.660365,25.35039,21.63757,19.91386,19.07254,18.62482,0.455040,6751,56368


In [10]:
y=df[['class']]

In [11]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=1)

RF

In [12]:
'''from sklearn.ensemble import RandomForestClassifier
r=RandomForestClassifier(n_estimators=100)
r.fit(x_train,y_train)

SyntaxError: incomplete input (18543511.py, line 1)

In [None]:
r.score(x_train,y_train)

0.9999875

In [None]:
r.score(x_test,y_test)

0.98125

In [None]:
r.predict([[
228.447728334265,12.745031931428,22.36261,21.7106,21.5963,21.60179,21.12727,2.689002,5488,56013
]])




array(['QSO'], dtype=object)

In [None]:
r.predict([[ 40.7957426727124,3.56436811387184,22.79262,22.29579,22.23841,21.55672,21.01273,1.065883,9417,58055
]])



array(['GALAXY'], dtype=object)

In [None]:
r.predict([[
169.41723647266,7.71984985397181,25.08139,22.03635,21.83119,21.71758,21.24758,0.0002660389,5369,56272
]])



array(['STAR'], dtype=object)

In [None]:
'''#saving The Models
import joblib
joblib.dump(r,'RF_adv_stars_class.pkl')

#loading The models anywhere

model=joblib.load('RF_adv_stars_class.pkl')'''

"#saving The Models\nimport joblib\njoblib.dump(r,'RF_adv_stars_class.pkl')\n\n#loading The models anywhere\n\nmodel=joblib.load('RF_adv_stars_class.pkl')"

GBoost

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
g=GradientBoostingClassifier(n_estimators=100)
g.fit(x_train,y_train)

  y = column_or_1d(y, warn=True)


In [None]:
g.score(x_train,y_train)

0.9774125

In [None]:
g.score(x_test,y_test)

0.9783

In [None]:
g.predict([[
228.447728334265,12.745031931428,22.36261,21.7106,21.5963,21.60179,21.12727,2.689002,5488,56013
]])




array(['QSO'], dtype=object)

In [None]:
g.predict([[ 40.7957426727124,3.56436811387184,22.79262,22.29579,22.23841,21.55672,21.01273,1.065883,9417,58055
]])



array(['GALAXY'], dtype=object)

In [None]:
g.predict([[
169.41723647266,7.71984985397181,25.08139,22.03635,21.83119,21.71758,21.24758,0.0002660389,5369,56272
]])



array(['STAR'], dtype=object)

In [None]:
'''#saving The Models
import joblib
joblib.dump(g,'GBoost_adv_stars_class.pkl')

#loading The models anywhere

model=joblib.load('GBoost_adv_stars_class.pkl')'''

In [None]:
# XGBoost: Known for its speed and performance.
# LightGBM: Optimized for speed and efficiency, especially with large datasets.
# CatBoost: Handles categorical features effectively and reduces the need for extensive preprocessing.

LightGBM - LGBMClassifier / LGBMRegressor

In [None]:
# Initialize the LightGBM classifier
l = LGBMClassifier(n_estimators=500, learning_rate=0.01)
l.fit(x_train, y_train)


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003233 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 80000, number of used features: 10
[LightGBM] [Info] Start training from score -0.519740
[LightGBM] [Info] Start training from score -1.664686
[LightGBM] [Info] Start training from score -1.532130


In [None]:
l.score(x_train,y_train)

0.981525

In [None]:
l.score(x_test,y_test)

0.98

In [None]:
l.predict([[
228.447728334265,12.745031931428,22.36261,21.7106,21.5963,21.60179,21.12727,2.689002,5488,56013
]])


array(['QSO'], dtype=object)

In [None]:
l.predict([[ 40.7957426727124,3.56436811387184,22.79262,22.29579,22.23841,21.55672,21.01273,1.065883,9417,58055
]])

array(['GALAXY'], dtype=object)

In [None]:
l.predict([[
169.41723647266,7.71984985397181,25.08139,22.03635,21.83119,21.71758,21.24758,0.0002660389,5369,56272
]])

array(['STAR'], dtype=object)

In [None]:
'''#saving The Models
import joblib
joblib.dump(l,'LGBM_adv_stars_class.pkl')

#loading The models anywhere

model=joblib.load('LGBM_adv_stars_class.pkl')'''

Xgboost syntax

In [None]:
'''from xgboost import *

x = XGBClassifier(n_estimators=100, xearning_rate=0.1)

# Fit the x to the training data
x.fit(x_train, y_train)

# Make predictions
y_pred = x.predict(x_test)

# Evaxuate the x
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')
'''

"from xgboost import *\n\nx = XGBClassifier(n_estimators=100, xearning_rate=0.1)\n\n# Fit the x to the training data\nx.fit(x_train, y_train)\n\n# Make predictions\ny_pred = x.predict(x_test)\n\n# Evaxuate the x\naccuracy = accuracy_score(y_test, y_pred)\nprint(f'Accuracy: {accuracy:.4f}')\n"

Catboost - CatBoostClassifier / CatBoostRegressor

In [None]:
c=CatBoostClassifier(n_estimators=500)
c.fit(x_train, y_train)

Learning rate set to 0.177726
0:	learn: 0.8160952	total: 22.8ms	remaining: 11.4s
1:	learn: 0.6462485	total: 45ms	remaining: 11.2s
2:	learn: 0.5277521	total: 65.3ms	remaining: 10.8s
3:	learn: 0.4419925	total: 89.3ms	remaining: 11.1s
4:	learn: 0.3762191	total: 113ms	remaining: 11.2s
5:	learn: 0.3241797	total: 139ms	remaining: 11.4s
6:	learn: 0.2812994	total: 239ms	remaining: 16.8s
7:	learn: 0.2487304	total: 265ms	remaining: 16.3s
8:	learn: 0.2236258	total: 287ms	remaining: 15.7s
9:	learn: 0.2035745	total: 311ms	remaining: 15.2s
10:	learn: 0.1856255	total: 340ms	remaining: 15.1s
11:	learn: 0.1720191	total: 372ms	remaining: 15.1s
12:	learn: 0.1609548	total: 394ms	remaining: 14.8s
13:	learn: 0.1448564	total: 414ms	remaining: 14.4s
14:	learn: 0.1362885	total: 434ms	remaining: 14s
15:	learn: 0.1297095	total: 456ms	remaining: 13.8s
16:	learn: 0.1233444	total: 481ms	remaining: 13.7s
17:	learn: 0.1188456	total: 513ms	remaining: 13.7s
18:	learn: 0.1144786	total: 538ms	remaining: 13.6s
19:	learn: 

<catboost.core.CatBoostClassifier at 0x21f8c629490>

In [None]:
c.score(x_train,y_train)

0.9843125

In [None]:
c.score(x_test,y_test)

0.98105

In [None]:
c.predict([[
228.447728334265,12.745031931428,22.36261,21.7106,21.5963,21.60179,21.12727,2.689002,5488,56013
]])


array([['QSO']], dtype=object)

In [None]:
c.predict([[ 40.7957426727124,3.56436811387184,22.79262,22.29579,22.23841,21.55672,21.01273,1.065883,9417,58055
]])

array([['GALAXY']], dtype=object)

In [None]:
c.predict([[
169.41723647266,7.71984985397181,25.08139,22.03635,21.83119,21.71758,21.24758,0.0002660389,5369,56272
]])

array([['STAR']], dtype=object)

In [None]:
'''#saving The Models
import joblib
joblib.dump(c,'CatBoost_adv_stars_class.pkl')

#loading The models anywhere

model=joblib.load('CatBoost_adv_stars_class.pkl')'''

ET

In [None]:
'''from sklearn.ensemble import ExtraTreesClassifier
e=ExtraTreesClassifier(n_estimators=200)
e.fit(x_train, y_train)'''

  return fit_method(estimator, *args, **kwargs)


In [13]:
import joblib
#loading The models anywhere

e=joblib.load('ET_adv_stars_class.pkl')

In [14]:
e.score(x_train,y_train)

1.0

In [15]:
e.score(x_test,y_test)

0.9763

In [16]:
e.predict([[
228.447728334265,12.745031931428,22.36261,21.7106,21.5963,21.60179,21.12727,2.689002,5488,56013
]])




array(['QSO'], dtype=object)

In [17]:
e.predict([[ 40.7957426727124,3.56436811387184,22.79262,22.29579,22.23841,21.55672,21.01273,1.065883,9417,58055
]])



array(['GALAXY'], dtype=object)

In [18]:
e.predict([[
169.41723647266,7.71984985397181,25.08139,22.03635,21.83119,21.71758,21.24758,0.0002660389,5369,56272
]])



array(['STAR'], dtype=object)

In [19]:
'''#saving The Models
import joblib
joblib.dump(e,'ET_adv_stars_class.pkl')

#loading The models anywhere
'''


"#saving The Models\nimport joblib\njoblib.dump(e,'ET_adv_stars_class.pkl')\n\n#loading The models anywhere\n"

In [22]:
e = joblib.load("C:\\imp\\ml JUPYTER\\MY ML PROJECTS(BOOK)\\3.Deep learning\\ANN\\Recreation\\space\\ADV star class\\ET_adv_stars_class.pkl")
print(type(e))  # Add this line to check the type of the loaded object


<class 'sklearn.ensemble._forest.ExtraTreesClassifier'>
