In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns
import re
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

%matplotlib inline

In [None]:
# Read the training split into the working frame used by the following cells.
df = pd.read_csv("/content/train_star.csv")

In [None]:
# Quick overview of the raw training frame: first rows, schema, missing-value
# counts and column names.
#
# DataFrame.info() writes its report straight to stdout and returns None, so it
# must be called as its own statement. Passing it as an argument to print()
# emits the report before everything else and then prints a literal "None".
print(df.head(), end="\n\n\n")
df.info()
print("\n")
print(df.isna().sum(), df.columns, sep="\n\n\n")

In [None]:
# Collect every distinct character that occurs anywhere in the SpType strings,
# to see which symbols the parsing below has to cope with.
# Missing entries are skipped first: str.join raises TypeError on NaN values.
x = set("".join(df["SpType"].dropna().astype(str)))
print(x)

In [None]:
# SpType packs several pieces of information into one string; unpack them.
sp_type = df["SpType"]

# First character -> spectral class letter.
df["SpectralClass"] = sp_type.str[0]

# Second character -> numeric subclass. Anything that is not a digit becomes
# NaN and is then replaced by the median of the parsed subclasses.
subclass = pd.to_numeric(sp_type.str[1], errors="coerce")
df["SubClass"] = subclass.fillna(subclass.median())

def extract_luminosity(s):
    """Return the luminosity class found at the very end of a spectral type.

    Colons are stripped from ``s`` first. The remaining text must *end* with
    one of Ia, Ib, Iab, I, II, III, IV or V; that suffix is returned.

    Returns None when ``s`` is missing (NaN/None) or when the string does not
    end with one of those tokens.
    """
    if pd.isna(s):
        return None

    cleaned = s.replace(":", "")
    # The trailing `$` anchor makes the alternation pick the full suffix
    # (e.g. "III" rather than just the last "I").
    found = re.search(r"(Ia|Ib|Iab|I|II|III|IV|V)$", cleaned)
    if found is None:
        return None
    return found.group(1)

# Parse the luminosity class out of every SpType entry; rows whose string has
# no recognisable suffix get None.
luminosity = df["SpType"].apply(extract_luminosity)
df["LuminosityClass"] = luminosity
df.head()

In [None]:
# The raw SpType string is no longer needed once SpectralClass, SubClass and
# LuminosityClass have been derived from it.
df = df.drop(columns=["SpType"])
df.head()

In [None]:
import joblib

# Integer-encode the target labels.
LC = LabelEncoder()
df["TargetClass"] = LC.fit_transform(df["TargetClass"])

# Integer-encode the two categorical features, each with its own encoder.
spec_enc = LabelEncoder()
df["SpectralClass"] = spec_enc.fit_transform(df["SpectralClass"])

lum_enc = LabelEncoder()
df["LuminosityClass"] = lum_enc.fit_transform(df["LuminosityClass"])

# Persist the feature encoders so the same mapping can be reloaded when the
# test file is processed.
joblib.dump(spec_enc, "spec_enc.pkl")
joblib.dump(lum_enc, "lum_enc.pkl")

df.head()

In [None]:
# Show the dtype of every column at this point in the notebook.
df.dtypes

In [None]:
# Move TargetClass to the last column so the frame is easier to read:
# keep every other column in its current order and append the target.
other_cols = [col for col in df.columns if col != "TargetClass"]
df = df[other_cols + ["TargetClass"]]
df.head()

In [None]:
# Pairwise correlation between all columns, drawn as a heatmap.
corr = df.corr()
sns.heatmap(corr, cmap="coolwarm")

In [None]:
# Feature selection, based on the author's reading of the heatmap: TargetClass
# correlates strongly with LuminosityClass, B-V and SpectralClass and moderately
# with Amag, so Plx, Vmag and e_Plx are dropped.
Y = df["TargetClass"]
X = df.drop(columns=["TargetClass", "Plx", "Vmag", "e_Plx"])

print(X.head(), Y.head(), sep="\n\n")

# Standardise the raw feature values directly and convert to tensors once.
# The previous DataFrame -> float32 tensor -> numpy -> tensor round trip was
# redundant and fitted the scaler on float32-truncated data, whereas inference
# passes the raw DataFrame values to the saved scaler.
scaler = StandardScaler()
X = torch.tensor(scaler.fit_transform(X.values), dtype=torch.float32)
Y = torch.tensor(Y.values, dtype=torch.long)

# Saved so the identical scaling can be applied to the test file later.
joblib.dump(scaler, "scaler.pkl")

# Fixed random_state: without it every run produces a different hold-out set,
# so any metric computed on x_test / y_test is not reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, random_state=42
)

# BCEWithLogitsLoss expects float targets shaped like the logits, i.e. (N, 1).
y_train = y_train.float().unsqueeze(1)
y_test = y_test.float().unsqueeze(1)

In [None]:
# Small fully connected binary classifier: 5 input features -> 20 -> 10 -> 1.
# The last layer outputs a raw logit (no sigmoid inside the network).
layers = [
    # hidden block 1, with light dropout
    nn.Linear(5, 20),
    nn.ReLU(),
    nn.Dropout(p=0.1),
    # hidden block 2
    nn.Linear(20, 10),
    nn.ReLU(),
    # output logit
    nn.Linear(10, 1),
]
model = nn.Sequential(*layers)

In [None]:
# Loss and optimiser. BCEWithLogitsLoss works on the raw logits returned by the
# model, so no sigmoid is applied before the loss.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.002)
epochs = 7000
trainloss = []  # full-batch training loss, one value per epoch
testloss = []   # stays empty: the per-epoch hold-out evaluation is disabled

# NOTE: the fit uses the complete X / Y tensors (not x_train / y_train), so the
# x_test / y_test rows are seen during training.

# The targets never change between epochs, so reshape/cast them to the (N, 1)
# float layout the loss expects once instead of on every iteration.
targets = Y.view(-1, 1).float()

# Nothing switches the model to eval mode inside the loop, so training mode
# only has to be set once.
model.train()

for i in range(epochs):
    optimizer.zero_grad()
    output = model(X)
    loss = criterion(output, targets)
    loss.backward()
    optimizer.step()

    epoch_loss = loss.item()
    trainloss.append(epoch_loss)

    # Disabled per-epoch hold-out evaluation, kept for reference. If it is
    # re-enabled, model.train() must move back to the top of the loop body.
    #   model.eval()
    #   with torch.no_grad():
    #       output = model(x_test)
    #       loss_test = criterion(output, y_test)
    #       testloss.append(loss_test.item())

    if i % 50 == 0:
        print(f'Epoch :{i+1}/{epochs},Train loss:{epoch_loss}')


In [None]:
# Disabled loss-curve plot: the code below is wrapped in a bare string literal,
# so none of it executes.
# It plots `testloss`, which is created as an empty list and is only appended to
# by the evaluation step of the training loop, and that step is disabled as well.
'''plt.plot(range(epochs),trainloss,label="Train Loss")
plt.plot(range(epochs),testloss,label="Test Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.savefig("loss_curve_gen6.png")
plt.show()
plt.close()'''

In [None]:
# Store only the learned parameters (the state_dict), not the module object.
weights = model.state_dict()
torch.save(weights, "model6.pth")

In [None]:
# Disabled hold-out accuracy check: the code below is wrapped in a bare string
# literal, so none of it executes.
# NOTE(review): the training loop fits on the full X / Y tensors, and x_test is
# split from that same X, so this accuracy would be measured on rows the model
# was trained on if the check were re-enabled as is.
'''from sklearn.metrics import accuracy_score

preds=(torch.sigmoid(model(x_test))>=0.5).int()
acc=accuracy_score(y_test.int(),preds)
print(acc)'''

In [None]:
# Read the test split and rebuild the same SpType-derived columns as for the
# training data. From here on `df` refers to the test frame.
df = pd.read_csv("/content/test_star.csv")

sp_type = df["SpType"]

# First character -> spectral class letter.
df["SpectralClass"] = sp_type.str[0]

# Second character -> numeric subclass; non-digits become NaN and are filled
# with the median of the subclasses parsed from this file.
subclass = pd.to_numeric(sp_type.str[1], errors="coerce")
df["SubClass"] = subclass.fillna(subclass.median())

def extract_luminosity(s):
    """Return the luminosity class found at the very end of a spectral type.

    Colons are stripped from ``s`` first. The remaining text must *end* with
    one of Ia, Ib, Iab, I, II, III, IV or V; that suffix is returned.

    Returns None when ``s`` is missing (NaN/None) or when the string does not
    end with one of those tokens.
    """
    if pd.isna(s):
        return None

    cleaned = s.replace(":", "")
    # The trailing `$` anchor makes the alternation pick the full suffix
    # (e.g. "III" rather than just the last "I").
    found = re.search(r"(Ia|Ib|Iab|I|II|III|IV|V)$", cleaned)
    if found is None:
        return None
    return found.group(1)

import joblib

# Luminosity class parsed from SpType, after which the raw string is dropped.
df["LuminosityClass"] = df["SpType"].apply(extract_luminosity)
df = df.drop(columns="SpType")
df.head()  # not the cell's last expression, so nothing is displayed here

# Reload the encoders that were fitted and saved on the training data.
spec_enc = joblib.load("spec_enc.pkl")
lum_enc = joblib.load("lum_enc.pkl")

# Spectral classes the encoder has never seen are replaced by "O" so that
# transform() does not raise on them.
# NOTE(review): this presumes "O" itself is among spec_enc.classes_, and no
# equivalent fallback exists for LuminosityClass; confirm both against the data.
known_spectral = df["SpectralClass"].isin(spec_enc.classes_)
df["SpectralClass"] = df["SpectralClass"].where(known_spectral, "O")

df["SpectralClass"] = spec_enc.transform(df["SpectralClass"])
df["LuminosityClass"] = lum_enc.transform(df["LuminosityClass"])

# Same columns removed as for the training feature matrix.
X = df.drop(columns=["Plx", "Vmag", "e_Plx"])
X.head()


In [None]:
# Inference on the test features and creation of the submission file.

# Eval mode switches off the Dropout layer so predictions are deterministic.
model.eval()

# Apply the scaler that was fitted on the training features.
# The scaled array gets its own name: rebinding `X` (a DataFrame) to an ndarray
# made this cell fail on `X.values` when it was run a second time. The earlier
# unscaled `X_test` tensor was overwritten before use and has been removed.
scaler = joblib.load("scaler.pkl")
X_scaled = scaler.transform(X.values)
X_test = torch.tensor(X_scaled, dtype=torch.float32)

with torch.no_grad():
    # The model outputs logits; sigmoid turns them into probabilities, which
    # are thresholded at 0.5 to obtain the 0/1 class prediction.
    probs = torch.sigmoid(model(X_test))
    preds = (probs >= 0.5).int().numpy().ravel()

submission = pd.DataFrame({
    "index": df.index,
    "TargetClass": preds
})
submission.to_csv("submission2.csv", index=False)