In [11]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline


In [12]:
# Read the training set, the test set and the submission template in one pass.
train_df, test_df, submission_template = (
    pd.read_csv(csv_path)
    for csv_path in ("hacktrain.csv", "hacktest.csv", "submission_11062025.csv")
)

In [13]:
# Remove the 'Unnamed: 0' column from both frames
# (presumably a row index saved into the CSV -- TODO confirm).
# errors='ignore' keeps this cell re-runnable: both frames are reassigned in
# place, so on a second execution the column is already gone and a plain
# drop would raise KeyError.
train_df = train_df.drop(columns=['Unnamed: 0'], errors='ignore')
test_df = test_df.drop(columns=['Unnamed: 0'], errors='ignore')

In [14]:
# Target labels come from the 'class' column; every remaining column except
# the 'ID' identifier is used as a model feature.
y = train_df.loc[:, 'class']
X = train_df.drop(['ID', 'class'], axis=1)
X_test = test_df.drop(['ID'], axis=1)

In [15]:
# Learn the label -> integer mapping first, then apply it. The fitted encoder
# is kept so predictions can later be mapped back to the original labels.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

In [16]:
# Preprocessing chain: fill missing values with each column's mean,
# standardize the features, then project onto 10 principal components.
pipeline_with_pca = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
    # Fixed seed: with the default svd_solver='auto', PCA may select the
    # randomized solver, whose output would otherwise vary between runs.
    ('pca', PCA(n_components=10, random_state=42))
])

# Fit on the training features only; the test features are transformed with
# the statistics and components learned from the training data.
X_pca = pipeline_with_pca.fit_transform(X)
X_test_pca = pipeline_with_pca.transform(X_test)


In [17]:
# Logistic regression on the PCA-reduced features.
# The `multi_class` argument is intentionally omitted: it is deprecated since
# scikit-learn 1.5, and the lbfgs solver already optimizes the multinomial
# loss when there are three or more classes.
# NOTE(review): assumes the target has more than two classes -- confirm.
model = LogisticRegression(solver='lbfgs', max_iter=1000)
model.fit(X_pca, y_encoded)

# Predict encoded class ids for the test set, then map them back to the
# original labels with the encoder fitted on the training target.
test_preds_encoded = model.predict(X_test_pca)
test_preds = label_encoder.inverse_transform(test_preds_encoded)




In [18]:
# Pair each test ID with its predicted label, arrange the columns in the
# same order as the submission template, and write the result to disk.
submission = pd.DataFrame(
    data={'ID': test_df['ID'], 'Predicted_Class': test_preds},
).loc[:, submission_template.columns]

submission.to_csv("submission_11062025_2.csv", index=False)
