### Importing Libraries

In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [7]:
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
y = train["label"].values
X = train.drop("label", axis=1).values.astype(np.float32)
X_test = test.values.astype(np.float32)

X /= 255.0
X_test /= 255.0

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.15, random_state=42, stratify=y)

### Grid search for C


In [8]:
param_grid = {'C': [0.1, 1.0, 5.0]}
logreg = LogisticRegression(multi_class='multinomial', solver='saga', max_iter=100, n_jobs=-1)
grid = GridSearchCV(logreg, param_grid, cv=2, verbose=2)
grid.fit(X_train, y_train)

print("Best params:", grid.best_params_)
best_model = grid.best_estimator_

yv = best_model.predict(X_val)
print("Validation accuracy:", accuracy_score(y_val, yv))

Fitting 2 folds for each of 3 candidates, totalling 6 fits




[CV] END ..............................................C=0.1; total time= 1.3min




[CV] END ..............................................C=0.1; total time=  40.6s




[CV] END ..............................................C=1.0; total time=  53.7s




[CV] END ..............................................C=1.0; total time=  52.2s




[CV] END ..............................................C=5.0; total time=  51.4s




[CV] END ..............................................C=5.0; total time=  48.3s
Best params: {'C': 0.1}
Validation accuracy: 0.915079365079365


### Train on full data & predict


In [9]:
final_model = LogisticRegression(
    multi_class='multinomial',
    solver='saga',
    C=grid.best_params_['C'],
    max_iter=100,
    n_jobs=-1
)
final_model.fit(X, y)
y_pred = final_model.predict(X_test)

submission = pd.DataFrame({"ImageId": np.arange(1, len(y_pred)+1), "Label": y_pred})
submission.to_csv("logreg_submission.csv", index=False)

