## Load data

In [1]:
import pandas as pd

df_train = pd.read_csv("Datasets/train.csv")
y_train = df_train["label"]
X_train = df_train.drop("label", axis=1)

In [2]:
df_test = pd.read_csv("Datasets/test.csv")
y_test = []  # Empty no labels for test data
X_test = df_test

## Image processing

In [3]:
from sklearn.preprocessing import MinMaxScaler

scale = MinMaxScaler()

X_train = scale.fit_transform(X_train)

X_test = scale.transform(X_test)

## Data augmentation


In [4]:
from scipy.ndimage import shift

# Method to shift the image by given dimension
def shift_image(image, dx, dy):
    image = image.reshape((28, 28))
    shifted_image = shift(image, [dy, dx], cval=0, mode="constant")
    return shifted_image.reshape([-1])


# Creating Augmented Dataset
X_train_augmented = list(X_train)
y_train_augmented = list(y_train)

for dx, dy in ((1,0), (-1,0), (0,1), (0,-1)):
     for image, label in zip(X_train, y_train):
             X_train_augmented.append(shift_image(image, dx, dy))
             y_train_augmented.append(label)

## RandomForest model creation

In [5]:
from sklearn.ensemble import RandomForestClassifier

rnd_forest = RandomForestClassifier(n_estimators=800)

## RandomForest model training

In [6]:
print(rnd_forest.fit(X_train_augmented, y_train_augmented))

RandomForestClassifier(n_estimators=800)


## RandomForest model test

In [7]:
y_pred_test = rnd_forest.predict(X_test)

## Ouput file csv creation

In [8]:
df_pred = pd.DataFrame({'ImageId': list(range(1, len(y_pred_test) + 1)), 'Label': y_pred_test})
df_pred.to_csv("Im-Rises_randomforest.csv", index=False)
print(df_pred.head())


   ImageId  Label
0        1      2
1        2      0
2        3      9
3        4      9
4        5      3


In [9]:
# Score: 0.96821
# Data augmentation score:
