## Useful Imports 

In [4]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
pd.set_option('display.max_rows',None)
%matplotlib inline

## Reading the Training Data

In [5]:
data=pd.read_csv('MNIST_data/train.csv')
data.shape

(42000, 785)

In [49]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42000 entries, 0 to 41999
Columns: 785 entries, label to pixel783
dtypes: int64(785)
memory usage: 251.5 MB


## Using 10000 Images in Training and Testing

In [9]:
X=data.iloc[:10000,1:]
Y=data.iloc[:10000,0]

In [10]:
X.shape,Y.shape

((10000, 784), (10000,))

### Spliting the training and testing data and labels

In [12]:
# X,Y=train_images()
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.33, random_state=42)
# X_test=X_test[:2000]
# Y_test=y_test[:2000]

In [13]:
y_train.shape

(6700,)

### function to use while creating a convolutional neural net

In [15]:
def init_weights(shape):
    init_random_dist = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(init_random_dist)

In [16]:
def init_bias(shape):
    init_bias_vals = tf.constant(0.1, shape=shape)
    return tf.Variable(init_bias_vals)

In [17]:
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

In [18]:
def max_pool_2by2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')

In [19]:
def convolutional_layer(input_x, shape):
    W = init_weights(shape)
    b = init_bias([shape[3]])
    return tf.nn.relu(conv2d(input_x, W) + b)

In [20]:
def normal_full_layer(input_layer, size):
    input_size = int(input_layer.get_shape()[1])
    W = init_weights([input_size, size])
    b = init_bias([size])
    return tf.matmul(input_layer, W) + b

### Creating Tesorflow Variables 

In [21]:
x = tf.placeholder(tf.float32,shape=[None,784])

In [22]:
y_true = tf.placeholder(tf.float32,shape=[None,10])

In [23]:
x_image = tf.reshape(x,[-1,28,28,1])
x_image.shape

TensorShape([Dimension(None), Dimension(28), Dimension(28), Dimension(1)])

### Creating Convolutional Layers 

In [24]:
convo_1 = convolutional_layer(x_image,shape=[6,6,1,32])
convo_1_pooling = max_pool_2by2(convo_1)
print(convo_1.shape)
convo_1_pooling.shape

(?, 28, 28, 32)


TensorShape([Dimension(None), Dimension(14), Dimension(14), Dimension(32)])

In [25]:
convo_2 = convolutional_layer(convo_1_pooling,shape=[6,6,32,64])
convo_2_pooling = max_pool_2by2(convo_2)

In [26]:
convo_2_flat = tf.reshape(convo_2_pooling,[-1,7*7*64])
full_layer_one = tf.nn.relu(normal_full_layer(convo_2_flat,1024))

In [27]:
hold_prob = tf.placeholder(tf.float32)
full_one_dropout = tf.nn.dropout(full_layer_one,keep_prob=hold_prob)

In [28]:
y_pred = normal_full_layer(full_one_dropout,10)

In [29]:
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_true,logits=y_pred))

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [30]:
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
train = optimizer.minimize(cross_entropy)

In [31]:
# Initianlizing the variables
init = tf.global_variables_initializer()

In [32]:
def indices_to_one_hot(data):
    """Convert an iterable of indices to one-hot encoded labels."""
    targets = np.array(data).reshape(-1)
    return np.eye(nb_classes)[targets]
# y_train=pd.get_dummies(y_train)
nb_classes = 10
y_train=indices_to_one_hot(y_train)
y_test=indices_to_one_hot(y_test)

In [33]:
input_images=pd.read_csv('MNIST_data/test.csv')
# input_images=input_images[:200]
input_images.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Tensorflow Model

In [34]:
steps =6500
sess=tf.Session()
    
sess.run(init)

for i in range(steps):

    batch_x , batch_y = X_train.iloc[i*50:i*50+50,:],y_train[i*50:i*50+50]
    i=i+100
    sess.run(train,feed_dict={x:batch_x,y_true:batch_y,hold_prob:0.5})

    # PRINT OUT A MESSAGE EVERY 100 STEPS
    if i%100 == 0:

        print('Currently on step {}'.format(i))
        print('Accuracy is:')
        # Test the Train Model
        matches = tf.equal(tf.argmax(y_pred,1),tf.argmax(y_true,1))

        acc = tf.reduce_mean(tf.cast(matches,tf.float32))

        print(sess.run(acc,feed_dict={x:X_test,y_true:y_test,hold_prob:1.0}))
        print('\n')
      #newdata=put your data here


Currently on step 100
Accuracy is:
0.13757576


Currently on step 200
Accuracy is:
0.87060606


Currently on step 300
Accuracy is:
0.8566667


Currently on step 400
Accuracy is:
0.8566667


Currently on step 500
Accuracy is:
0.8566667


Currently on step 600
Accuracy is:
0.8566667


Currently on step 700
Accuracy is:
0.8566667


Currently on step 800
Accuracy is:
0.8566667


Currently on step 900
Accuracy is:
0.8566667


Currently on step 1000
Accuracy is:
0.8566667


Currently on step 1100
Accuracy is:
0.8566667


Currently on step 1200
Accuracy is:
0.8566667


Currently on step 1300
Accuracy is:
0.8566667


Currently on step 1400
Accuracy is:
0.8566667


Currently on step 1500
Accuracy is:
0.8566667


Currently on step 1600
Accuracy is:
0.8566667


Currently on step 1700
Accuracy is:
0.8566667


Currently on step 1800
Accuracy is:
0.8566667


Currently on step 1900
Accuracy is:
0.8566667


Currently on step 2000
Accuracy is:
0.8566667


Currently on step 2100
Accuracy is:
0.8566667



In [32]:
input_images.shape

(28000, 784)

## Prediction on the trained model

In [35]:
d=[]
for i in range(140):
    prediction=tf.argmax(y_pred,1)
    y=prediction.eval(feed_dict={x: input_images.iloc[i*200:i*200+200,:],hold_prob:1.0}, session=sess)
    d.append(y)

##      Printing output in required format

In [36]:
result =np.concatenate(d)
result

array([2, 0, 8, ..., 3, 9, 2], dtype=int64)

In [37]:
# np.savetxt('result.txt',result,delimiter=' ')

In [38]:
result.shape

(28000,)

In [40]:
a=np.arange(1,28001)
a=np.transpose(a)
a.shape

(28000,)

In [41]:
result=np.transpose(result)
result.shape

(28000,)

In [45]:
df1=pd.DataFrame(a,columns=['ImageId'])
df2=pd.DataFrame(result,columns=['Label'])
df3=pd.concat([df1,df2],axis=1)
df3.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,8
3,4,4
4,5,2


In [46]:
df3.to_csv('Result.csv',index=False)

In [44]:
# sample=pd.read_csv('MNIST_data/sample_submission.csv')
# sample.head()

Unnamed: 0,ImageId,Label
0,1,0
1,2,0
2,3,0
3,4,0
4,5,0
