# CV Problem

Import required libraries

In [1]:
# Library required for some basic operations
import numpy as np
import pandas as pd

# Library to plot data
import matplotlib.pyplot as plt

# Library to open pickle files
import pickle

Load the required data from pickle files

In [2]:
# NOTE: pickle.load can execute arbitrary code embedded in the file;
# only run this against dataset files obtained from a trusted source.

# Training images: unpickle the feature data into train_x
with open('Vision_task_dataset_public/train_image.pkl', 'rb') as image_file:
    train_x = pickle.load(image_file)

# Training labels: unpickle the matching targets into train_y
with open('Vision_task_dataset_public/train_label.pkl', 'rb') as label_file:
    train_y = pickle.load(label_file)

Import additional libraries required to fit the data onto Machine Learning models

In [3]:
# Library to split the training data into train and test data
from sklearn.model_selection import train_test_split

# Libraries for Machine Learning models
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import SGDClassifier

# Library for model evaluation
from sklearn.metrics import accuracy_score

  from numpy.core.umath_tests import inner1d


Split the data into train and test set to verify different models

In [4]:
# Hold out 40% of the labelled data as a validation set for comparing models.
# random_state pins the shuffle so that a fresh Restart & Run All produces the
# same split, which keeps the accuracy figures of the models below comparable
# from one run to the next.
X_train, X_test, Y_train, Y_test = train_test_split(
    train_x, train_y, test_size=0.4, random_state=42
)

<b>Train a Random Forest Classifier Model</b>

In [5]:
# Train a Random Forest classifier with 100 trees.
# random_state seeds the forest's internal randomness so that, for a given
# training split, the fitted model and its predictions are the same on every
# run. This matters because the same `rf` object is reused later to label the
# real test set.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, Y_train)

# Predict on the held-out split
y_pred = rf.predict(X_test)

# Measure the accuracy of the model on the held-out split
print("test accuracy:", accuracy_score(Y_test, y_pred))

test accuracy: 0.8265625


<b> We have obtained an accuracy of 83% </b>

<b> Train a Linear Support Vector Machine Model </b>

In [6]:
# Linear SVM: hinge loss with an L2 penalty, fitted by stochastic gradient descent
sgd = SGDClassifier(
    loss='hinge',
    penalty='l2',
    alpha=1e-3,
    random_state=42,
    max_iter=5,
    tol=None,
)
sgd.fit(X_train, Y_train)

# Label the held-out split with the fitted model
y_pred = sgd.predict(X_test)

# Report the fraction of held-out samples classified correctly
sgd_accuracy = accuracy_score(Y_test, y_pred)
print("test accuracy:", sgd_accuracy)

test accuracy: 0.7665625


<b> We have obtained an accuracy of approximately 77% </b>

<b> Train a Naïve Bayes Model </b>

In [7]:
# Multinomial Naive Bayes with default settings, fitted on the training split
nb = MultinomialNB()
nb.fit(X_train, Y_train)

# Label the held-out split with the fitted model
y_pred = nb.predict(X_test)

# Report the fraction of held-out samples classified correctly
nb_accuracy = accuracy_score(Y_test, y_pred)
print("test accuracy:", nb_accuracy)

test accuracy: 0.7328125


<b> We have obtained an accuracy of approximately 73% </b>

<b> We observe that the Random Forest Classifier gives the best results of the three models compared </b>

## Predict the labels for the actual test data

Load the test set from the pickle file

In [8]:
# Unpickle the test images into test_x
# (pickle.load can run arbitrary code; use only with trusted dataset files)
with open('Vision_task_dataset_public/test_image.pkl', 'rb') as test_file:
    test_x = pickle.load(test_file)

Predict on test data using Random Forest Classifier

In [9]:
# Label the test images with the Random Forest model fitted earlier
test_pred = rf.predict(test_x)

# Build one row per prediction: the row's position in test_pred becomes the
# "image_index" index and the predicted label goes in the "class" column.
pred_data = pd.DataFrame(
    {"class": test_pred, "image_index": list(range(0, len(test_pred)))}
).set_index('image_index')

# Display ten randomly chosen rows as a quick sanity check
pred_data.sample(10)

Unnamed: 0_level_0,class
image_index,Unnamed: 1_level_1
1469,3
113,0
130,0
533,2
247,0
84,0
444,0
455,0
994,2
661,2


Save the data to .csv file

In [10]:
# Write the prediction table to the submission CSV file
submission_file = "hari_sai_raghuram_veeramallu.csv"
pred_data.to_csv(submission_file)