## Importing the required libraries and modules

In [1]:
import numpy as np
import os
from planetaryimage import PDS3Image
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
#from joblib import dump, load

## Labelling the images with no cosmic ray artifacts as 0

In [4]:
# Class 0: images without cosmic-ray artifacts.
# Builds `no_artifacts_sheet`, a list of (flattened_pixels, 0) tuples, from every
# readable file in ./class0NoArtifacts. Files that fail to load are reported and
# recorded in `skipped_class0` instead of being dropped silently.
fold_path = os.path.join(os.getcwd(), 'class0NoArtifacts')   # Folder holding the class-0 images
no_artifacts_sheet = []   # Collects (flattened_pixels, label) tuples
newsize = (256,256)   # Target array shape; the class-1 loading cell reads this too
skipped_class0 = []   # Names of files that could not be loaded
# Sort the listing so the sample order (and therefore the seeded train/test
# split further down) does not depend on the filesystem's enumeration order.
for file in sorted(os.listdir(fold_path)):
    try:
        image_object = PDS3Image.open(os.path.join(fold_path, file))
        img1 = np.copy(image_object.image)   # Work on a copy; the resize below modifies the array in place
        # NOTE(review): ndarray.resize does not resample the picture. It changes
        # the array in place to `newsize` by truncating (or zero-padding) the
        # flattened pixel data. Left unchanged so both classes keep identical
        # preprocessing -- confirm whether a true image rescale was intended.
        img1.resize(newsize)
        image = img1.reshape(-1)   # Flatten to a 1-D feature vector
        no_artifacts_sheet.append((image, 0))   # Label 0 = no artifacts
    except Exception as exc:
        # Skip the unreadable file but say so. Catching Exception rather than
        # using a bare `except` lets KeyboardInterrupt stop the loop.
        skipped_class0.append(file)
        print("Skipping {!r}: {}: {}".format(file, type(exc).__name__, exc))

F:\CERN_task\class0NoArtifacts


## Labelling the images with cosmic ray artifacts as 1

In [5]:
# Class 1: images containing cosmic-ray artifacts.
# Builds `artifacts_sheet`, a list of (flattened_pixels, 1) tuples, from every
# readable file in ./class1Artifacts. Files that fail to load are reported and
# recorded in `skipped_class1` instead of being dropped silently.
# Relies on `newsize`, which is defined in the class-0 loading cell.
fold_path = os.path.join(os.getcwd(), 'class1Artifacts')   # Folder holding the class-1 images
artifacts_sheet = []   # Collects (flattened_pixels, label) tuples
skipped_class1 = []   # Names of files that could not be loaded
# Sort the listing so the sample order (and therefore the seeded train/test
# split further down) does not depend on the filesystem's enumeration order.
for file in sorted(os.listdir(fold_path)):
    try:
        image_object = PDS3Image.open(os.path.join(fold_path, file))
        img1 = np.copy(image_object.image)   # Work on a copy; the resize below modifies the array in place
        # NOTE(review): ndarray.resize does not resample the picture. It changes
        # the array in place to `newsize` by truncating (or zero-padding) the
        # flattened pixel data. Left unchanged so both classes keep identical
        # preprocessing -- confirm whether a true image rescale was intended.
        img1.resize(newsize)
        image = img1.reshape(-1)   # Flatten to a 1-D feature vector
        artifacts_sheet.append((image, 1))   # Label 1 = artifacts present
    except Exception as exc:
        # Skip the unreadable file but say so. Catching Exception rather than
        # using a bare `except` lets KeyboardInterrupt stop the loop.
        skipped_class1.append(file)
        print("Skipping {!r}: {}: {}".format(file, type(exc).__name__, exc))

F:\CERN_task\class1Artifacts


## Preparing the complete dataset by combining the lists of label 0 and label 1 images

In [6]:
# Concatenate the two labelled lists (artifact samples first), then split the
# (pixels, label) pairs into the feature list X and the target list Y.
data = artifacts_sheet + no_artifacts_sheet
X = [sample[0] for sample in data]   # Flattened image arrays
Y = [sample[1] for sample in data]   # Matching 0/1 labels

## Training the SVM Model using the prepared dataset

In [7]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

In [8]:
# Support Vector Classifier with an RBF kernel; hyper-parameters gathered in one place.
svc_params = dict(C=100, kernel='rbf', gamma='scale')
clf = SVC(**svc_params)

In [14]:
# Fit the classifier on the training split. The call is kept as the cell's last
# expression so the notebook still displays the fitted estimator.
# Optional persistence (requires `from joblib import dump, load`):
#   dump(clf, 'my_model.joblib')
#   clf = load('my_model.joblib')
clf.fit(X=X_train, y=y_train)

SVC(C=100, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

## Testing the trained model and calculating its accuracy

In [15]:
# Classify the held-out samples and report the fraction predicted correctly.
predicted = clf.predict(X_test)
accuracy = accuracy_score(y_test, predicted)
print(accuracy)

0.9554455445544554


## Printing the confusion matrix of the predicted result

In [16]:
# Confusion matrix for the test split: rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, predicted)
print(cm)

[[120   2]
 [  7  73]]
