In [11]:
# make and call a function that splits the data into training and testing sets
import numpy as np

def splitdata_train_test(data, fraction_training):
    """Randomly split ``data`` into a training part and a testing part.

    The rows are shuffled along the first axis and then cut at
    ``int(fraction_training * len(data))``: everything before the cut is
    the training set, everything from the cut onwards is the testing set.

    Parameters
    ----------
    data : numpy.ndarray or list
        The records to split. It is NOT modified: the shuffle is applied
        to a copy so the caller's ordering is preserved.
    fraction_training : float
        Fraction of the records that go into the training set.

    Returns
    -------
    training, testing
        The two parts of the shuffled copy.
    """
    # np.random.shuffle works in place, so shuffle a copy; otherwise the
    # caller's array would be silently reordered on every call.
    shuffled = data.copy()
    np.random.shuffle(shuffled)

    split_index = int(fraction_training * len(shuffled))
    return shuffled[:split_index], shuffled[split_index:]

# load the catalogue and split it with the function defined above
data = np.load('galaxy_catalogue.npy')
fraction_training = 0.67
training, testing = splitdata_train_test(data, fraction_training)

# report the key values, one labelled line per quantity
for label, value in (
    ('Number data galaxies:', len(data)),
    ('Train fraction:', fraction_training),
    ('Number of galaxies in training set:', len(training)),
    ('Number of galaxies in testing set:', len(testing)),
):
    print(label, value)

Number data galaxies: 780
Train fraction: 0.67
Number of galaxies in training set: 522
Number of galaxies in testing set: 258


In [12]:
# make and call a function that generates the features and targets
import numpy as np

def generate_features_targets(data):
    """Build the feature matrix and the target vector from catalogue records.

    Parameters
    ----------
    data : structured array (or other mapping of field name -> column)
        Must provide the colour fields ('u-g', 'g-r', 'r-i', 'i-z'),
        'ecc', the five 'm4_*' fields, the 'petroR50_*' / 'petroR90_*'
        fields for the u, r and z filters, and 'class'.

    Returns
    -------
    features : numpy.ndarray, shape (len(data), 13)
        Columns 0-9 are copied from the catalogue; columns 10-12 are the
        petroR50 / petroR90 ratios in the u, r and z filters.
    targets
        The 'class' field of ``data``.
    """
    targets = data['class']

    # Fields copied unchanged, listed in output-column order.
    copied_fields = (
        'u-g', 'g-r', 'r-i', 'i-z', 'ecc',
        'm4_u', 'm4_g', 'm4_r', 'm4_i', 'm4_z',
    )
    # (numerator, denominator) pairs for the concentration columns: u, r, z.
    concentration_fields = (
        ('petroR50_u', 'petroR90_u'),
        ('petroR50_r', 'petroR90_r'),
        ('petroR50_z', 'petroR90_z'),
    )

    features = np.empty(shape=(len(data), 13))

    for column, field in enumerate(copied_fields):
        features[:, column] = data[field]

    first_ratio_column = len(copied_fields)
    for offset, (inner, outer) in enumerate(concentration_fields):
        features[:, first_ratio_column + offset] = data[inner] / data[outer]

    return features, targets


data = np.load('galaxy_catalogue.npy')
features, targets = generate_features_targets(data)

# Show the shape of each array to confirm the dimensions are as expected.
for label, array in (
    ("Features shape:", features),
    ("Targets shape:", targets),
):
    print(label, array.shape)

Features shape: (780, 13)
Targets shape: (780,)


In [4]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier


def splitdata_train_test(data, fraction_training):
    """Randomly split ``data`` into a training part and a testing part.

    The rows are shuffled along the first axis and then cut at
    ``int(fraction_training * len(data))``: everything before the cut is
    the training set, everything from the cut onwards is the testing set.

    Parameters
    ----------
    data : numpy.ndarray or list
        The records to split. It is NOT modified: the shuffle is applied
        to a copy so the caller's ordering is preserved.
    fraction_training : float
        Fraction of the records that go into the training set.

    Returns
    -------
    training, testing
        The two parts of the shuffled copy.
    """
    # np.random.shuffle works in place, so shuffle a copy; otherwise the
    # caller's array would be silently reordered on every call.
    shuffled = data.copy()
    np.random.shuffle(shuffled)

    split_index = int(fraction_training * len(shuffled))
    return shuffled[:split_index], shuffled[split_index:]


def generate_features_targets(data):
    """Build the feature matrix and the target vector from catalogue records.

    Parameters
    ----------
    data : structured array (or other mapping of field name -> column)
        Must provide the colour fields ('u-g', 'g-r', 'r-i', 'i-z'),
        'ecc', the five 'm4_*' fields, the 'petroR50_*' / 'petroR90_*'
        fields for the u, r and z filters, and 'class'.

    Returns
    -------
    features : numpy.ndarray, shape (len(data), 13)
        Columns 0-9 are copied from the catalogue; columns 10-12 are the
        petroR50 / petroR90 ratios in the u, r and z filters.
    targets
        The 'class' field of ``data``.
    """
    targets = data['class']

    # Fields copied unchanged, listed in output-column order.
    copied_fields = (
        'u-g', 'g-r', 'r-i', 'i-z', 'ecc',
        'm4_u', 'm4_g', 'm4_r', 'm4_i', 'm4_z',
    )
    # (numerator, denominator) pairs for the concentration columns: u, r, z.
    concentration_fields = (
        ('petroR50_u', 'petroR90_u'),
        ('petroR50_r', 'petroR90_r'),
        ('petroR50_z', 'petroR90_z'),
    )

    features = np.empty(shape=(len(data), 13))

    for column, field in enumerate(copied_fields):
        features[:, column] = data[field]

    first_ratio_column = len(copied_fields)
    for offset, (inner, outer) in enumerate(concentration_fields):
        features[:, first_ratio_column + offset] = data[inner] / data[outer]

    return features, targets



# complete this function by splitting the data set and training a decision tree classifier
def dtc_predict_actual(data, fraction_training=0.7, random_state=None):
    """Train a decision tree on part of ``data`` and predict the rest.

    The catalogue is split randomly with ``splitdata_train_test``, features
    and targets are built for each part with ``generate_features_targets``,
    a ``DecisionTreeClassifier`` is fitted on the training part and then
    used to predict the classes of the testing part.

    Parameters
    ----------
    data : structured array
        Catalogue records accepted by ``generate_features_targets``.
    fraction_training : float, optional
        Fraction of the records used for training. Defaults to 0.7, the
        value that used to be hard-coded here.
    random_state : int or None, optional
        Forwarded to ``DecisionTreeClassifier``. The default ``None``
        matches constructing the classifier with no arguments.

    Returns
    -------
    predictions : numpy.ndarray
        Predicted class for every record in the testing part.
    test_targets
        The actual classes of the same records, in the same order.
    """
    training, testing = splitdata_train_test(data, fraction_training)

    train_features, train_targets = generate_features_targets(training)
    test_features, test_targets = generate_features_targets(testing)

    dtc = DecisionTreeClassifier(random_state=random_state)
    dtc.fit(train_features, train_targets)

    predictions = dtc.predict(test_features)

    return predictions, test_targets

def calculate_accuracy(predicted, actual):
    """Return the accuracy of ``predicted`` and the number of samples scored.

    Parameters
    ----------
    predicted, actual : array-like
        Predicted and true labels, one entry per sample. Lists and tuples
        are accepted as well as numpy arrays.

    Returns
    -------
    (accuracy, count) : tuple
        ``accuracy`` is the fraction of positions where the two inputs
        agree; ``count`` is ``len(actual)``.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    ZeroDivisionError
        If the inputs are empty.
    """
    # Coerce first: comparing two plain lists with == yields a single bool,
    # not an element-wise result.
    predicted = np.asarray(predicted)
    actual = np.asarray(actual)
    if predicted.shape != actual.shape:
        raise ValueError(
            "predicted and actual must have the same shape, got "
            "{} and {}".format(predicted.shape, actual.shape)
        )

    n_samples = len(actual)
    n_correct = np.count_nonzero(predicted == actual)
    return n_correct / n_samples, n_samples



# Load the catalogue, train and test the decision tree on it, and leave the
# (accuracy, number of test galaxies) tuple as the value displayed by the cell.
data = np.load('galaxy_catalogue.npy')
predictions, test_targets = dtc_predict_actual(data)
calculate_accuracy(predictions,test_targets)
 

(0.8162393162393162, 234)

In [18]:
# make a function that calculates the accuracy of your model's predictions
import numpy as np
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict
from sklearn.tree import DecisionTreeClassifier
from support_functions import plot_confusion_matrix, generate_features_targets


# Implement the following function
def calculate_accuracy(predicted, actual):
    """Return the fraction of predictions that match the actual labels.

    Parameters
    ----------
    predicted, actual : array-like
        Predicted and true labels, one entry per sample. Lists and tuples
        are accepted as well as numpy arrays.

    Returns
    -------
    float
        Number of matching positions divided by ``len(actual)``.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    ZeroDivisionError
        If the inputs are empty.
    """
    # Coerce first: comparing two plain lists with == yields a single bool,
    # not an element-wise result.
    predicted = np.asarray(predicted)
    actual = np.asarray(actual)
    if predicted.shape != actual.shape:
        raise ValueError(
            "predicted and actual must have the same shape, got "
            "{} and {}".format(predicted.shape, actual.shape)
        )

    return np.count_nonzero(predicted == actual) / len(actual)


data = np.load('galaxy_catalogue.npy')

# split the data into features and targets
features, targets = generate_features_targets(data)

# train the model with 10-fold cross-validation to get a predicted class
# for every galaxy alongside its actual class
dtc = DecisionTreeClassifier()
predicted = cross_val_predict(dtc, features, targets, cv=10)

# calculate the model score using your function
model_score = calculate_accuracy(predicted, targets)
print("Our accuracy score:", model_score)

# calculate the model's confusion matrix using sklearn's confusion_matrix function.
# The labels are sorted because iterating a bare set gives an arbitrary order,
# which would let the rows/columns of the matrix change from run to run.
class_labels = sorted(set(targets))
model_cm = confusion_matrix(y_true=targets, y_pred=predicted, labels=class_labels)

# Plot the confusion matrix using the provided functions.
plt.figure()
plot_confusion_matrix(model_cm, classes=class_labels, normalize=False)
plt.show()

ModuleNotFoundError: No module named 'support_functions'