In [2]:
# CART on the Bank Note dataset
from random import seed
from random import randrange
from csv import reader
 
# Load a CSV file
def load_csv(filename):
	file = open(playtennis.csv, "rb")
	lines = reader(file)
	dataset = list(lines)
	return dataset
 
# Convert string column to float
def str_column_to_float(dataset, column):
	for row in dataset:
		row[column] = float(row[column].strip())
 
# Split a dataset into k folds
def cross_validation_split(dataset, n_folds):
	dataset_split = list()
	dataset_copy = list(dataset)
	fold_size = int(len(dataset) / n_folds)
	for i in range(n_folds):
		fold = list()
		while len(fold) < fold_size:
			index = randrange(len(dataset_copy))
			fold.append(dataset_copy.pop(index))
		dataset_split.append(fold)
	return dataset_split
 
# Calculate accuracy percentage
def accuracy_metric(actual, predicted):
	correct = 0
	for i in range(len(actual)):
		if actual[i] == predicted[i]:
			correct += 1
	return correct / float(len(actual)) * 100.0
 
# Evaluate an algorithm using a cross validation split
def evaluate_algorithm(dataset, algorithm, n_folds, *args):
	folds = cross_validation_split(dataset, n_folds)
	scores = list()
	for fold in folds:
		train_set = list(folds)
		train_set.remove(fold)
		train_set = sum(train_set, [])
		test_set = list()
		for row in fold:
			row_copy = list(row)
			test_set.append(row_copy)
			row_copy[-1] = None
		predicted = algorithm(train_set, test_set, *args)
		actual = [row[-1] for row in fold]
		accuracy = accuracy_metric(actual, predicted)
		scores.append(accuracy)
	return scores
 
# Split a dataset based on an attribute and an attribute value
def test_split(index, value, dataset):
	left, right = list(), list()
	for row in dataset:
		if row[index] < value:
			left.append(row)
		else:
			right.append(row)
	return left, right
 
# Calculate the Gini index for a split dataset
def gini_index(groups, classes):
	# count all samples at split point
	n_instances = float(sum([len(group) for group in groups]))
	# sum weighted Gini index for each group
	gini = 0.0
	for group in groups:
		size = float(len(group))
		# avoid divide by zero
		if size == 0:
			continue
		score = 0.0
		# score the group based on the score for each class
		for class_val in classes:
			p = [row[-1] for row in group].count(class_val) / size
			score += p * p
		# weight the group score by its relative size
		gini += (1.0 - score) * (size / n_instances)
	return gini
 
# Select the best split point for a dataset
def get_split(dataset):
	class_values = list(set(row[-1] for row in dataset))
	b_index, b_value, b_score, b_groups = 999, 999, 999, None
	for index in range(len(dataset[0])-1):
		for row in dataset:
			groups = test_split(index, row[index], dataset)
			gini = gini_index(groups, class_values)
			if gini < b_score:
				b_index, b_value, b_score, b_groups = index, row[index], gini, groups
	return {'index':b_index, 'value':b_value, 'groups':b_groups}
 
# Create a terminal node value
def to_terminal(group):
	outcomes = [row[-1] for row in group]
	return max(set(outcomes), key=outcomes.count)
 
# Create child splits for a node or make terminal
def split(node, max_depth, min_size, depth):
	left, right = node['groups']
	del(node['groups'])
	# check for a no split
	if not left or not right:
		node['left'] = node['right'] = to_terminal(left + right)
		return
	# check for max depth
	if depth >= max_depth:
		node['left'], node['right'] = to_terminal(left), to_terminal(right)
		return
	# process left child
	if len(left) <= min_size:
		node['left'] = to_terminal(left)
	else:
		node['left'] = get_split(left)
		split(node['left'], max_depth, min_size, depth+1)
	# process right child
	if len(right) <= min_size:
		node['right'] = to_terminal(right)
	else:
		node['right'] = get_split(right)
		split(node['right'], max_depth, min_size, depth+1)
 
# Build a decision tree
def build_tree(train, max_depth, min_size):
	root = get_split(train)
	split(root, max_depth, min_size, 1)
	return root
 
# Make a prediction with a decision tree
def predict(node, row):
	if row[node['index']] < node['value']:
		if isinstance(node['left'], dict):
			return predict(node['left'], row)
		else:
			return node['left']
	else:
		if isinstance(node['right'], dict):
			return predict(node['right'], row)
		else:
			return node['right']
 
# Classification and Regression Tree Algorithm
def decision_tree(train, test, max_depth, min_size):
	tree = build_tree(train, max_depth, min_size)
	predictions = list()
	for row in test:
		prediction = predict(tree, row)
		predictions.append(prediction)
	return(predictions)
 
# Test CART on Bank Note dataset
seed(1)
# load and prepare data
filename = 'data_banknote_authentication.csv'
dataset = load_csv(filename)
# convert string attributes to integers
for i in range(len(dataset[0])):
	str_column_to_float(dataset, i)
# evaluate algorithm
n_folds = 5
max_depth = 5
min_size = 10
scores = evaluate_algorithm(dataset, decision_tree, n_folds, max_depth, min_size)
print('Scores: %s' % scores)
print('Mean Accuracy: %.3f%%' % (sum(scores)/float(len(scores))))


NameError: name 'playtennis' is not defined

In [6]:
import math
import csv

def load_csv(filename):
    lines = csv.reader(open(filename, "r"));
    dataset = list(lines)
    headers = dataset.pop(0)
    return dataset, headers

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""  # NULL indicates children exists. # Not Null indicates this is a Leaf Node

def subtables(data, col, delete): 
    dic = {}
    coldata = [ row[col] for row in data]
    attr = list(set(coldata)) # All values of attribute retrived
    for k in attr:
        dic[k] = []

    for y in range(len(data)):
        key = data[y][col]
        if delete:
            del data[y][col]
        dic[key].append(data[y])
    return attr, dic

def entropy(S):
    attr = list(set(S))
    if len(attr) == 1: #if all are +ve/-ve then entropy = 0 
        return 0

    counts = [0,0] # Only two values possible 'yes' or 'no' 
    for i in range(2):
        counts[i] = sum( [1 for x in S if attr[i] == x] ) / (len(S) * 1.0)

    sums = 0
    for cnt in counts:
        sums += -1 * cnt * math.log(cnt, 2)
    return sums

def compute_gain(data, col):
    attValues, dic = subtables(data, col, delete=False)
    total_entropy = entropy([row[-1] for row in data])
    for x in range(len(attValues)):
        ratio = len(dic[attValues[x]]) / ( len(data) * 1.0)
        entro = entropy([row[-1] for row in dic[attValues[x]]]) 
        total_entropy -= ratio*entro

    return total_entropy
 
def build_tree(data, features):
    lastcol = [row[-1] for row in data]
    if (len(set(lastcol))) == 1: # If all samples have same labels return that label
        node=Node("")
        node.answer = lastcol[0]
        return node

    n = len(data[0])-1
    gains = [compute_gain(data, col) for col in range(n) ]

    split = gains.index(max(gains)) # Find max gains and returns index 
    node = Node(features[split]) # 'node' stores attribute selected #del (features[split])
    fea = features[:split]+features[split+1:]

    attr, dic = subtables(data, split, delete=True) # Data will be spilt in subtables 
    for x in range(len(attr)):
        child = build_tree(dic[attr[x]], fea) 
        node.children.append((attr[x], child))

    return node

def print_tree(node, level):
    if node.answer != "":
        print("     "*level, node.answer) # Displays leaf node yes/no 
        return

    print("       "*level, node.attribute) # Displays attribute Name 
    for value, n in node.children:
        print("     "*(level+1), value) 
        print_tree(n, level + 2)

def classify(node,x_test,features): 
    if node.answer != "":
        print(node.answer) 
        return

    pos = features.index(node.attribute)
    for value, n in node.children:
        if x_test[pos]==value: 
            classify(n,x_test,features)

#''' Main program '''
dataset, features = load_csv("C:\\Users\\Gunjan\\Desktop\\ML_Dataset\\ID3\\PlayTennis.csv") # Read Tennis data 
node = build_tree(dataset, features) # Build decision tree

print("The decision tree for the dataset using ID3 algorithm is ") 
print_tree(node, 0)

testdata, features = load_csv("C:\\Users\\Gunjan\\Desktop\\ML_Dataset\\ID3\\a.csv") 
for xtest in testdata:
    print("The test instance : ",xtest) 
    print("The predicted label : ", end="") 
    classify(node,xtest,features)


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\Gunjan\\Desktop\\ML_Dataset\\ID3\\PlayTennis.csv'

In [8]:
import pandas as pd 
import math 
 
df = pd.read_csv('/Users/Chachu/Documents/Python Scripts/PlayTennis.csv') print("\n Input Data Set is:\n", df) 
 
t = df.keys()[-1] 
print('Target Attribute is: ', t) 
# Get the attribute names from input dataset 
attribute_names = list(df.keys()) 
#Remove the target attribute from the attribute names list attribute_names.remove(t)  
print('Predicting Attributes: ', attribute_names) #Function to calculate the entropy of collection S def entropy(probs):   
 
 
 
MACHINE LEARNING LABORATORY 
[As per Choice Based Credit System (CBCS) scheme] 
(Effective from the academic year 2017 - 2018) SEMESTER – VII 
Subject Code 	17CSL76 	IA Marks 	40 
Number of Lecture Hours/Week 	01I + 02P 	Exam Marks 	60 
Total Number of Lecture Hours 	40 	Exam Hours 	03 
CREDITS – 02 
Description (If any): 
1.	The programs can be implemented in either JAVA or Python. 
2.	For Problems 1 to 6 and 10, programs are to be developed without using the built-in classes or APIs of Java/Python. 
3.	Data 	sets 	can 	be 	taken 	from 	standard 	repositories 
(https://archive.ics.uci.edu/ml/datasets.html) or constructed by the students. 
Lab Experiments: 
1. Implement and demonstratethe FIND-Salgorithm for finding the most specific hypothesis based on a given set of training data samples. Read the training data from a .CSV file. 
2. For a given set of training data examples stored in a .CSV file, implement and demonstrate the Candidate-Elimination algorithmto output a description of the set of all hypotheses consistent with the training examples. 
3. Write a program to demonstrate the working of the decision tree  based  ID3 algorithm. Use an appropriate data set for building the decision tree and apply this knowledge toclassify a new sample. 
4. Build an Artificial Neural Network by implementing the Backpropagation algorithm and test the same using appropriate data sets. 
5.  Write a program to implement the naïve Bayesian classifier for a sample training  data set stored as a .CSV file. Compute the accuracy of the classifier, considering few test data sets. 
6. Assuming a set of documents that need to be classified, use the naïve Bayesian Classifier model to perform this task. Built-in Java classes/API can be used to write the program. Calculate the accuracy, precision, and recall for your data set. 
7. Write a program to construct aBayesian network considering medical data. Use this model to demonstrate the diagnosis of heart patients using standard Heart Disease Data Set. You can use Java/Python ML library classes/API. 
8. Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for clustering using k-Means algorithm. Compare the results of these two algorithms and comment on the quality of clustering. You can add Java/Python ML library classes/API in the program. 
9. Write a program to implement k-Nearest Neighbour algorithm to classify the iris  data set. Print both correct and wrong predictions. Java/Python ML library classes can be used for this problem. 
10. Implement the non-parametric Locally Weighted Regressionalgorithm in order to fit data points. Select appropriate data set for your experiment and draw graphs. 
Study Experiment / Project: 
NIL 
Course outcomes: The students should be able to: 
1. Understand the implementation procedures for the machine learning algorithms. 
 
2.	Design Java/Python programs for various Learning algorithms. 
3.	Apply appropriate data sets to the Machine Learning algorithms. 
4.	Identify and apply Machine Learning algorithms to solve real world problems. 
Conduction of Practical Examination: 
•	All laboratory experiments are to be included for practical examination. 
•	Students are allowed to pick one experiment from the lot. 
•	Strictly follow the instructions as printed on the cover page of answer script 
•	Marks distribution: Procedure + Conduction + Viva:15 + 70 +15 (100) 
Change of experiment is allowed only once and marks allotted to the procedure part to be made zero. 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1.  LAB PROGRAM: 1 
 
2.	TITLE: FIND-S ALGORITHM 
3.	AIM: 
• Implement and demonstrate the FIND-S algorithm for finding the most specific hypothesis based  on a given set of training data samples. Read the training data from a .CSV file. 
 
4.	Find-S Algorithm 
 
1.	Initialize h to the most specific hypothesis in H 
2.	For each positive training instance x 
For each attribute constraint a i in h : 
If the constraint a i in h is satisfied by x then do nothing 
Else replace a i in h by the next more general constraint that is satisfied by x 
3.	Output hypothesis h 
 
5.	Implementation/ Program 1: 
 
import csv 
num_attribute=6 
a=[] with open('enjoysport.csv', 'r') as csvfile: 
    reader=csv.reader(csvfile)     for row in reader:         a.append(row)         print(row) 
print("\n The total number of training instances are : ",len(a)) num_attribute = len(a[0])-1 print("\n The initial hypothesis is : ") hypothesis = ['0']*num_attribute print(hypothesis) for j in range(0,num_attribute): 
    hypothesis[j]=a[0][j] 
print("\n Find-S: Finding maximally specific Hypothesis\n") for i in range(0,len(a)):     if a[i][num_attribute]=='Yes': 
        for j in range(0,num_attribute):             if a[i][j]!=hypothesis[j]:                  hypothesis[j]='?'             else: 
                hypothesis[j]=a[i][j] 
    print("\n For training Example No:{0} the hypothesis is".format(i),hypothesis) print("\n The Maximally specific hypothesis for the training instance is ") print(hypothesis) 
 
 
 
 
 
6. Result/Output: 
 
  
 
 
 
Training Data Set : enjoysport.csv 
 
 
  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1.  LAB PROGRAM: 2 
 
2.	TITLE: Candidate-Elimination algorithm 
3.	AIM: 
• For a given set of training data examples stored in a .CSV file, implement and demonstrate the Candidate-Elimination algorithm to output a description of the set of all hypotheses consistent with the training examples. 
 
4.	Candidate-Elimination algorithm: 
 
Initialize G to the set of maximally general hypotheses in H 
1.	Initialize S to the set of maximally specific hypotheses in H 
2.	For each training example d, do 
2.1.	If d is a positive example 
Remove from G any hypothesis inconsistent with d , For each hypothesis s in S that is not consistent with d , 
Remove s from S 
Add to S all minimal generalizations h of s such that h is consistent with d, and some member of G is more general than h 
Remove from S, hypothesis that is more general than another hypothesis in S 
2.2.	If d is a negative example 
Remove from S any hypothesis inconsistent with d For each hypothesis g in G that is not consistent with d 
     Remove g from G 
Add to G all minimal specializations h of g such that h is consistent with d, and     
 	 some member of S is more specific than h 
                        Remove from G any hypothesis that is less general than another hypothesis in G 
 
5. Implementation/Program2: 
 
import csv a=[] with open("enjoysport.csv","r") as csvfile: 
    fdata=csv.reader(csvfile)     for row in fdata: 
        a.append(row)         print(row) num_att=len(a[0])-1 S=['0']*num_att G=['?']*num_att print(S) print(G) temp=[] for i in range(0,num_att):     S[i]=a[0][i] 
print("................................................") for i in range(0,len(a)): 
    if a[i][num_att]=="Yes":         for j in range(0,num_att):             if S[j]!=a[i][j]:                    S[j]='?' 
        for j in range(0,num_att):               for k in range(0,len(temp)):                     if temp[k][j]!=S[j] and temp[k][j]!='?': 
                        del temp[k]     if a[i][num_att]=='No':         for j in range(0,num_att):             if a[i][j]!=S[j] and S[j]!='?':                 G[j]=S[j]                 temp.append(G)                 G=['?']*num_att     print(S)     if len(temp)==0: 
        print(G)     else:  
        print(temp) 
    print("......................................................................") 
 
6. Result/Output: 
 
  
 
Training Data Set : enjoysport.csv 
 
  
 
1. Lab Program : 3 
 
2.	TITLE: ID3 ALGORITHM 
3.	AIM: 
Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use appropriate data set for building the decision tree and apply this knowledge to classify a new sample. 
4.	ID3 algorithm: 
Algorithm: ID3(Examples, TargetAttribute, Attributes) Input: 
Examples are the training examples. 
Targetattribute is the attribute whose value is to be predicted by the tree. 
Attributes is a list of other attributes that may be tested by the learned decision tree. 
Output: Returns a decision tree that correctly classiJies the given Examples Method: 
1.	Create a Root node for the tree 
2.	If all Examples are positive, Return the single-node tree Root, with label = + 
3.	If all Examples are negative, Return the single-node tree Root, with label = - 
4.	If Attributes is empty, 
Return the single-node tree Root, with label = most common value of TargetAttribute in Examples 
Else 
A ← the attribute from Attributes that best classifies Examples The decision attribute for Root ←A For each possible value, vi, of A, 
Add a new tree branch below Root, corresponding to the test A = vi Let 
Examplesvi be the subset of Examples that have value vi for A 
If Examplesvi is empty Then below this new branch add a leaf node with label = most common value of TargetAttribute in Examples 
Else below this new branch add the subtree ID3(Examplesvi, TargetAttribute, Attributes–{A}) End 
Return Root 
 
5.	Implementation/Program: 
import pandas as pd 
import math 
 
df = pd.read_csv('/Users/Chachu/Documents/Python Scripts/PlayTennis.csv') print("\n Input Data Set is:\n", df) 
 
t = df.keys()[-1] 
print('Target Attribute is: ', t) 
# Get the attribute names from input dataset 
attribute_names = list(df.keys()) 
#Remove the target attribute from the attribute names list attribute_names.remove(t)  
print('Predicting Attributes: ', attribute_names) #Function to calculate the entropy of collection S def entropy(probs):   
    return sum( [-prob*math.log(prob, 2) for prob in probs]) #Function to calulate the entropy of the given Data Sets/List with  
#respect to target attributes def entropy_of_list(ls,value):   
    from collections import Counter 
    cnt = Counter(x for x in ls)# Counter calculates the propotion of class     print('Target attribute class count(Yes/No)=',dict(cnt))     total_instances = len(ls)   
    print("Total no of instances/records associated with {0} is: {1}".format(value,total_instances ))     probs = [x / total_instances for x in cnt.values()]  # x means no of YES/NO     print("Probability of Class {0} is: {1:.4f}".format(min(cnt),min(probs)))     print("Probability of Class {0} is: {1:.4f}".format(max(cnt),max(probs)))     return entropy(probs) # Call Entropy  
 
def information_gain(df, split_attribute, target_attribute,battr): 
    print("\n\n-----Information Gain Calculation of ",split_attribute, " --------")      df_split = df.groupby(split_attribute) # group the data based on attribute values     glist=[]     for gname,group in df_split: 
        print('Grouped Attribute Values \n',group)         glist.append(gname)  
     
    glist.reverse() 
    nobs = len(df.index) * 1.0    
    df_agg1=df_split.agg({target_attribute:lambda x:entropy_of_list(x, glist.pop())})     df_agg2=df_split.agg({target_attribute :lambda x:len(x)/nobs}) 
     
    df_agg1.columns=['Entropy'] 
    df_agg2.columns=['Proportion'] 
     
    # Calculate Information Gain:     new_entropy = sum( df_agg1['Entropy'] * df_agg2['Proportion'])     if battr !='S':         old_entropy = entropy_of_list(df[target_attribute],'S-'+df.iloc[0][df.columns.get_loc(battr)])     else: 
        old_entropy = entropy_of_list(df[target_attribute],battr)     return old_entropy - new_entropy 
 
 
 
def id3(df, target_attribute, attribute_names, default_class=None,default_attr='S'): 
     
    from collections import Counter 
    cnt = Counter(x for x in df[target_attribute])# class of YES /NO 
     
    ## First check: Is this split of the dataset homogeneous? 
    if len(cnt) == 1: 
        return next(iter(cnt))  # next input data set, or raises StopIteration when EOF is hit. 
     
    ## Second check: Is this split of the dataset empty? if yes, return a default value     elif df.empty or (not attribute_names): 
        return default_class  # Return None for Empty Data Set 
     
    ## Otherwise: This dataset is ready to be devied up! 
    else: 
        # Get Default Value for next recursive call of this function:         default_class = max(cnt.keys()) #No of YES and NO Class         # Compute the Information Gain of the attributes:         gainz=[]         for attr in attribute_names: 
            ig= information_gain(df, attr, target_attribute,default_attr)             gainz.append(ig) 
            print('Information gain of ',attr,' is : ',ig) 
         
        index_of_max = gainz.index(max(gainz))                        best_attr = attribute_names[index_of_max         print("\nAttribute with the maximum gain is: ", best_attr)         # Create an empty tree, to be populated in a moment         tree = {best_attr:{}} # Initiate the tree with best attribute as a node          remaining_attribute_names =[i for i in attribute_names if i != best_attr] 
         
        # Split dataset-On each split, recursively call this algorithm.Populate the empty tree with subtrees, which 
        # are the result of the recursive call         for attr_val, data_subset in df.groupby(best_attr):             subtree = id3(data_subset,target_attribute, remaining_attribute_names,default_class,best_attr) 
            tree[best_attr][attr_val] = subtree         return tree 
     
    from pprint import pprint tree = id3(df,t,attribute_names) print("\nThe Resultant Decision Tree is:") print(tree) 
 
def classify(instance, tree,default=None): # Instance of Play Tennis with Predicted         attribute = next(iter(tree)) # Outlook/Humidity/Wind        
    if instance[attribute] in tree[attribute].keys(): # Value of the attributs in  set of Tree keys           result = tree[attribute][instance[attribute]] 
        if isinstance(result, dict): # this is a tree, delve deeper             return classify(instance, result)         else: 
            return result # this is a label     else: 
        return default 
     
df_new=pd.read_csv('/Users/Chachu/Documents/Python Scripts/PlayTennisTest.csv') df_new['predicted'] = df_new.apply(classify, axis=1, args=(tree,'?'))  print(df_new) 
 
 
 
 
 
 
 
 	 
6. Result/Output: 
 
  
 
 
Training Data Set : PlayTennis.csv 
 
  
 
Testing Data Set : PlayTennisTest.csv 
 
  
 
 
 
 
 
11. LAB PROGRAM: 4 
2.	TITLE: BACKPROPAGATION ALGORITHM 
3.	AIM: 
    Build an Artificial Neural Network by implementing the Back propagation algorithm and test the  same using appropriate data sets. 
 
4.	Backpropagation Algorithm: 
 
  
5.	Implementation/ Program:  
 
import numpy as np 
X = np.array(([2, 9], [1, 5], [3, 6]))  y = np.array(([92], [86], [89]))  
y = y/100  
 
def sigmoid(x):  
    return 1/(1 + np.exp(-x)) 
 
def derivatives_sigmoid(x): 
    return x * (1 - x) 
 
epoch=10000  lr=0.1  
inputlayer_neurons = 2  hiddenlayer_neurons = 3  
output_neurons = 1  
 
wh=np.random.uniform(size=(inputlayer_neurons,hiddenlayer_neurons)) bias_hidden=np.random.uniform(size=(1,hiddenlayer_neurons))  
weight_hidden=np.random.uniform(size=(hiddenlayer_neurons,output_neurons))  bias_output=np.random.uniform(size=(1,output_neurons))  
 
for i in range(epoch):     hinp1=np.dot(X,wh)     hinp= hinp1 + bias_hidden  
    hlayer_activation = sigmoid(hinp) 
     
    outinp1=np.dot(hlayer_activation,weight_hidden)     outinp= outinp1+ bias_output 
    output = sigmoid(outinp) 
      
    EO = y-output  
    outgrad = derivatives_sigmoid(output)      d_output = EO * outgrad  
    EH = d_output.dot(weight_hidden.T)   
    hiddengrad = derivatives_sigmoid(hlayer_activation)      d_hiddenlayer = EH * hiddengrad 
 
    weight_hidden += hlayer_activation.T.dot(d_output) *lr 
    bias_hidden += np.sum(d_hiddenlayer, axis=0,keepdims=True) *lr 
 
    wh += X.T.dot(d_hiddenlayer) *lr 
    bias_output += np.sum(d_output, axis=0,keepdims=True) *lr 
 
print("Input: \n" + str(X)) print("Actual Output: \n" + str(y)) 
print("Predicted Output: \n" ,output) 
 
6. Result/Output: 
 
  
 
 
1. LAB PROGRAM: 5 
 
2.	TITLE: NAÏVE BAYESIAN CLASSIFIER 
3.	AIM: 
Write a program to implement the naïve Bayesian classifier for a sample training data set stored as a .CSV file. Compute the accuracy of the classifier, considering few test data sets. 
 
4.	Algorithm: 
   
5.	Implementation/Program : 
import numpy as np import math import csv import pdb def read_data(filename): 
 
    with open(filename,'r') as csvfile:         datareader = csv.reader(csvfile)         metadata = next(datareader) 
        traindata=[]         for row in datareader:             traindata.append(row) 
 
    return (metadata, traindata) 
 
def splitDataset(dataset, splitRatio): 
    trainSize = int(len(dataset) * splitRatio)     trainSet = []     testset = list(dataset) 
    i=0     while len(trainSet) < trainSize:         trainSet.append(testset.pop(i))     return [trainSet, testset] 
 
def classify(data,test): 
 
    total_size = data.shape[0] 
    print("\n") 
    print("training data size=",total_size) 
    print("test data size=",test.shape[0]) 
 
    countYes = 0     countNo = 0     probYes = 0     probNo = 0     print("\n") 
    print("target    count    probability") 
 
    for x in range(data.shape[0]): 
        if data[x,data.shape[1]-1] == 'yes': 
            countYes +=1         if data[x,data.shape[1]-1] == 'no': 
            countNo +=1 
 
    probYes=countYes/total_size 
    probNo= countNo / total_size 
 
    print('Yes',"\t",countYes,"\t",probYes) 
    print('No',"\t",countNo,"\t",probNo) 
 
 
    prob0 =np.zeros((test.shape[1]-1))     prob1 =np.zeros((test.shape[1]-1))     accuracy=0     print("\n") 
    print("instance prediction  target") 
 
    for t in range(test.shape[0]):         for k in range (test.shape[1]-1): 
            count1=count0=0             for j in range (data.shape[0]):                 #how many times appeared with no                 if test[t,k] == data[j,k] and data[j,data.shape[1]-1]=='no': 
                    count0+=1 
                #how many times appeared with yes                 if test[t,k]==data[j,k] and data[j,data.shape[1]-1]=='yes': 
                    count1+=1             prob0[k]=count0/countNo 
            prob1[k]=count1/countYes 
 
        probno=probNo         probyes=probYes         for i in range(test.shape[1]-1):             probno=probno*prob0[i]             probyes=probyes*prob1[i]         if probno>probyes: 
            predict='no'         else: 
            predict='yes' 
 
        print(t+1,"\t",predict,"\t    ",test[t,test.shape[1]-1])         if predict == test[t,test.shape[1]-1]: 
            accuracy+=1     final_accuracy=(accuracy/test.shape[0])*100     print("accuracy",final_accuracy,"%")     return 
 
metadata,traindata= read_data("/Users/Chachu/Documents/Python Scripts/tennis.csv") splitRatio=0.6 
trainingset, testset=splitDataset(traindata, splitRatio) training=np.array(trainingset) print("\n The Training data set are:") for x in trainingset: 
    print(x) 
     
testing=np.array(testset) print("\n The Test data set are:") for x in testing: 
    print(x) 
classify(training,testing) 
 
 
 
6. Result /Output: 
                           
 
 
 
Training Data Set : tennis.csv 
 
  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1. LAB PROGRAM: 6 
 
2.	TITLE: DOCUMENT CLASSIFICATION USING NAÏVE BAYESIAN CLASSIFIER 
3.	AIM: 
• 	Assuming a set of documents that need to be classified, use the naïve Bayesian Classifier model to perform this task. Built-in Java classes/API can be used to write the program. 
Calculate the accuracy, precision, and recall for your data set. 
4.	Algorithm: 
  
 
 
Analysis of Document Classification: 
 
  
•	For classification tasks, the terms true positives, true negatives, false positives, and false negatives compare the results of the classifier under test with trusted external judgments. The terms positive and negative refer to the classifier's prediction (sometimes known as the expectation), and the terms true and false refer to whether that prediction corresponds to the external judgment (sometimes known as the observation). 
•	Precision - Precision is the ratio of correctly predicted positive documents to the total predicted positive documents. High precision relates to the low false positive rate. 
Precision = (Σ True positive ) / ( Σ True positive + Σ False positive) 
•	Recall (Sensitivity) - Recall is the ratio of correctly predicted positive documents to the all observations in actual class. 
Recall = (Σ True positive ) / ( Σ True positive + Σ False negative) 
•	Accuracy - Accuracy is the most intuitive performance measure and it is simply a ratio of correctly predicted observation to the total observations. One may think that, if we have high accuracy then our model is best. Yes, accuracy is a great measure but only when you have symmetric datasets where values of false positive and false negatives are almost same. Therefore, you have to look at other parameters to evaluate the performance of your model. For our model, we have got 0.803 which means our model is approx. 80% accurate. 
                                                     Accuracy = (Σ True positive + Σ True negative) / Σ Total population 
 
5.	Implementation/ Program: 
 
import pandas as pd 
from sklearn.model_selection import train_test_split from sklearn.feature_extraction.text import CountVectorizer from sklearn.naive_bayes import MultinomialNB from sklearn import metrics 
 
msg=pd.read_csv('/Users/Chachu/Documents/PythonScripts/naivetext.csv',names=['message',' label']) 
 
print('The dimensions of the dataset',msg.shape) 
 
msg['labelnum']=msg.label.map({'pos':1,'neg':0}) X=msg.message 
y=msg.labelnum 
 
#splitting the dataset into train and test data xtrain,xtest,ytrain,ytest=train_test_split(X,y) print ('\n the total number of Training Data :',ytrain.shape) print ('\n the total number of Test Data :',ytest.shape) 
 
#output the words or Tokens in the text documents cv = CountVectorizer() xtrain_dtm = cv.fit_transform(xtrain) xtest_dtm=cv.transform(xtest) print('\n The words or Tokens in the text documents \n') print(cv.get_feature_names()) df=pd.DataFrame(xtrain_dtm.toarray(),columns=cv.get_feature_names()) # Training Naive Bayes (NB) classifier on training data. clf = MultinomialNB().fit(xtrain_dtm,ytrain) predicted = clf.predict(xtest_dtm) 
#printing accuracy, Confusion matrix, Precision and Recall 
print('\n Accuracy of the classifier is',metrics.accuracy_score(ytest,predicted)) print('\n Confusion matrix') 
print(metrics.confusion_matrix(ytest,predicted)) 
print('\n The value of Precision', metrics.precision_score(ytest,predicted)) print('\n The value of Recall', metrics.recall_score(ytest,predicted)) 
 
 
 
6. Result/ Output: 
 
  
 
Training Data Set : naivetext.csv 
 
  
 
 
 
 
 
1. LAB PROGRAM: 7 
 
2.	TITLE: BAYESIAN NETWORK 
3.	AIM: 
Write a program to construct a Bayesian network considering medical data. Use this model to demonstrate the diagnosis of heart patients using standard Heart Disease Data Set. You can use Java/Python ML library classes/API. 
4.	Heart Disease Data set 
  
5.	Implementation/ Program 7: 
import numpy as np import pandas as pd import csv from pgmpy.estimators import MaximumLikelihoodEstimator from pgmpy.models import BayesianModel from pgmpy.inference import VariableElimination 
#read Cleveland Heart Disease data heartDisease = pd.read_csv('/Users/Chachu/Documents/Python Scripts/heart.csv') heartDisease = heartDisease.replace('?',np.nan) 
#display the data print('Sample instances from the dataset are given below') print(heartDisease.head()) 
#display the Attributes names and datatyes print('\n Attributes and datatypes') print(heartDisease.dtypes) #Creat Model- Bayesian Network model 
=BayesianModel([('age','heartdisease'),('sex','heartdisease'),('exang','heartdisease'),('cp','heartdisease'),(' heartdisease', 
'restecg'),('heartdisease','chol')]) 
#Learning CPDs using Maximum Likelihood Estimators print('\n Learning CPD using Maximum likelihood estimators') model.fit(heartDisease,estimator=MaximumLikelihoodEstimator) 
# Inferencing with Bayesian Network 
print('\n Inferencing with Bayesian Network:') 
HeartDiseasetest_infer = VariableElimination(model) #computing the Probability of HeartDisease given restecg print('\n 1.Probability of HeartDisease given evidence=restecg :1') q1=HeartDiseasetest_infer.query(variables=['heartdisease'],evidence={'restecg':1}) print(q1) 
#computing the Probability of HeartDisease given cp print('\n 2.Probability of HeartDisease given evidence= cp:2 ') q2=HeartDiseasetest_infer.query(variables=['heartdisease'],evidence={'cp':2}) print(q2) 
 
 
 
 
 
 
 
 
 
 
 
 
 
6. Result/Output: 
 
   
Training Data Set : heart.csv (Sample) 
  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1. LAB PROGRAM: 8 
 
2.	TITLE: CLUSTERING BASED ON EM ALGORITHM AND K-MEANS 
3.	AIM:     
  Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for clustering using k-Means algorithm. Compare the results of these two algorithms and comment on the quality of clustering. You can add Java/Python ML library classes/API in the program. 
 
4.	THEORY: 
Expectation Maximization algorithm 
•	The basic approach and logic of this clustering method is as follows. 
•	Suppose we measure a single continuous variable in a large sample of observations. Further, suppose that the sample consists of two clusters of observations with different means (and perhaps different standard deviations); within each sample, the distribution of values for the continuous variable follows the normal distribution. 
•	The goal of EM clustering is to estimate the means and standard deviations for each cluster so as to maximize the likelihood of the observed data (distribution). 
•	Put another way, the EM algorithm attempts to approximate the observed distributions of values based on mixtures of different distributions in different clusters. The results of EM clustering are different from those computed by k-means clustering. 
•	The latter will assign observations to clusters to maximize the distances between clusters. The EM algorithm does not compute actual assignments of observations to clusters, but classification probabilities. 
•	In other words, each observation belongs to each cluster with a certain probability. Of course, as a final result we can usually review an actual assignment of observations to clusters, based on the (largest) classification probability. K means Clustering 
•	The algorithm will categorize the items into k groups of similarity. To calculate that similarity, we will use the euclidean distance as measurement. 
•	The algorithm works as follows: 
1.	First we initialize k points, called means, randomly. 
2.	We categorize each item to its closest mean and we update the mean’s coordinates, which are the averages of the items categorized in that mean so far. 
3.	We repeat the process for a given number of iterations and at the end, we have our clusters. 
•	The “points” mentioned above are called means, because they hold the mean values of the items categorized in it. To initialize these means, we have a lot of options. An intuitive method is to initialize the means at random items in the data set. Another method is to initialize the means at random values between the boundaries of the data set (if for a feature x the items have values in [0,3], we will initialize the means with values for x at [0,3]). 
•	Pseudocode: 
1.	Initialize k means with random values 
2.	For a given number of iterations: Iterate through items: 
Find the mean closest to the item Assign item to mean 
 	 	                    Update mean 
 
 
5.	Implementation/Program : 
 
import matplotlib.pyplot as plt  from sklearn import datasets from sklearn.cluster import KMeans  import pandas as pd 
import numpy as np  
                                         # import some data to play with  iris = datasets.load_iris() 
X = pd.DataFrame(iris.data) 
X.columns =  ['Sepal_Length','Sepal_Width','Petal_Length','Petal_Width']  y = pd.DataFrame(iris.target) y.columns = ['Targets'] 
 
# Build the K Means Model model = KMeans(n_clusters=3) 
model.fit(X)        # model.labels_ : Gives cluster no for which samples belongs to 
 
# # Visualise the clustering results  plt.figure(figsize=(14,14)) colormap = np.array(['red', 'lime', 'black']) 
# Plot the Original Classifications using Petal features  plt.subplot(2, 2, 1) 
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)  plt.title('Real Clusters') plt.xlabel('Petal Length')  plt.ylabel('Petal Width') 
# Plot the Models Classifications 
plt.subplot(2, 2, 2) 
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[model.labels_], s=40)  plt.title('K-Means Clustering') plt.xlabel('Petal Length')  plt.ylabel('Petal Width') 
 
# General EM for GMM 
from sklearn import preprocessing 
# transform your data such that its distribution will have a # mean value 0 and standard deviation of 1. scaler = preprocessing.StandardScaler()  scaler.fit(X) 
xsa = scaler.transform(X) 
xs = pd.DataFrame(xsa, columns = X.columns) 
 
from sklearn.mixture import GaussianMixture  gmm = GaussianMixture(n_components=3)  gmm.fit(xs) 
gmm_y = gmm.predict(xs) plt.subplot(2, 2, 3) plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[gmm_y], s=40) plt.title('GMM Clustering') plt.xlabel('Petal Length')  plt.ylabel('Petal Width') 
print('Observation: The GMM using EM algorithm based clustering matched the true labels 
more closely than the Kmeans.') 
 
 
    6. Result/Output: 
 
  
 
 
 
 
 
1. LAB PROGRAM: 9 
 
2.	TITLE: K-NEAREST NEIGHBOUR 
3.	AIM:     
  Write a program to implement k-Nearest Neighbour algorithm to classify the iris data set. Print both correct and wrong predictions. Java/Python ML library classes can be used for this problem. 
4.	THEORY: 
•	K-Nearest Neighbors is one of the most basic yet essential classification algorithms in Machine Learning. It belongs to the supervised learning domain and finds intense application in pattern recognition, data mining and intrusion detection. 
•	It is widely disposable in real-life scenarios since it is non-parametric, meaning, it does not make any underlying assumptions about the distribution of data. 
 
•	Algorithm 
Input: Let m be the number of training data samples. Let p be an unknown point. 
Method: 
1.	Store the training samples in an array of data points arr[]. This means each element of this array represents a tuple (x, y). 
2.	for i=0 to m 
Calculate Euclidean distance d(arr[i], p). 
3.	Make set S of K smallest distances obtained. Each of these distances correspond to an already classified data point. 
                          Return the majority label among S. 
 
5.	Implementation/ Program: 
from sklearn.model_selection import train_test_split  from sklearn.neighbors import KNeighborsClassifier  from sklearn import datasets 
 
# Load dataset  iris=datasets.load_iris()  
print("Iris Data set loaded...") 
 
# Split the data into train and test samples x_train, x_test, y_train, y_test = train_test_split(iris.data,iris.target,test_size=0.1)  print("Dataset is split into training and testing...") 
print("Size of trainng data and its label",x_train.shape,y_train.shape)  print("Size of trainng data and its label",x_test.shape, y_test.shape) 
 
# Prints Label no. and their names  for i in range(len(iris.target_names)):     print("Label", i , "-",str(iris.target_names[i])) 
    # Create object of KNN classifier 
classifier = KNeighborsClassifier(n_neighbors=1) 
 
 
# Perform Training  
classifier.fit(x_train, y_train) # Perform testing y_pred=classifier.predict(x_test) 
 
# Display the results 
print("Results of Classification using K-nn with K=1 ")  for r in range(0,len(x_test)): 
    print(" Sample:", str(x_test[r]), " Actual-label:", str(y_test[r]), " Predicted-label:", str(y_pred[r])) print("Classification Accuracy :" , classifier.score(x_test,y_test)); 
 
from sklearn.metrics import classification_report, confusion_matrix print('Confusion Matrix') 
print(confusion_matrix(y_test,y_pred))  print('Accuracy Metrics')  
print(classification_report(y_test,y_pred)
 

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 113)

In [10]:
import pandas as pd 
import math 
 
df = pd.read_csv('/Users/Chachu/Documents/Python Scripts/playTennis.csv') print("\n Input Data Set is:\n", df) 
 
t = df.keys()[-1] 
print('Target Attribute is: ', t) 
# Get the attribute names from input dataset 
attribute_names = list(df.keys()) 
#Remove the target attribute from the attribute names list attribute_names.remove(t)  
print('Predicting Attributes: ', attribute_names) #Function to calculate the entropy of collection S def entropy(probs):   
    return sum( [-prob*math.log(prob, 2) for prob in probs]) #Function to calulate the entropy of the given Data Sets/List with  
#respect to target attributes def entropy_of_list(ls,value):   
    from collections import Counter 
    cnt = Counter(x for x in ls)# Counter calculates the propotion of class     print('Target attribute class count(Yes/No)=',dict(cnt))     total_instances = len(ls)   
    print("Total no of instances/records associated with {0} is: {1}".format(value,total_instances ))     probs = [x / total_instances for x in cnt.values()]  # x means no of YES/NO     print("Probability of Class {0} is: {1:.4f}".format(min(cnt),min(probs)))     print("Probability of Class {0} is: {1:.4f}".format(max(cnt),max(probs)))     return entropy(probs) # Call Entropy  
 
def information_gain(df, split_attribute, target_attribute,battr): 
    print("\n\n-----Information Gain Calculation of ",split_attribute, " --------")      df_split = df.groupby(split_attribute) # group the data based on attribute values     glist=[]     for gname,group in df_split: 
        print('Grouped Attribute Values \n',group)         glist.append(gname)  
     
    glist.reverse() 
    nobs = len(df.index) * 1.0    
    df_agg1=df_split.agg({target_attribute:lambda x:entropy_of_list(x, glist.pop())})     df_agg2=df_split.agg({target_attribute :lambda x:len(x)/nobs}) 
     
    df_agg1.columns=['Entropy'] 
    df_agg2.columns=['Proportion'] 
     
    # Calculate Information Gain:     new_entropy = sum( df_agg1['Entropy'] * df_agg2['Proportion'])     if battr !='S':         old_entropy = entropy_of_list(df[target_attribute],'S-'+df.iloc[0][df.columns.get_loc(battr)])     else: 
        old_entropy = entropy_of_list(df[target_attribute],battr)     return old_entropy - new_entropy 
 
 
 
def id3(df, target_attribute, attribute_names, default_class=None,default_attr='S'): 
     
    from collections import Counter 
    cnt = Counter(x for x in df[target_attribute])# class of YES /NO 
     
    ## First check: Is this split of the dataset homogeneous? 
    if len(cnt) == 1: 
        return next(iter(cnt))  # next input data set, or raises StopIteration when EOF is hit. 
     
    ## Second check: Is this split of the dataset empty? if yes, return a default value     elif df.empty or (not attribute_names): 
        return default_class  # Return None for Empty Data Set 
     
    ## Otherwise: This dataset is ready to be devied up! 
    else: 
        # Get Default Value for next recursive call of this function:         default_class = max(cnt.keys()) #No of YES and NO Class         # Compute the Information Gain of the attributes:         gainz=[]         for attr in attribute_names: 
            ig= information_gain(df, attr, target_attribute,default_attr)             gainz.append(ig) 
            print('Information gain of ',attr,' is : ',ig) 
         
        index_of_max = gainz.index(max(gainz))                        best_attr = attribute_names[index_of_max         print("\nAttribute with the maximum gain is: ", best_attr)         # Create an empty tree, to be populated in a moment         tree = {best_attr:{}} # Initiate the tree with best attribute as a node          remaining_attribute_names =[i for i in attribute_names if i != best_attr] 
         
        # Split dataset-On each split, recursively call this algorithm.Populate the empty tree with subtrees, which 
        # are the result of the recursive call         for attr_val, data_subset in df.groupby(best_attr):             subtree = id3(data_subset,target_attribute, remaining_attribute_names,default_class,best_attr) 
            tree[best_attr][attr_val] = subtree         return tree 
     
    from pprint import pprint tree = id3(df,t,attribute_names) print("\nThe Resultant Decision Tree is:") print(tree) 
 
def classify(instance, tree,default=None): # Instance of Play Tennis with Predicted         attribute = next(iter(tree)) # Outlook/Humidity/Wind        
    if instance[attribute] in tree[attribute].keys(): # Value of the attributs in  set of Tree keys           result = tree[attribute][instance[attribute]] 
        if isinstance(result, dict): # this is a tree, delve deeper             return classify(instance, result)         else: 
            return result # this is a label     else: 
        return default 
     
df_new=pd.read_csv('/Users/Chachu/Documents/Python Scripts/PlayTennisTest.csv') df_new['predicted'] = df_new.apply(classify, axis=1, args=(tree,'?'))  print(df_new) 
 
 


IndentationError: unindent does not match any outer indentation level (<tokenize>, line 52)

In [12]:
import numpy as np 
X = np.array(([2, 9], [1, 5], [3, 6]))  Y = np.array(([92], [86], [89]))  
y = y/100  
 
def sigmoid(x):  
    return 1/(1 + np.exp(-x)) 
 
def derivatives_sigmoid(x): 
    return x * (1 - x) 
 
epoch=10000  lr=0.1  
inputlayer_neurons = 2  hiddenlayer_neurons = 3  
output_neurons = 1  
 
wh=np.random.uniform(size=(inputlayer_neurons,hiddenlayer_neurons)) bias_hidden=np.random.uniform(size=(1,hiddenlayer_neurons))  
weight_hidden=np.random.uniform(size=(hiddenlayer_neurons,output_neurons))  bias_output=np.random.uniform(size=(1,output_neurons))  
 
for i in range(epoch):     hinp1=np.dot(X,wh)     hinp= hinp1 + bias_hidden  
    hlayer_activation = sigmoid(hinp) 
     
    outinp1=np.dot(hlayer_activation,weight_hidden)     outinp= outinp1+ bias_output 
    output = sigmoid(outinp) 
      
    EO = y-output  
    outgrad = derivatives_sigmoid(output)      d_output = EO * outgrad  
    EH = d_output.dot(weight_hidden.T)   
    hiddengrad = derivatives_sigmoid(hlayer_activation)      d_hiddenlayer = EH * hiddengrad 
 
    weight_hidden += hlayer_activation.T.dot(d_output) *lr 
    bias_hidden += np.sum(d_hiddenlayer, axis=0,keepdims=True) *lr 
 
    wh += X.T.dot(d_hiddenlayer) *lr 
    bias_output += np.sum(d_output, axis=0,keepdims=True) *lr 
 
print("Input: \n" + str(X)) print("Actual Output: \n" + str(y)) 
print("Predicted Output: \n" ,output) 
 


SyntaxError: invalid syntax (<ipython-input-12-63e9c3b01ae8>, line 2)

In [13]:
import numpy as np # numpy is commonly used to process number array

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float) # Features ( Hrs Slept, Hrs Studied) 
y = np.array(([92], [86], [89]), dtype=float)	# Labels(Marks obtained)

X = X/np.amax(X,axis=0) # Normalize 
y = y/100

def sigmoid(x):
    return 1/(1 + np.exp(-x))
def sigmoid_grad(x):
    return x * (1 - x)

# Variable initialization
epoch=1000	#Setting training iterations
eta =0.2		#Setting learning rate (eta)
input_neurons = 2	#number of features in data set 
hidden_neurons = 3	#number of hidden layers neurons
output_neurons = 1	#number of neurons at output layer

# Weight and bias - Random initialization
wh=np.random.uniform(size=(input_neurons,hidden_neurons))	# 2x3 
bh=np.random.uniform(size=(1,hidden_neurons))	# 1x3 
wout=np.random.uniform(size=(hidden_neurons,output_neurons)) # 1x1 
bout=np.random.uniform(size=(1,output_neurons))

for i in range(epoch):
#Forward Propogation
    h_ip=np.dot(X,wh) + bh	# Dot product + bias 
    h_act = sigmoid(h_ip)	# Activation function 
    o_ip=np.dot(h_act,wout) + bout
    output = sigmoid(o_ip)

#Backpropagation
    # Error at Output layer
    Eo = y-output	# Error at o/p 
    outgrad = sigmoid_grad(output)
    d_output = Eo* outgrad	# Errj=Oj(1-Oj)(Tj-Oj)

    # Error at Hidden later
    Eh = d_output.dot(wout.T)	# .T means transpose
    hiddengrad = sigmoid_grad(h_act)	# How much hidden layer wts contributed to error 
    d_hidden = Eh * hiddengrad
    wout += h_act.T.dot(d_output) *eta	# Dotproduct of nextlayererror and currentlayerop 
    wh += X.T.dot(d_hidden) *eta

print("Normalized Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n" ,output)


Normalized Input: 
[[0.66666667 1.        ]
 [0.33333333 0.55555556]
 [1.         0.66666667]]
Actual Output: 
[[0.92]
 [0.86]
 [0.89]]
Predicted Output: 
 [[0.89461888]
 [0.87922034]
 [0.89597774]]
