In [1]:
import numpy as np
import pandas as pd
import random

In [2]:
df=pd.read_csv("iris.data.csv")
df.head()

Unnamed: 0,5.1,3.5,1.4,0.2,Iris-setosa
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa


In [3]:
df=df.rename(columns={'5.1': "Sepal.Length", '3.5': "Sepal.Width",'1.4':"Petal.Length",'0.2':"Petal.Width","Iris-setosa":"Species"})

In [4]:
df['Species'].value_counts()

Iris-versicolor    50
Iris-virginica     50
Iris-setosa        49
Name: Species, dtype: int64

In [5]:
df['Species']=df['Species'].replace("Iris-versicolor",0).replace('Iris-virginica',1).replace("Iris-setosa",2)

In [6]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
scale_data=sc.fit_transform(df[['Sepal.Length','Sepal.Width','Petal.Length','Petal.Width']])
scale_data.shape

(149, 4)

In [7]:
x=scale_data
y=df['Species']
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=1,train_size=0.8)

In [8]:
x_train[:,0].shape
x_train=x_train.T
x_test=x_test.T

In [9]:
def softmax(z):
    return np.exp(z)/np.sum(np.exp(z))

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def ReLU(z):
    return np.maximum(0.0,z)

def ReLU_deriv(Z):
    return Z > 0

def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 1 - np.tanh(x)**2

def leaky_relu(x, alpha=0.1):
    return np.maximum(alpha * x, x)

def leaky_relu_derivative(x, alpha=0.1):
    return np.where(x > 0, 1, alpha)

def one_hot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

In [10]:
def weight_params():
    w1=np.random.rand(4,4)-0.5
    b1=np.random.rand(4,1)-0.5
    w2=np.random.rand(3,4)-0.5
    b2=np.random.rand(3,1)-0.5
    return w1,b1,w2,b2

In [11]:
def for_prop(w1,b1,w2,b2,xx,j):
    z1=w1.dot(xx)+b1
    A1=j(z1)
    z2=w2.dot(A1)+b2
    A2=softmax(z2)
    return z1,A1,z2,A2

In [12]:
def back_prop(Z1, A1, Z2, A2, W1, W2, X, Y,k):
    m=y_train.size
    one_hot_Y = one_hot(Y)
    dZ2 = A2 - one_hot_Y
    dW2 = 1 / m * dZ2.dot(A1.T)
    db2 = 1 / m * np.sum(dZ2)
    dZ1 = np.dot(W2.T,dZ2) * k(Z1)    
    
    dW1 = 1 / m * dZ1.dot(X.T)
    db1 = 1 / m * np.sum(dZ1)
    return dW1, db1, dW2, db2

In [13]:
def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate):
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1    
    W2 = W2 - learning_rate * dW2  
    b2 = b2 - learning_rate * db2    
    return W1, b1, W2, b2

def get_predictions(A2):
    return np.argmax(A2, 0)

def get_accuracy(predictions, Y):
    #print(predictions, Y)
    return np.sum(predictions == Y) / Y.size

In [14]:
def grad_desc(X, Y, alpha, iterations):
    W1, b1, W2, b2 = weight_params()
    act_list=[tanh,leaky_relu,ReLU]
    act_der=[tanh_derivative,leaky_relu_derivative,ReLU_deriv]
    best=[]
    iteration=[]
    act=[]
    for j,k in zip(act_list,act_der):
        
        for i in range(iterations):
            Z1, A1, Z2, A2 = for_prop(W1, b1, W2, b2, X,j)
            dW1, db1, dW2, db2 = back_prop(Z1, A1, Z2, A2, W1, W2, X, Y,k)
            W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
            if i % 10 == 0:
                predictions = get_predictions(A2)
                res=get_accuracy(predictions, Y)
                #print(res)
                best.append(float(res))
                iteration.append(i)
                act.append(j)
                
    var=max(best)
    for i,j,k in zip(best,iteration,act):
        k=((str(k))[10:-22])
        if i==var:
            print('max_accuracy :',i,"iteration_",j,"act :",k)
        
    return W1, b1, W2, b2 

In [15]:
W1, b1, W2, b2 =grad_desc(x_train, y_train, 0.01, 100)

max_accuracy : 0.7478991596638656 iteration_ 90 act : tanh 


In [16]:
def for_prop2(w1,b1,w2,b2,xx):
    z1=w1.dot(xx)+b1
    A1=ReLU(z1)
    z2=w2.dot(A1)+b2
    A2=softmax(z2)
    return z1,A1,z2,A2

z1,A1,z2,A2=for_prop2(W1, b1, W2, b2,x_train)

In [17]:
z1,A1,z2,A2=for_prop2(W1, b1, W2, b2,x_train)


### train_loss>>>>

In [18]:
y_train = np.asarray(y_train)
y_pred = np.asarray(A2)

categorical_cross_entropy=-np.sum(y_train*np.log(y_pred+10**-100))
categorical_cross_entropy

#jacoblan=-y_train/(y_pred+10**100)
categorical_cross_entropy

14318.397961360173

In [19]:
y_train.shape == y_pred.shape
train_loss = -np.sum(y_train * np.log(y_pred), axis=-1)
train_loss

array([5784.49069024, 5873.01129983, 2660.89597129])

In [20]:
### categorical_cross_entropy_function>>>>>>

np.random.seed(42)
def cross_Entropy(y_train, A2):                 # CE
    return -np.sum(y_train * np.log(A2 + 10**-100))

def cross_E_grad(y_true, y_pred):              # CE Jacobian
    return -y_true/(y_pred + 10**-100)

-np.sum(y_train * np.log(A2 + 10**-100))
ss=-y_train/(A2 + 10**-100)

In [21]:
#W1, b1, W2, b2

In [22]:
def for_prop2(w1,b1,w2,b2,xx):
    z1=w1.dot(xx)+b1
    A1=ReLU(z1)
    z2=w2.dot(A1)+b2
    A2=softmax(z2)
    return z1,A1,z2,A2

In [23]:
def make_prdictions(x,W1,b1,W2,b2,):
    _,_,_,A2=for_prop2(W1,b1,W2,b2,x)
    pred=get_predictions(A2)
    return pred

In [24]:
z1,A1,z2,A2=for_prop2(W1, b1, W2, b2,x_test)
pred=make_prdictions(x_test,W1, b1, W2, b2)
pred

array([1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1,
       2, 1, 1, 2, 2, 2, 2, 1], dtype=int64)

In [25]:
get_accuracy=get_accuracy(pred,y_test)
get_accuracy

0.5666666666666667

In [26]:
from sklearn.metrics import confusion_matrix
def f1_score(y_train, y_pred):
  
    num_classes = len(np.unique(y_train))
    confusion_matrix = np.zeros((3, 3))
    for i in range(len(y_train)):
        confusion_matrix[y_train[i], y_pred[i]] += 1
    
    # Compute the precision and recall for each class
    precision = np.zeros(3)
    recall = np.zeros(3)
    for i in range(3):
        tp = confusion_matrix[i, i]
        fp = np.sum(confusion_matrix[:, i]) - tp
        fn = np.sum(confusion_matrix[i, :]) - tp
        precision[i] = tp / (tp + fp)
        recall[i] = tp / (tp + fn)
    
    # Compute the F1 score for each class
    f1_scores = 2 * precision * recall / (precision + recall)
    
    # Compute the weighted average of the F1 scores
    weights = np.sum(confusion_matrix, axis=1)
    weights /= np.sum(weights)
    f1_weighted = np.sum(f1_scores * weights)
    
    return f1_weighted
f1_score_multiclass(y_train, y_pred)
precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * precision * recall / (precision + recall)

In [31]:
confusion_matrix = np.zeros((3, 3))

In [37]:
for i in range(len(y_train)):
    confusion_matrix[y_train[i], y_pred[i]] += 1

IndexError: arrays used as indices must be of integer (or boolean) type

In [38]:
y_train.shape

(119,)

In [39]:
y_pred.shape

(3, 119)

In [40]:
y_pred

array([[3.00837500e-20, 1.90647272e-20, 5.09894534e-22, 2.97080903e-20,
        4.43579277e-22, 1.97669845e-21, 3.14865596e-20, 3.05760186e-20,
        3.07654463e-22, 2.94685563e-20, 2.83072424e-20, 2.26869204e-20,
        2.91515818e-20, 2.04890258e-22, 2.49702614e-22, 1.88263489e-22,
        1.21533068e-20, 3.03665660e-22, 4.95941050e-22, 2.75378059e-20,
        2.96452224e-20, 3.27378755e-20, 3.07449993e-20, 2.32639964e-20,
        3.05083232e-20, 3.05772917e-20, 1.22653680e-20, 5.37831376e-22,
        2.99294528e-20, 1.66713090e-20, 3.61420292e-22, 4.81046796e-22,
        1.12685720e-20, 3.08805735e-20, 2.93795953e-20, 3.48477213e-22,
        2.85071179e-20, 2.79961606e-20, 1.95308184e-20, 1.91605639e-20,
        1.42079428e-20, 2.97826212e-20, 3.85170854e-22, 2.36400295e-20,
        2.22275024e-20, 3.00412533e-20, 1.53156939e-20, 2.79821515e-20,
        3.10423641e-20, 3.27507509e-22, 4.36073467e-22, 1.04491928e-22,
        2.88955086e-20, 3.67467724e-22, 1.75642986e-20, 3.078577