# Question 1

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, recall_score, precision_score, accuracy_score

In [2]:
feature_names = {i:label for i,label in zip(range(4),
                  ('sepal length in cm',
                  'sepal width in cm',
                  'petal length in cm',
                  'petal width in cm', ))}

#### Reading data to pandas data frame and splitting them as unique class labels in one class

In [3]:
df = pd.read_csv('iris.data', header=None, sep=',')

In [4]:
df.columns = [l for i,l in sorted(feature_names.items())] + ['class label']
df.dropna(axis=0,how="all",inplace=True)
df.describe()

Unnamed: 0,sepal length in cm,sepal width in cm,petal length in cm,petal width in cm
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [5]:
X= df.iloc[:,0:4]
y = df.iloc[:,-1]

In [6]:
class_label = df['class label'].unique()
setosa = df[df['class label'] == class_label[0]]
versicolor = df[df['class label'] == class_label[1]]
virginica = df[df['class label'] == class_label[2]]

In [7]:
df.tail()

Unnamed: 0,sepal length in cm,sepal width in cm,petal length in cm,petal width in cm,class label
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica
149,5.9,3.0,5.1,1.8,Iris-virginica


In [8]:
frame12 =  [setosa,versicolor]
frame13 = [setosa,virginica]
frame14 = [versicolor,virginica]

df_sVer = pd.concat(frame12)
df_sVir = pd.concat(frame13)
df_verVir = pd.concat(frame14)

In [9]:
X_12 = df_sVer.iloc[:, 0:4].values
y_12 = df_sVer.iloc[:, -1].values

X_12_train, X_12_test, y_12_train, y_12_test = train_test_split(X_12, y_12, test_size = 0.2, random_state = 42)

In [10]:
sc = StandardScaler()
X_12_train = sc.fit_transform(X_12_train)
X_12_test = sc.transform(X_12_test)

#### LDA for classes Setosa and Versicolor

In [11]:
lda = LDA()
X_12_train = lda.fit_transform(X_12_train, y_12_train)
X_12_test = lda.transform(X_12_test)

#### Logistic Regression for Setosa and Versicolor

In [12]:
classifier = LogisticRegression(random_state = 42)
classifier.fit(X_12_train, y_12_train)

LogisticRegression(random_state=42)

In [13]:
y_12_pred = classifier.predict(X_12_test)
matrix = confusion_matrix(y_12_test, y_12_pred)
print(matrix)
print('Accuracy : ' + str(accuracy_score(y_12_test, y_12_pred)))

[[12  0]
 [ 0  8]]
Accuracy : 1.0


In [14]:
X_13 = df_sVir.iloc[:, 0:4].values
y_13 = df_sVir.iloc[:, -1].values

X_13_train, X_13_test, y_13_train, y_13_test = train_test_split(X_13, y_13, test_size = 0.2, random_state = 42)

X_13_train = sc.fit_transform(X_13_train)
X_13_test = sc.transform(X_13_test)

In [15]:
X_13_train = lda.fit_transform(X_13_train, y_13_train)
X_13_test = lda.transform(X_13_test)

In [16]:
classifier = LogisticRegression(random_state = 42)
classifier.fit(X_13_train, y_13_train)

y_13_pred = classifier.predict(X_13_test)
matrix = confusion_matrix(y_13_test, y_13_pred)
print(matrix)
print('Accuracy : ' + str(accuracy_score(y_13_test, y_13_pred)))

[[12  0]
 [ 0  8]]
Accuracy : 1.0


#### Splitting the train and test data

In [17]:
X_23 = df_verVir.iloc[:, 0:4].values
y_23 = df_verVir.iloc[:, -1].values

X_23_train, X_23_test, y_23_train, y_23_test = train_test_split(X_23, y_23, test_size = 0.2, random_state = 42)

X_23_train = sc.fit_transform(X_23_train)
X_23_test = sc.transform(X_23_test)

#### LDA for classes Versicolor and Virginica

In [18]:
X_23_train = lda.fit_transform(X_23_train, y_23_train)
X_23_test = lda.transform(X_23_test)

#### Logistic Regression for Versicolor and Virginica

In [19]:
classifier = LogisticRegression(random_state = 42)
classifier.fit(X_23_train, y_23_train)

y_23_pred = classifier.predict(X_23_test)
matrix = confusion_matrix(y_23_test, y_23_pred)
print(matrix)
print('Accuracy : ' + str(accuracy_score(y_23_test, y_23_pred)))

[[9 3]
 [1 7]]
Accuracy : 0.8


# Question 2

#### Function to initialize the weights and bias

In [20]:
def initialize(m):
    a = np.zeros((m,1))
    b = 0
    return a , b

#### Calculating sigmoid of x    


In [21]:
def sigmoid(X):
    return 1/(1 + np.exp(- X))  

#### Forward and back propogation# 

In [22]:
def propogate(X, Y, w, b):
    m = X.shape[1] 
    #calculating the cost
    Z = np.dot(w.T, X) + b;    
    A = sigmoid(Z)
    cost= -(1/m) * np.sum(Y * np.log(A) + (1-Y) * np.log(1-A))
    
    #calculating the gradients
    dw = (1/m)* np.dot(X, (A-Y).T)
    db = (1/m)* np.sum(A-Y)
    
    grads= {"dw" : dw, "db" : db}
    
    return grads, cost

#### Function for performing Grdient Descent

In [23]:
def optimize(X, Y, w, b, num_of_iterations, alpha):
    
    costs=[] 
    
    for i in range(num_of_iterations):
 
        grads, cost = propogate(X, Y, w, b)
        
        dw = grads["dw"]
        db = grads["db"]
        
        w = w - alpha * dw
        b = b - alpha * db
        
            
    parameters = {"w":w, "b":b}
    grads = {"dw":dw, "db":db}
    
    
    return parameters, grads, costs

#### Predictions on the data set

In [24]:
def predict(X, w, b):
    
    m = X.shape[1] #Number of training examples
    
    y_prediction =  np.zeros((1,m))
    
    w = w.reshape(X.shape[0], 1)
    
    S=sigmoid(np.dot(w.T, X)+b)
    
    
    for i in range(S.shape[1]):
        
        if(S[0,i]<0.5):
            y_prediction[0,i]=0
        else:
            y_prediction[0,i]=1
            
    
    return y_prediction

#### Function for calculating the Logistic Regression Model

In [25]:
def model(Xtrain, Ytrain, num_of_iterations, alpha):
    
    dim = Xtrain.shape[0] #Number of features
    
    w,b = initialize(dim)
    
    parameters, grads, costs = optimize(Xtrain, Ytrain, w, b, num_of_iterations, alpha) 
    
    w = parameters["w"]
    b = parameters["b"]
        
    
    d={"w":w, "b":b, "costs": costs}
    
    return d

In [26]:
def init():
    
    feature_names = {i:label for i,label in zip(range(10),
                  ('Sample code number','Clump Thickness','Uniformity of Cell Size','Uniformity of Cell Shape','Marginal Adhesion',
                  'Single Epithelial Cell Size','Bare Nuclei','Bland Chromatin','Normal Nucleoli','Mitoses',
                  'Class:'))}
    
    
    df = pd.read_csv('breast-cancer-wisconsin.data',header=None,sep=',',na_values=['?','na','NA','NAN','nan','NaN'])
    
    df.columns = [l for i,l in sorted(feature_names.items())] + ['class label']
    
    df['class label'] = df['class label'].map({2:0,4:1})  
    df = df.dropna()
    df = df.reset_index(drop=True)

    xi = df.drop(['class label'], axis = 1)
    y = df['class label'].values
    
    #splitting Train and Test data
    X = (xi - np.min(xi))/(np.max(xi) - np.min(xi)).values
    train_x, test_x, train_y, test_y = train_test_split(X, y, test_size = 0.20, random_state = 42)
    
    #putting Traing and Test Data to numpy array
    train_x=np.asarray(train_x)
    train_y=np.asarray(train_y)
    test_x=np.asarray(test_x)
    test_y=np.asarray(test_y)
    
    #model function is used for a Logistic Regression Model on Training Data
    d= model(train_x.T, train_y.T, num_of_iterations=10, alpha=0.000001)
    
    costs=d["costs"]
    w=d["w"]
    b=d["b"]
        
    
    #the accuracy on Training and Test Data
    Y_prediction_train = predict(train_x.T, w, b)  
    Y_prediction_test = predict(test_x.T, w, b)
   
    print("\nAccuracy: {} %".format(100-np.mean(np.abs(Y_prediction_test - test_y.T)) * 100))

In [27]:
init()



Accuracy: 94.8905109489051 %
