In [4]:
import csv
import os

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [6]:
def loadDataMoreInputs(fileName, inputVariabNames, outputVariabName, label_encoder):
    """Load selected numeric feature columns and an encoded label column from a CSV file.

    The first row of the file is treated as the header; every following
    non-empty row is one sample. Blank lines (for example a trailing empty
    line at the end of the file) are ignored instead of crashing the loader.

    Args:
        fileName: Path of the comma-separated file to read.
        inputVariabNames: Header names of the feature columns, in the order
            they should appear inside each returned sample.
        outputVariabName: Header name of the label column.
        label_encoder: Object exposing ``fit_transform(labels)`` that returns
            a NumPy array (e.g. ``sklearn.preprocessing.LabelEncoder``). It is
            fitted here on the labels read from the file.

    Returns:
        A pair ``(inputs, outputs_encoded)``. ``inputs`` is a list holding one
        list of floats per sample; ``outputs_encoded`` is the encoder's result
        reshaped into a single column, i.e. shape ``(n_samples, 1)``.

    Raises:
        ValueError: If a requested column name is not in the header, or a
            feature value cannot be converted to ``float``.
    """
    # newline='' is what the csv module expects: it then deals with line
    # endings (and newlines embedded in quoted fields) on its own.
    with open(fileName, newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        dataNames = next(csv_reader, [])
        # csv.reader yields [] for blank lines; keeping them would raise an
        # IndexError on the column lookups below.
        data = [row for row in csv_reader if row]

    selectedVariables = [dataNames.index(var) for var in inputVariabNames]
    selectedOutput = dataNames.index(outputVariabName)

    inputs = [[float(row[col]) for col in selectedVariables] for row in data]
    outputs = [row[selectedOutput] for row in data]

    # from text labels to numeric class ids
    outputs_encoded = label_encoder.fit_transform(outputs)

    # hand the labels back as a column vector
    return inputs, outputs_encoded.reshape(-1, 1)

In [29]:
from MyLogisticRegressor import LogisticRegressorMultiClass

# The dataset is looked up at ./data/iris.csv relative to the current working directory.
crtDir = os.getcwd()
filePath = os.path.join(crtDir, 'data', 'iris.csv')

# The encoder is fitted inside the loader and kept here so that predicted
# class ids can be mapped back to their text labels further down.
label_encoder = LabelEncoder()
featureNames = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']
inputs, outputs = loadDataMoreInputs(
    fileName=filePath,
    inputVariabNames=featureNames,
    outputVariabName='Class',
    label_encoder=label_encoder,
)

# split data into training data (80%) and testing data (20%) and normalise data
np.random.seed(5)  # fixed seed so the split is the same on every run
indexes = list(range(len(inputs)))
trainSample = np.random.choice(indexes, int(0.8 * len(inputs)), replace=False)
# Membership is checked against a set: testing `i in trainSample` on the NumPy
# array rescans the whole array for every index.
trainIndexSet = set(trainSample.tolist())
testSample = [i for i in indexes if i not in trainIndexSet]

trainInputs = [inputs[i] for i in trainSample]
# labels arrive as one-element rows; ravel() flattens them to a 1-D vector
trainOutputs = np.array([outputs[i] for i in trainSample]).ravel()
testInputs = [inputs[i] for i in testSample]
testOutputs = np.array([outputs[i] for i in testSample]).ravel()

# normalization: the scaler is fitted on the training rows only and the same
# transform is then applied to the held-out rows
scaler = StandardScaler()
scaler.fit(trainInputs)
trainInputs = scaler.transform(trainInputs)
testInputs = scaler.transform(testInputs)


# training step
# modelType == "tool" trains scikit-learn's LogisticRegression; any other value
# (including the default empty string) trains LogisticRegressorMultiClass.
# NOTE(review): the loader is asked for four feature columns, but both branches
# print only the first two weights (x1, x2) of the first class.
modelType = ''
if modelType != "tool":
    model = LogisticRegressorMultiClass(alpha=0.01, max_iter=1000, random_state=42)
    model.fit(trainInputs, trainOutputs)
    # presumably `models` maps each class to its own fitted sub-model -- confirm
    # against MyLogisticRegressor
    first_class = model.classes_[0]
    first_model = model.models[first_class]
    w0 = first_model.intercept_
    w1, w2 = first_model.coef_[0], first_model.coef_[1]

    print('the learnt model for class', first_class, ': f(x) = ', w0, ' + ', w1, ' * x1 + ', w2, ' * x2')
else:
    model = LogisticRegression()
    model.fit(trainInputs, trainOutputs)
    w0 = model.intercept_
    w1 = model.coef_[0]
    print('the learnt model: f(x) = ', w0[0], ' + ', w1[0], ' * x1 + ', w1[1], ' * x2')


# evaluate the trained model on the held-out rows
computedTestOutputs = model.predict(testInputs)

print('Accuracy: ', accuracy_score(testOutputs, computedTestOutputs))
# precision and recall are multi-class here, so both use the 'weighted' average
for metricLabel, metricFn in (('Precision: ', precision_score), ('Recall: ', recall_score)):
    print(metricLabel, metricFn(testOutputs, computedTestOutputs, average='weighted'))


# classify one hand-entered sample; it goes through the same scaler as the
# training data before being passed to the model
rawSample = [[5.35, 3.85, 1.25, 0.4]]
normalized_inputs = scaler.transform(rawSample)
prediction = model.predict(np.array(normalized_inputs))
# map the predicted class id back to its original text label
predicted_species = label_encoder.inverse_transform(prediction)
print("Result: ", predicted_species[0])




the learnt model for class 0 : f(x) =  -0.7952443678448354  +  -0.6378594334897498  * x1 +  0.9173326420289316  * x2
Accuracy:  0.8
Precision:  0.7911111111111111
Recall:  0.8
Result:  Iris-setosa
