## Import modules
In particular, import numpy, matplotlib and sklearn

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn import metrics, tree, ensemble
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

## Plots PhD thesis
Plot of the analysis output (satisfaction probability) over time

In [None]:
# Read the analysis output and keep only the first 200 rows:
# column 0 goes on the x-axis (labelled 'Time'), column 2 on the y-axis.
data = np.loadtxt("./Output_M1.txt")
X = data[:200, 0]
Y2 = data[:200, 2]

# Draw the curve with a thin crimson line and a LaTeX legend entry
plt.plot(X, Y2, color='crimson', linewidth=0.75, label='$p^*_{M_1}$')
plt.xlabel('Time')
plt.ylabel('Estimated satisfaction probability')
plt.legend()
plt.show()

## Simple project
Temperature conversion: Celsius to Fahrenheit and Fahrenheit to Celsius

In [None]:
# Interactive Celsius <-> Fahrenheit converter.
# Reads a temperature and its scale from the user and prints the value
# converted to the other scale, rounded to one decimal place.
print('Welcome To Temperature Converter!')

# Keep prompting until the input parses as a number
temp = None
while temp is None:
    try:
        temp = float(input("Enter the temperature: "))
    except ValueError:
        print("Please enter an appropriate temperature value!")

# Normalise the answer so that e.g. "c" or " F " are accepted as well
scale = input("Enter the scale (C/F): ").strip().upper()

# Descriptive name instead of `a`, which later cells reuse for accuracy values
degree_sign = '\u00b0'

# Branch on the scale the input temperature is expressed in
if scale == "C":
    t = temp * 1.8 + 32
    print("The temperature is", str(round(t, 1)) + degree_sign + "F")
elif scale == "F":
    t = (temp - 32) / 1.8
    print("The temperature is", str(round(t, 1)) + degree_sign + "C")
else:
    print("Please provide a valid scale")

## Supervised ML algorithms
Presentation of different ML classification algorithms using the iris data set

In [None]:
# Load the iris dataset that ships with scikit-learn.
# NOTE: this rebinds `data`, which the thesis-plot cell earlier in the notebook
# used for the array read from Output_M1.txt.
data = load_iris()

In [None]:
# Show which top-level entries the loaded dataset object provides
print(data.keys())

Create feature and target sets:

- feature_names: names of the 4 features (sepal length, sepal width, petal length, petal width)
- features: numeric values of the 4 features
- target_names: setosa/versicolor/virginica
- targets: 0 (setosa), 1 (versicolor), 2 (virginica)

In [None]:
# Pull the feature matrix, the target vector and their label arrays
# out of the loaded dataset object
feature_names, features = data['feature_names'], data['data']
target_names, targets = data['target_names'], data['target']

# Quick look: feature labels, first sample, class labels, first three targets
for preview in (feature_names, features[0], target_names, targets[:3]):
    print(preview)

Explore target distribution 

In [None]:
# Count how many samples belong to each class and print {class id: count}
class_ids, class_counts = np.unique(targets, return_counts=True)
print({class_id: n_samples for class_id, n_samples in zip(class_ids, class_counts)})

Split the data (70% train, 30% test)

In [None]:
# Hold out 30% of the samples for testing; the fixed seed makes the split repeatable
train, test, train_targets, test_targets = train_test_split(
    features,
    targets,
    test_size=0.3,
    random_state=1,
)

Observe how the target classes are distributed within the two datasets (train and test)

In [None]:
# Bar charts of the per-class sample counts in the train and test splits.
#
# np.bincount with `minlength` always yields one count per class (0 when a class
# is absent from a split), so bars, colours and tick labels stay aligned with
# `target_names` even if a random split happens to miss a class.
n_classes = len(target_names)
class_positions = np.arange(n_classes)
counts_tr = np.bincount(train_targets, minlength=n_classes)
counts_te = np.bincount(test_targets, minlength=n_classes)

# One row, two panels: train on the left, test on the right
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
for ax, panel_title, panel_counts in zip(axes, ('Train', 'Test'), (counts_tr, counts_te)):
    ax.bar(class_positions, panel_counts, color=['red', 'green', 'blue'])
    # Replace the numeric class ids on the x-axis with the class names
    ax.set_xticks(class_positions)
    ax.set_xticklabels(target_names)
    ax.set_ylabel('Number of samples')
    ax.set_title(panel_title)

plt.show()

Stratification (by targets)

In [None]:
# Same 70/30 split and seed as before, but stratified on the class labels
train, test, train_targets, test_targets = train_test_split(
    features,
    targets,
    test_size=0.3,
    random_state=1,
    stratify=targets,
)

Create barplot again to compare the distribution

In [None]:
# Side-by-side bar charts of the class counts in the train and test splits
plt.figure(figsize=(15, 6))

splits = [('Train', train_targets), ('Test', test_targets)]
for panel, (panel_title, split_targets) in enumerate(splits, start=1):
    # Class ids present in this split and how often each one occurs
    class_ids, class_counts = np.unique(split_targets, return_counts=True)

    # subplot arguments: rows, columns, index of the current panel
    plt.subplot(1, len(splits), panel)
    plt.bar(class_ids, class_counts, color=['red', 'green', 'blue'])
    # Show class names instead of numeric ids on the x-axis
    plt.xticks(class_ids, target_names)
    plt.title(panel_title)

### k-nearest neighbor (KNN) classification 

Feature scaling

In [None]:
# Learn the scaling parameters from the training data only, then apply the
# same transformation to both splits
scaler = StandardScaler()
train_s = scaler.fit_transform(train)
test_s = scaler.transform(test)

Create, fit, predict using KNN classifier

In [None]:
# 3-nearest-neighbour classifier trained on the scaled training features;
# fit() returns the estimator itself, so construction and fitting can be chained
KNN_model = KNeighborsClassifier(n_neighbors=3).fit(train_s, train_targets)

# Predicted class for every (scaled) test sample
KNN_prediction = KNN_model.predict(test_s)

Evaluate accuracy of classifier

In [None]:
# Fraction of test samples the KNN classifier labelled correctly
a = accuracy_score(y_true=test_targets, y_pred=KNN_prediction)
print(a)

# Start the list of per-method accuracies with the KNN result;
# later cells append their own accuracy to it
acc_v = [a]

How accuracy is calculated:

In [None]:
# Accuracy by hand: number of correct predictions divided by number of predictions.
# Counting on the boolean mask works for any outcome; building a dict from
# np.unique and indexing d[False] / d[True] raises KeyError when every
# prediction is correct (or every prediction is wrong).
correct = KNN_prediction == test_targets
n_correct = correct.sum()
n_correct / correct.size

Other classification performance metrics:

In [None]:
# Per-class precision / recall / F1 for the KNN predictions
knn_report = metrics.classification_report(test_targets, KNN_prediction)
print("Classification report:", knn_report, sep="\n")
# Discussion point: accuracy can be misleading when the classes are unbalanced

Explore accuracy using different k values (defined in k_values)


In [None]:
# Test-set accuracy of KNN for every odd k from 1 to 49.
#
# The loop uses its own local names so that it does not overwrite KNN_model,
# KNN_prediction or `a` from the k=3 cells above; otherwise re-running the
# earlier evaluation cells would silently report the k=49 model.
#
# NOTE: accuracies are measured on the test split, so choosing k from these
# numbers tunes on test data; use cross-validation for real model selection.
k_values = range(1, 50, 2)
acc_vector = []
for k in k_values:
    model_k = KNeighborsClassifier(n_neighbors=k).fit(train_s, train_targets)
    prediction_k = model_k.predict(test_s)
    acc_vector.append(accuracy_score(test_targets, prediction_k))

Plot accuracy (y-axis) as a function of k value (x-axis)

In [None]:
# Accuracy for every tested k. Points are drawn at consecutive integer
# positions and the ticks are then relabelled with the actual k values.
tick_positions = range(len(k_values))

plt.figure(figsize=(12, 5))
plt.plot(tick_positions, acc_vector, 'o', color="green")
plt.title("Accuracy comparison")
plt.xlabel("k value")
plt.ylabel("Accuracy")

# One tick per point, labelled with the corresponding k
plt.xticks(tick_positions, k_values)

plt.show()

### Decision Trees
Create, fit, predict, evaluate accuracy: Decision Tree

In [None]:
# Single decision tree trained on the unscaled features.
# random_state is fixed (same seed as the train/test split) so that the fitted
# tree, and therefore the reported accuracy, is identical on every run.
dt_classifier = tree.DecisionTreeClassifier(random_state=1)
dt_classifier.fit(train, train_targets)
pred_ct = dt_classifier.predict(test)

# Test-set accuracy; stored after the KNN entry for the final comparison plot.
# Re-running this cell appends a duplicate entry; re-run the KNN accuracy
# cell first, since it resets acc_v.
a = accuracy_score(test_targets, pred_ct)
acc_v.append(a)
print(a)

### Random Forests
Create, fit, predict, evaluate accuracy: Random Forest


In [None]:
# Random forest of 5 trees trained on the unscaled features.
# random_state is fixed (same seed as the train/test split) because the
# ensemble is built from random resamples and feature subsets; without a seed
# the accuracy changes from run to run.
rf_classifier = ensemble.RandomForestClassifier(n_estimators=5, random_state=1)   # n_estimators = number of trees
rf_model = rf_classifier.fit(train, train_targets)
pred_rf = rf_model.predict(test)

# Test-set accuracy; stored for the final comparison plot.
# Re-running this cell appends a duplicate entry; re-run the KNN accuracy
# cell first, since it resets acc_v.
a = accuracy_score(test_targets, pred_rf)
acc_v.append(a)
print(a)

### Accuracy comparison 



Plot of accuracy of various methods for comparison

In [None]:
# Compare the test-set accuracy of the three classifiers.
# acc_v is filled by the KNN, decision-tree and random-forest cells, in that order.
method_names = ['KNN', 'DT', 'RF']

# acc_v grows every time a model cell is re-run; fail early with an actionable
# message instead of mislabelling points or hitting an opaque tick-label error.
if len(acc_v) != len(method_names):
    raise ValueError(
        "Expected {} accuracies (one per method) but acc_v holds {}; "
        "re-run the KNN, decision tree and random forest cells in order, "
        "each exactly once.".format(len(method_names), len(acc_v))
    )

x1 = range(len(method_names))
plt.plot(x1, acc_v, 'o', color="green")
plt.title("Accuracy comparison")
plt.xlabel("Method")
plt.ylabel("Accuracy")

# One tick per method, labelled with the method's short name
ax = plt.gca()
ax.set_xticks(x1)
ax.set_xticklabels(method_names, minor=False)
plt.show()