# Load Libraries

In [29]:
import numpy as np, pandas as pd
from sklearn import datasets

# Load Dataset

In [30]:
# Load scikit-learn's bundled Iris dataset. The returned object is read via its
# .data, .target and .feature_names attributes in the next cell.
df = datasets.load_iris()

In [33]:
# Feature matrix as a DataFrame with named columns; targets kept as an
# independent NumPy copy so later edits cannot touch the loader's array.
X = pd.DataFrame(data=df.data, columns=df.feature_names)
y = np.array(df.target, copy=True)
X.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


# Train-Test Split

In [36]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=12,
)

# Separate working copy of the training targets; y_train itself is left
# untouched so it can serve as ground truth when evaluating.
labels = np.array(y_train, copy=True)

In [40]:
# Seeded legacy RandomState so the same rows are selected on every run.
rng = np.random.RandomState(42)
# One uniform draw per training row; True (draw < 0.5) flags the row whose
# label gets hidden further down.
random_unlabeled_points = rng.rand(y_train.shape[0]) < 0.5

In [41]:
# Show the true training targets (no labels have been hidden in this array).
y_train

array([2, 2, 1, 0, 1, 0, 1, 2, 1, 0, 2, 1, 1, 0, 0, 0, 1, 2, 0, 2, 0, 1,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 1, 1, 1, 0, 2, 1, 1, 2,
       0, 2, 0, 2, 2, 2, 2, 1, 1, 1, 1, 2, 0, 2, 2, 0, 1, 0, 2, 2, 0, 1,
       1, 0, 0, 1, 1, 1, 1, 2, 1, 2, 0, 0, 1, 1, 1, 0, 2, 1, 0, 2, 2, 1,
       2, 2, 0, 0, 2, 1, 1, 2, 0, 1, 1, 0, 1, 1, 2, 2, 1, 0, 2, 0, 2, 0,
       0, 1, 2, 2, 1, 2, 2, 0, 1, 1, 0, 2, 2, 2, 1, 2, 2, 2, 0, 0, 1, 0,
       2, 2, 1])

In [42]:
# Show the boolean mask; True entries are the rows that will lose their label.
random_unlabeled_points

array([ True, False, False, False,  True,  True,  True, False, False,
       False,  True, False, False,  True,  True,  True,  True, False,
        True,  True, False,  True,  True,  True,  True, False,  True,
       False, False,  True, False,  True,  True, False, False, False,
        True,  True, False,  True,  True,  True,  True, False,  True,
       False,  True, False, False,  True, False, False, False, False,
       False, False,  True,  True,  True,  True,  True,  True, False,
        True,  True, False,  True, False,  True, False, False,  True,
        True, False, False, False, False,  True,  True,  True, False,
       False,  True,  True,  True,  True, False, False, False,  True,
        True, False, False, False, False,  True, False,  True,  True,
        True,  True, False,  True, False, False,  True,  True, False,
        True,  True,  True,  True, False, False, False, False, False,
        True, False, False, False, False,  True,  True,  True,  True,
       False, False,

In [43]:
# Hide the selected labels in place by overwriting them with -1, the marker
# scikit-learn's semi-supervised estimators use for unlabeled samples.
# Only `labels` is modified; `y_train` keeps the true classes.
labels[random_unlabeled_points] = -1
labels

array([-1,  2,  1,  0, -1, -1, -1,  2,  1,  0, -1,  1,  1, -1, -1, -1, -1,
        2, -1, -1,  0, -1, -1, -1, -1,  1, -1,  0,  0, -1,  0, -1, -1,  2,
        0,  2, -1, -1,  1, -1, -1, -1, -1,  2, -1,  2, -1,  2,  2, -1,  2,
        1,  1,  1,  1,  2, -1, -1, -1, -1, -1, -1,  2, -1, -1,  1, -1,  0,
       -1,  1,  1, -1, -1,  2,  1,  2,  0, -1, -1, -1,  1,  0, -1, -1, -1,
       -1,  2,  1,  2, -1, -1,  0,  2,  1,  1, -1,  0, -1, -1, -1, -1,  1,
       -1,  2,  1, -1, -1,  0, -1, -1, -1, -1,  2,  2,  1,  2,  2, -1,  1,
        1,  0,  2, -1, -1, -1, -1,  2,  2, -1,  0, -1, -1, -1, -1,  1])

# Model Creation

In [44]:
from sklearn.semi_supervised import LabelPropagation
from sklearn.metrics import confusion_matrix, classification_report

In [45]:
# Label propagation over a 9-nearest-neighbour graph. The model is fitted on
# every training row, using the partially hidden `labels` (-1 = unlabeled).
lp = LabelPropagation(
    kernel='knn',
    n_neighbors=9,
    max_iter=100,
)
lp.fit(X_train, labels)

LabelPropagation(kernel='knn', max_iter=100, n_neighbors=9)

In [47]:
# Predict a class for every training row, including those whose label was hidden.
y_pred = lp.predict(X_train)

In [48]:
# Compare against the true targets (y_train), not the masked `labels`.
print(confusion_matrix(y_train, y_pred))

[[43  0  0]
 [ 0 45  2]
 [ 0  1 44]]


In [49]:
# Per-class precision/recall/F1 on the training rows, scored against the true targets.
print(classification_report(y_train, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        43
           1       0.98      0.96      0.97        47
           2       0.96      0.98      0.97        45

    accuracy                           0.98       135
   macro avg       0.98      0.98      0.98       135
weighted avg       0.98      0.98      0.98       135



In [51]:
# Evaluate the label-propagation model on the held-out test split.
y_pred_test = lp.predict(X_test)
print(confusion_matrix(y_test, y_pred_test))
print(classification_report(y_test, y_pred_test))

[[7 0 0]
 [0 3 0]
 [0 0 5]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         5

    accuracy                           1.00        15
   macro avg       1.00      1.00      1.00        15
weighted avg       1.00      1.00      1.00        15



In [80]:
# Training rows whose label was kept (label != -1), with that label attached
# as an extra 'label' column. X_train itself is not modified.
X_labeled = (
    X_train
    .assign(label=labels)
    .loc[lambda frame: frame['label'] != -1]
)

In [81]:
# Score the label-propagation model on the training rows whose labels it was
# given at fit time. The 'label' column is dropped by keyword: passing the
# axis positionally (`drop(['label'], 1)`) raises TypeError on pandas >= 2.0.
y_pre_labels = lp.predict(X_labeled.drop(columns=['label']))
print(confusion_matrix(X_labeled['label'], y_pre_labels))
print(classification_report(X_labeled['label'], y_pre_labels))

[[15  0  0]
 [ 0 23  1]
 [ 0  1 24]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.96      0.96      0.96        24
           2       0.96      0.96      0.96        25

    accuracy                           0.97        64
   macro avg       0.97      0.97      0.97        64
weighted avg       0.97      0.97      0.97        64



In [70]:
from sklearn.semi_supervised import LabelSpreading

In [76]:
# Label spreading with an RBF kernel, fitted on the same partially hidden
# `labels` (-1 = unlabeled) as the label-propagation model.
ls = LabelSpreading(
    kernel='rbf',
    gamma=21,
    max_iter=35,
    alpha=0.3,
)
ls.fit(X_train, labels)

LabelSpreading(alpha=0.3, gamma=21, max_iter=35)

In [77]:
# Evaluate label spreading on every training row against the true targets.
# NOTE: this rebinds `y_pred`, replacing the label-propagation predictions.
y_pred = ls.predict(X_train)
print(confusion_matrix(y_train, y_pred))
print(classification_report(y_train, y_pred))

[[43  0  0]
 [ 0 46  1]
 [ 0  0 45]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        43
           1       1.00      0.98      0.99        47
           2       0.98      1.00      0.99        45

    accuracy                           0.99       135
   macro avg       0.99      0.99      0.99       135
weighted avg       0.99      0.99      0.99       135



In [78]:
# Evaluate label spreading on the held-out test split.
# NOTE: this rebinds `y_pred_test`, replacing the label-propagation predictions.
y_pred_test = ls.predict(X_test)
print(confusion_matrix(y_test, y_pred_test))
print(classification_report(y_test, y_pred_test))

[[7 0 0]
 [0 3 0]
 [0 0 5]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         5

    accuracy                           1.00        15
   macro avg       1.00      1.00      1.00        15
weighted avg       1.00      1.00      1.00        15



In [82]:
# Score the label-spreading model on the training rows whose labels it was
# given at fit time. The 'label' column is dropped by keyword: passing the
# axis positionally (`drop(['label'], 1)`) raises TypeError on pandas >= 2.0.
y_pre_labels = ls.predict(X_labeled.drop(columns=['label']))
print(confusion_matrix(X_labeled['label'], y_pre_labels))
print(classification_report(X_labeled['label'], y_pre_labels))

[[15  0  0]
 [ 0 24  0]
 [ 0  0 25]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      1.00      1.00        24
           2       1.00      1.00      1.00        25

    accuracy                           1.00        64
   macro avg       1.00      1.00      1.00        64
weighted avg       1.00      1.00      1.00        64



In [83]:
# Training rows whose label was hidden (label == -1), carrying the -1 marker
# in an extra 'label' column. X_train itself is not modified.
X_non_labeled = (
    X_train
    .assign(label=labels)
    .loc[lambda frame: frame['label'] == -1]
)

In [86]:
# Classes the label-spreading model assigns to the rows whose labels were
# hidden. The 'label' column is dropped by keyword: passing the axis
# positionally (`drop(['label'], 1)`) raises TypeError on pandas >= 2.0.
ls.predict(X_non_labeled.drop(columns=['label']))

array([2, 2, 0, 1, 2, 0, 0, 0, 1, 0, 2, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0,
       2, 1, 1, 0, 0, 2, 0, 2, 2, 0, 1, 0, 2, 0, 1, 0, 1, 1, 0, 1, 1, 2,
       1, 0, 2, 2, 0, 2, 1, 1, 0, 1, 2, 0, 2, 2, 0, 0, 1, 0, 2, 2, 1, 2,
       0, 1, 0, 2, 2])