# Semi-Supervised Learning: 
## ---Label Propagation and Label Spreading (Experiments v2)---

### Step 1 - Import libraries

In [1]:
# Data manipulation
import pandas as pd # for data manipulation
import numpy as np # for data manipulation

# Visualization
import plotly.express as px # for data visualization
import plotly.graph_objects as go # for data visualization
#import matplotlib.pyplot as plt # for showing handwritten digits

# Skleran
#from sklearn.datasets import load_digits # for MNIST data
#from sklearn.model_selection import train_test_split # for splitting data into train and test samples
from sklearn.metrics import classification_report # for model evaluation metrics
from sklearn.metrics import confusion_matrix # for showing confusion matrix
from sklearn.preprocessing import MinMaxScaler # for encoding of categorical features
from sklearn.datasets import make_moons # for creating dummy data

# Semi-supervised
from sklearn.semi_supervised import LabelSpreading
from sklearn.semi_supervised import LabelPropagation
from sklearn.semi_supervised import SelfTrainingClassifier

# UMAP dimensionality reduction
#from umap import UMAP

# Other utilities
import sys
import os
import getpass
import chart_studio
import chart_studio.plotly as py

# Assign main directory to a variable
main_dir=os.path.dirname(sys.path[0])
#main_dir

In [451]:
# Input Username and Password for Chart-Studio
print('Input Chart-Studio Username: ')
CS_user = input()
print('Input Chart-Studio API Key: ')
CS_api = getpass.getpass()

# Set Chart-Studio Credentials
chart_studio.tools.set_credentials_file(username=CS_user, api_key=CS_api)

### Step 2a - Create dummy data

In [311]:
Xm, ym = make_moons(n_samples=100, shuffle=True, noise=0.05, random_state=42)

In [314]:
# B0 [15]
# 1.048411
# 0.294812
# R1 [85]
# -0.005549455
# 0.3377641
for i in range(0,100):
    print(i, Xm[i])

0 [ 1.55020782 -0.4004788 ]
1 [0.04224297 0.37402443]
2 [ 0.72605176 -0.42881562]
3 [-1.01211162  0.24805519]
4 [-0.87560868  0.2589133 ]
5 [-0.75391335  0.68443635]
6 [0.18249415 0.90296741]
7 [ 1.36170732 -0.44252437]
8 [0.82480336 0.63491665]
9 [ 0.96101491 -0.04219482]
10 [0.39725665 0.86613474]
11 [-0.3378406   0.93279736]
12 [ 1.03565339 -0.54663366]
13 [-0.55749496  0.87633992]
14 [1.88172336 0.0688264 ]
15 [1.04844089 0.29484124]
16 [ 1.11067599 -0.45054759]
17 [ 1.12908379 -0.50299008]
18 [0.64422814 0.68424017]
19 [-0.35665128  0.903928  ]
20 [0.01224211 0.16690287]
21 [ 1.79765012 -0.16101663]
22 [-0.05352697  0.9298129 ]
23 [-0.94448412  0.40855158]
24 [ 0.58913741 -0.29128525]
25 [0.51730337 0.86591647]
26 [-0.87139114  0.51984553]
27 [1.9966253  0.23591553]
28 [0.80495119 0.53821126]
29 [ 0.79809925 -0.52830625]
30 [0.81578231 0.69847774]
31 [-1.01387831  0.12366891]
32 [ 1.68534061 -0.35315543]
33 [-0.2056198   1.07365647]
34 [2.0251685  0.02773743]
35 [0.9109244  0.3235

In [317]:
ym2=np.ones(100)*-1
ym2[15]=0
ym2[85]=1
ym2

array([-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1.,  0., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1.,  1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1.])

In [320]:
# Create a scatter plot
fig = px.scatter(None, x=Xm[:,0], y=Xm[:,1], opacity=1, color=ym.astype(str),
                 color_discrete_sequence=['red', 'blue'],
                )

# Change chart background color
fig.update_layout(dict(plot_bgcolor = 'white'))

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='white', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='white', 
                 showline=True, linewidth=1, linecolor='white')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='white', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='white', 
                 showline=True, linewidth=1, linecolor='white')

# Set figure title
fig.update_layout(title_text="Example Data")

fig.show()

In [321]:
# Create a scatter plot
fig = px.scatter(None, x=Xm[:,0], y=Xm[:,1], opacity=1, color=ym2.astype(str),
                 color_discrete_sequence=['grey', 'blue', 'red'],
                )

# Change chart background color
fig.update_layout(dict(plot_bgcolor = 'white'))

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='white', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='white', 
                 showline=True, linewidth=1, linecolor='white')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='white', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='white', 
                 showline=True, linewidth=1, linecolor='white')

# Set figure title
fig.update_layout(title_text="Example Data")

fig.show()

### Step 2b - Get Marketing campaign data
https://www.kaggle.com/rodsaldanha/arketing-campaign

#### Content
- AcceptedCmp1 - 1 if customer accepted the offer in the 1st campaign, 0 otherwise
- AcceptedCmp2 - 1 if customer accepted the offer in the 2nd campaign, 0 otherwise
- AcceptedCmp3 - 1 if customer accepted the offer in the 3rd campaign, 0 otherwise
- AcceptedCmp4 - 1 if customer accepted the offer in the 4th campaign, 0 otherwise
- AcceptedCmp5 - 1 if customer accepted the offer in the 5th campaign, 0 otherwise
- Response (target) - 1 if customer accepted the offer in the last campaign, 0 otherwise
- Complain - 1 if customer complained in the last 2 years
- DtCustomer - date of customer’s enrolment with the company
- Education - customer’s level of education
- Marital - customer’s marital status
- Kidhome - number of small children in customer’s household
- Teenhome - number of teenagers in customer’s household
- Income - customer’s yearly household income
- MntFishProducts - amount spent on fish products in the last 2 years
- MntMeatProducts - amount spent on meat products in the last 2 years
- MntFruits - amount spent on fruits products in the last 2 years
- MntSweetProducts - amount spent on sweet products in the last 2 years
- MntWines - amount spent on wine products in the last 2 years
- MntGoldProds - amount spent on gold products in the last 2 years
- NumDealsPurchases - number of purchases made with discount
- NumCatalogPurchases - number of purchases made using catalogue
- NumStorePurchases - number of purchases made directly in stores
- NumWebPurchases - number of purchases made through company’s web site
- NumWebVisitsMonth - number of visits to company’s web site in the last month
- Recency - number of days since the last purchase

In [3]:
pd.options.display.max_columns=50

In [472]:
# Read in data
df = pd.read_csv(os.path.dirname(sys.path[0])+'/data/marketing_campaign.csv', 
                 encoding='utf-8', delimiter=';',
                 usecols=['ID', 'Year_Birth', 'Education', 'Marital_Status', 'Income', 'Kidhome', 'Teenhome', 
                          'MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts'] 
                )
# Create a flag to denote whether the person has any dependants at home (either kids or teens)
df['Dependants_Flag']=df.apply(lambda x: 1 if x['Kidhome']+x['Teenhome']>0 else 0, axis=1)

# For other columns with missing values, fill them in with 0
df=df.fillna(0)

# Randomely select 15% of observations to keep Dependent_flag label with the rest of obs turned to be unlabeled
df['Rand_Selection'] = False
df.loc[df.sample(frac=0.15, random_state=42).index, 'Rand_Selection'] = True
# Create a new target colum with labels, where 1 and 0 are original labels and -1 means unlabeled 
df['Dependants_Target']=df.apply(lambda x: x['Dependants_Flag'] if x['Rand_Selection']==True else -1, axis=1)
# Print dataframe
df

Unnamed: 0,ID,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,Dependants_Flag,Rand_Selection,Dependants_Target
0,5524,1957,Graduation,Single,58138.0,0,0,635,88,546,172,88,0,False,-1
1,2174,1954,Graduation,Single,46344.0,1,1,11,1,6,2,1,1,False,-1
2,4141,1965,Graduation,Together,71613.0,0,0,426,49,127,111,21,0,False,-1
3,6182,1984,Graduation,Together,26646.0,1,0,11,4,20,10,3,1,False,-1
4,5324,1981,PhD,Married,58293.0,1,0,173,43,118,46,27,1,False,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2235,10870,1967,Graduation,Married,61223.0,0,1,709,43,182,42,118,1,False,-1
2236,4001,1946,PhD,Together,64014.0,2,1,406,0,30,0,0,1,False,-1
2237,7270,1981,Graduation,Divorced,56981.0,0,0,908,48,217,32,12,0,True,0
2238,8235,1956,Master,Together,69245.0,0,1,428,30,214,80,30,1,False,-1


In [473]:
df['Dependants_Target'].value_counts()

-1    1904
 1     225
 0     111
Name: Dependants_Target, dtype: int64

### Step 3 - Data Preparation

There are two categorical fields that we need to encode to numeric: 'Education' and 'Marital_Status'

In [455]:
# Show values and frequencies of the 'Education' and 'Marital_Status' fields
#print(df['Education'].unique())
print(df['Education'].value_counts())
print('***********************************')
print(df['Marital_Status'].value_counts())

Graduation    1127
PhD            486
Master         370
2n Cycle       203
Basic           54
Name: Education, dtype: int64
***********************************
Married     864
Together    580
Single      480
Divorced    232
Widow        77
Alone         3
Absurd        2
YOLO          2
Name: Marital_Status, dtype: int64


In [467]:
# Let's manually encode the two fields based on the order we feel is right
df['Education_enc']=df['Education'].apply(lambda x: 0 if x=='Basic' else
                                                    1 if x=='Graduation' else
                                                    2 if x=='2n Cycle' else
                                                    3 if x=='Master' else
                                                    4 if x=='PhD' else 5
                                         )

df['Marital_Status_enc']=df['Marital_Status'].apply(lambda x: 0 if x in (['Absurd', 'Alone', 'Single', 'YOLO']) else
                                                              1 if x=='Together' else
                                                              2 if x=='Married' else
                                                              3 if x=='Divorced' else
                                                              4 if x=='Widow' else 5
                                         )
df

Unnamed: 0,ID,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,Dependants_Flag,Rand_Selection,Dependants_Target,Education_enc,Marital_Status_enc
0,5524,1957,Graduation,Single,58138.0,0,0,635,88,546,172,88,0,False,-1,1,0
1,2174,1954,Graduation,Single,46344.0,1,1,11,1,6,2,1,1,False,-1,1,0
2,4141,1965,Graduation,Together,71613.0,0,0,426,49,127,111,21,0,True,0,1,1
3,6182,1984,Graduation,Together,26646.0,1,0,11,4,20,10,3,1,False,-1,1,1
4,5324,1981,PhD,Married,58293.0,1,0,173,43,118,46,27,1,False,-1,4,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2235,10870,1967,Graduation,Married,61223.0,0,1,709,43,182,42,118,1,False,-1,1,2
2236,4001,1946,PhD,Together,64014.0,2,1,406,0,30,0,0,1,False,-1,4,1
2237,7270,1981,Graduation,Divorced,56981.0,0,0,908,48,217,32,12,0,True,0,1,3
2238,8235,1956,Master,Together,69245.0,0,1,428,30,214,80,30,1,True,1,3,1


#### Select data for modeling and apply scaling

In [468]:
# Select data
#X=df[['Year_Birth', 'Education_enc', 'Marital_Status_enc', 'Income', 'MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts']]
X=df[['MntMeatProducts', 'MntWines']]
y=df['Dependants_Target'].values

# Perform Min-Max scaling
scaler=MinMaxScaler()
X_scaled=scaler.fit_transform(X)


### Step 4 - Use semi-supervised learning to apply labels to unlabelled data

In [469]:
model_LP_rbf = LabelPropagation(kernel='rbf', # {'knn', 'rbf'} default='rbf'
                              gamma=20, # default=20, Parameter for rbf kernel.
                              #n_neighbors=7, # default=7, Parameter for knn kernel which is a strictly positive integer.
                              max_iter=5000, # default=1000, Maximum number of iterations allowed.
                              tol=0.001, # default=1e-3, Convergence tolerance: threshold to consider the system at steady state.
                              n_jobs=-1, # default=None, The number of parallel jobs to run. -1 means using all processors. 
                             )

model_LP_knn = LabelPropagation(kernel='knn', # {'knn', 'rbf'} default='rbf'
                              #gamma=20, # default=20, Parameter for rbf kernel.
                              n_neighbors=20, # default=7, Parameter for knn kernel which is a strictly positive integer.
                              max_iter=5000, # default=30, Maximum number of iterations allowed.
                              tol=0.001, # default=1e-3, Convergence tolerance: threshold to consider the system at steady state.
                              n_jobs=-1, # default=None, The number of parallel jobs to run. -1 means using all processors. 
                             )

model_LS_rbf = LabelSpreading(kernel='rbf', # {'knn', 'rbf'} default='rbf'
                              gamma=20, # default=20, Parameter for rbf kernel.
                              #n_neighbors=7, # default=7, Parameter for knn kernel which is a strictly positive integer.
                              alpha=0.2, # default=0.2, Clamping factor. A value in (0, 1) that specifies the relative amount that an instance should adopt the information from its neighbors as opposed to its initial label.
                              max_iter=100, # default=30, Maximum number of iterations allowed.
                              tol=0.001, # default=1e-3, Convergence tolerance: threshold to consider the system at steady state.
                              n_jobs=-1, # default=None, The number of parallel jobs to run. -1 means using all processors. 
                             )

model_LS_knn = LabelSpreading(kernel='knn', # {'knn', 'rbf'} default='rbf'
                              #gamma=20, # default=20, Parameter for rbf kernel.
                              n_neighbors=20, # default=7, Parameter for knn kernel which is a strictly positive integer.
                              alpha=0.2, # default=0.2, Clamping factor. A value in (0, 1) that specifies the relative amount that an instance should adopt the information from its neighbors as opposed to its initial label.
                              max_iter=100, # default=30, Maximum number of iterations allowed.
                              tol=0.001, # default=1e-3, Convergence tolerance: threshold to consider the system at steady state.
                              n_jobs=-1, # default=None, The number of parallel jobs to run. -1 means using all processors. 
                             )

In [470]:
# Fit the models
#LP_rbf=model_LP_rbf.fit(X_scaled, y)
LP_knn=model_LP_knn.fit(X_scaled, y)
#LS_rbf=model_LS_rbf.fit(X_scaled, y)
#LS_knn=model_LS_knn.fit(X_scaled, y)

# Print results
#for i in [LP_rbf, LP_knn, LS_rbf, LS_knn]:
for i in [LP_knn]:
    print("Model Name: ", str(i))
    #print("Input array X: ", i.X_)
    print("Classes: ", i.classes_)
    print("Label Distributions: ", i.label_distributions_)
    print("Transduction Label: ", i.transduction_)
    print("No. of features: ", i.n_features_in_)
    #print("Feature names: ", i.feature_names_in_)
    print("No. of iterations: ", i.n_iter_)
    print('*******************************************************************')
    
    print('*************** Evaluation of LP knn model ***************')
    print(classification_report(df['Dependants_Flag'], LP_knn.transduction_))
    print('*************** Confusion Matrix ***************')
    print(confusion_matrix(df['Dependants_Flag'], LP_knn.transduction_))
    print('--------------------------------------------------------')

Model Name:  LabelPropagation(kernel='knn', max_iter=5000, n_jobs=-1, n_neighbors=20)
Classes:  [0 1]
Label Distributions:  [[0.71028564 0.28971436]
 [0.02551948 0.97448052]
 [1.         0.        ]
 ...
 [1.         0.        ]
 [0.         1.        ]
 [0.21730639 0.78269361]]
Transduction Label:  [0 1 0 ... 0 1 1]
No. of features:  2
No. of iterations:  17
*******************************************************************
*************** Evaluation of LP knn model ***************
              precision    recall  f1-score   support

           0       0.92      0.79      0.85       638
           1       0.92      0.97      0.95      1602

    accuracy                           0.92      2240
   macro avg       0.92      0.88      0.90      2240
weighted avg       0.92      0.92      0.92      2240

*************** Confusion Matrix ***************
[[ 507  131]
 [  47 1555]]
--------------------------------------------------------


In [130]:
# Look at classification report to evaluate the model
print('*************** Evaluation of LP rbf model ***************')
print(classification_report(df['Dependants_Flag'], LP_rbf.transduction_))
print(confusion_matrix(df['Dependants_Flag'], LP_rbf.transduction_))
print('--------------------------------------------------------')

print('*************** Evaluation of LP knn model ***************')
print(classification_report(df['Dependants_Flag'], LP_knn.transduction_))
print(confusion_matrix(df['Dependants_Flag'], LP_knn.transduction_))
print('--------------------------------------------------------')

print('*************** Evaluation of LS rbf model ***************')
print(classification_report(df['Dependants_Flag'], LS_rbf.transduction_))
print(confusion_matrix(df['Dependants_Flag'], LS_rbf.transduction_))
print('--------------------------------------------------------')

print('*************** Evaluation of LS knn model ***************')
print(classification_report(df['Dependants_Flag'], LS_knn.transduction_))
print(confusion_matrix(df['Dependants_Flag'], LS_knn.transduction_))
print('--------------------------------------------------------')

*************** Evaluation of LP rbf model ***************
              precision    recall  f1-score   support

           0       0.99      0.27      0.42       638
           1       0.77      1.00      0.87      1602

    accuracy                           0.79      2240
   macro avg       0.88      0.63      0.65      2240
weighted avg       0.83      0.79      0.74      2240

[[ 170  468]
 [   2 1600]]
--------------------------------------------------------
*************** Evaluation of LP knn model ***************
              precision    recall  f1-score   support

           0       0.82      0.67      0.74       638
           1       0.88      0.94      0.91      1602

    accuracy                           0.86      2240
   macro avg       0.85      0.80      0.82      2240
weighted avg       0.86      0.86      0.86      2240

[[ 426  212]
 [  95 1507]]
--------------------------------------------------------
*************** Evaluation of LS rbf model ***************
 

#### Plot a 3D scatter plo to display the results

In [471]:
# Specify a size of the mesh to be used
mesh_size=10
margin=0

# Create a mesh grid on which we will run our model
x_min, x_max = df['MntMeatProducts'].min() - margin, df['MntMeatProducts'].max() + margin
y_min, y_max = df['MntWines'].min() - margin, df['MntWines'].max() + margin
xrange = np.arange(x_min, x_max, mesh_size)
yrange = np.arange(y_min, y_max, mesh_size)
xx, yy = np.meshgrid(xrange, yrange)

# Calculate predictions on grid
Z = np.ones_like(xx)*0.5
#Z = Z.reshape(xx.shape)


# Create a 3D scatter plot
fig = px.scatter_3d(df, x=df['MntMeatProducts'], y=df['MntWines'], z=LP_knn.label_distributions_[:,1], 
                    color=df['Dependants_Flag'].astype('str'),
                    color_discrete_sequence=['blue', 'red'],
                    #opacity=1,  
                    hover_data=['Marital_Status', 
                                'MntWines', 'Dependants_Target',
                                'Dependants_Flag',
                               ],
                    height=900, width=900
                   )

# Update chart looks
fig.update_layout(#title_text="Scatter 3D Plot",
                  showlegend=False,
                  #legend=dict(orientation="h", yanchor="top", y=1, xanchor="center", x=0.5),
                  scene_camera=dict(up=dict(x=0, y=0, z=1), 
                                        center=dict(x=0, y=0, z=-0.2),
                                        eye=dict(x=-1.5, y=1.5, z=0.5)),
                                        margin=dict(l=0, r=0, b=0, t=0),
                  scene = dict(xaxis=dict(backgroundcolor='white',
                                          color='black',
                                          gridcolor='#f0f0f0',
                                          title_font=dict(size=10),
                                          tickfont=dict(size=10),
                                          #dtick=0.01,
                                         ),
                               yaxis=dict(backgroundcolor='white',
                                          color='black',
                                          gridcolor='#f0f0f0',
                                          title_font=dict(size=10),
                                          tickfont=dict(size=10),
                                          #dtick=0.01,
                                          ),
                               zaxis=dict(backgroundcolor='lightgrey',
                                          color='black', 
                                          gridcolor='#f0f0f0',
                                          title_font=dict(size=10),
                                          tickfont=dict(size=10),
                                          dtick=0.1,
                                         )))

# Update marker size
fig.update_traces(marker=dict(size=3))

# Add prediction plane
fig.add_traces(go.Surface(x=xrange, 
                          y=yrange, 
                          z=Z, 
                          name='Separator',
                          colorscale='Gray',
                          opacity=0.5,
                          showscale=False, 
                          contours = {"z": {"show": True, "start": 0.5, "end": 0.9, "size": 0.5}}))

fig.show()

---

### Step 6 - Use example data for gif illustrations

In [439]:
#dfe=pd.read_csv('/Users/solclover/Documents/Python/SAS_to_Python/038_ML_Semi_Supervised/Example_data_v3.csv', encoding='utf-8')
dfe=pd.read_csv('/Users/solclover/Documents/Python/SAS_to_Python/038_ML_Semi_Supervised/Example_data_v4.csv', encoding='utf-8')
dfe


Unnamed: 0,Dim1,Dim2,Label
0,8,28,-1
1,19,35,0
2,45,38,-1
3,49,16,-1
4,40,1,-1
...,...,...,...
95,85,69,-1
96,91,66,-1
97,50,61,-1
98,89,88,-1


In [440]:
# Create a scatter plot
fig = px.scatter(dfe, x=dfe['Dim1'], y=dfe['Dim2'], opacity=1, color=dfe['Label'].astype(str),
                 color_discrete_sequence=['lightgrey', 'blue', 'red'],
                )

# Change chart background color
fig.update_layout(dict(plot_bgcolor = 'white'))

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='white', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='white', 
                 showline=True, linewidth=1, linecolor='white')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='white', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='white', 
                 showline=True, linewidth=1, linecolor='white')

# Set figure title
fig.update_layout(title_text="Example Data")

# Update marker size
#fig.update_traces(marker=dict(size=7, line=dict(color='black', width=0.2)))

#fig.show()

In [325]:
# Define model hyperparameters
model_LP_knn_e = LabelPropagation(kernel='rbf', 
                                  #n_neighbors=20, 
                                  max_iter=1000, n_jobs=-1)

In [371]:
dfe

Unnamed: 0,Dim1,Dim2,Label
0,8,28,-1
1,19,35,0
2,45,38,-1
3,49,16,-1
4,40,1,-1
...,...,...,...
95,85,69,-1
96,91,66,-1
97,50,61,-1
98,89,88,-1


In [443]:
# Select data
Xe=dfe[['Dim1', 'Dim2']]
ye=dfe['Label'].values

# Create empty dataframe to store results
#dfe_res=pd.DataFrame()
dfe_res=dfe.copy()
dfe_res['Zero']=0
dfe_res['One']=0
dfe_res['Label_pred']=ye
dfe_res['Iteration']=0

# Loop through the model saving results for different number of iterations
#for i in range(1,880): 
for i in range(1,6): # run for 6 iterations
    model_LP_knn_e = LabelPropagation(kernel='knn', 
                                      n_neighbors=6, 
                                      #gamma=0.1,
                                      max_iter=i, n_jobs=-1)
    LP_knn_e=model_LP_knn_e.fit(Xe, ye)
    
    dfe_temp=pd.DataFrame(LP_knn_e.label_distributions_, columns=['Zero', 'One'])
    dfe_temp=pd.concat([dfe, dfe_temp], axis=1)
    dfe_temp['Label_pred']=dfe_temp.apply(lambda x: -1 if x['Zero']+x['One']==0 else round(x['One']), axis=1)
    dfe_temp['Iteration']=i

    dfe_res=pd.concat([dfe_res, dfe_temp])
    
#print(LP_knn_e.label_distributions_)
#print(LP_knn_e.transduction_)
print("No. of iterations: ", LP_knn_e.n_iter_)


max_iter=1 was reached without convergence.


max_iter=2 was reached without convergence.


max_iter=3 was reached without convergence.


max_iter=4 was reached without convergence.



No. of iterations:  5



max_iter=5 was reached without convergence.



In [422]:
dfe_res

Unnamed: 0,Dim1,Dim2,Label,Zero,One,Label_pred,Iteration
0,8,28,-1,0.000000,0.000000,-1,0
1,19,35,0,0.000000,0.000000,0,0
2,45,38,-1,0.000000,0.000000,-1,0
3,49,16,-1,0.000000,0.000000,-1,0
4,40,1,-1,0.000000,0.000000,-1,0
...,...,...,...,...,...,...,...
95,85,69,-1,0.921995,0.078005,0,999
96,91,66,-1,0.919534,0.080466,0,999
97,50,61,-1,0.018196,0.981804,1,999
98,89,88,-1,0.917483,0.082517,0,999


In [367]:
dfe_temp

Unnamed: 0,Dim1,Dim2,Label,Zero,One,Label_pred,Iteration,Zero.1,One.1
0,8,28,-1,0,0,-1,0,0.0,0.0
1,19,35,0,0,0,0,0,1.0,0.0
2,45,38,-1,0,0,-1,0,0.0,0.0
3,49,16,-1,0,0,-1,0,0.0,0.0
4,40,1,-1,0,0,-1,0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
95,85,69,-1,0,0,-1,0,0.0,0.0
96,91,66,-1,0,0,-1,0,0.0,0.0
97,50,61,-1,0,0,-1,0,0.0,1.0
98,89,88,-1,0,0,-1,0,0.0,0.0


##### Create animated visualization

In [None]:
fig.add_annotation(x=4, y=4,
            text="Text annotation without arrow",
            showarrow=False,
            yshift=10)

In [444]:
fig = px.scatter(dfe_res, x="Dim1", y="Dim2", animation_frame="Iteration", 
           #animation_group="country",
           #size="pop", 
           #color=df['Label_Pred'].astype(str),
           color=dfe_res['Label_pred'].astype(str), 
           color_discrete_sequence=['lightgrey', 'blue', 'red'],
           #hover_name="Label",
           #color_continuous_scale='Bluered',
           #log_x=True, size_max=55, 
           #range_x=[100,100000], range_y=[25,90]
          )

# Change chart background color
fig.update_layout(dict(plot_bgcolor = 'white'))

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')

# Set figure title
fig.update_layout(title_text="Label Propagation")

# Update marker size
fig.update_traces(marker=dict(size=5))

fig.show()

In [449]:
# Specify a size of the mesh to be used
mesh_size=10
margin=10

# Create a mesh grid on which we will run our model
x_min, x_max = dfe_temp['Dim1'].min() - margin, dfe_temp['Dim1'].max() + margin
y_min, y_max = dfe_temp['Dim2'].min() - margin, dfe_temp['Dim2'].max() + margin
xrange = np.arange(x_min, x_max, mesh_size)
yrange = np.arange(y_min, y_max, mesh_size)
xx, yy = np.meshgrid(xrange, yrange)

# Calculate predictions on grid
Z = np.ones_like(xx)*0.5
#Z = Z.reshape(xx.shape)


# Create a 3D scatter plot
fig = px.scatter_3d(dfe_temp, x='Dim1', y='Dim2', z='One', 
                    color='One',
                    color_continuous_scale='Bluered',
                    #opacity=1,  
                    hover_data=['Dim1', 'Dim2', 'Zero', 'One', 'Label_pred'],
                    height=900, width=900
                   )

# Update chart looks
fig.update_layout(#title_text="Scatter 3D Plot",
                  showlegend=False,
                  coloraxis_showscale=False,
                  #legend=dict(orientation="h", yanchor="top", y=1, xanchor="center", x=0.5),
                  scene_camera=dict(up=dict(x=0, y=0, z=1), 
                                        center=dict(x=0, y=0, z=-0.2),
                                        eye=dict(x=-1.5, y=1.5, z=0.5)),
                                        margin=dict(l=0, r=0, b=0, t=0),
                  scene = dict(xaxis=dict(backgroundcolor='white',
                                          color='black',
                                          gridcolor='#f0f0f0',
                                          title_font=dict(size=10),
                                          tickfont=dict(size=10),
                                          #dtick=0.01,
                                         ),
                               yaxis=dict(backgroundcolor='white',
                                          color='black',
                                          gridcolor='#f0f0f0',
                                          title_font=dict(size=10),
                                          tickfont=dict(size=10),
                                          #dtick=0.01,
                                          ),
                               zaxis=dict(backgroundcolor='lightgrey',
                                          color='black', 
                                          gridcolor='#f0f0f0',
                                          title_font=dict(size=10),
                                          title_text='Probability',
                                          tickfont=dict(size=10),
                                          dtick=0.1,
                                         )))

# Update marker size
fig.update_traces(marker=dict(size=3))

# Add prediction plane
fig.add_traces(go.Surface(x=xrange, 
                          y=yrange, 
                          z=Z, 
                          name='Separator',
                          colorscale='Gray',
                          opacity=0.2,
                          showscale=False,))

fig.show()
#fig.write_html('Example_results.html')

In [452]:
# Export
#py.plot(fig, filename = 'Art038_3D_LP_probs', auto_open=True, sharing='public')

'https://plotly.com/~SolClover/182/'