# PyCaret 2 Clustering Example
This notebook is created using PyCaret 2.0. Last updated : 31-07-2020

In [1]:
# check version
from pycaret.utils import version
version()

'2.3.5'

# 1. Loading Dataset

In [2]:
from library.common import Database, Core
from pathlib import Path
import pandas as pd
import sys

snapshot = 2019

# Set up the filesystem
cwd = Path.cwd()
model_dir = cwd.parent/r'models'
param_dir = cwd.parent/r'data/processed'

# Set up for common features to use
core = Core()

# Set up for database access
db = Database()

sql_select = \
    r'SELECT cdc."Country Code", cdc."Year", cdc."Urban Population Percent", ' \
    r'cdc."Constant GDP per capita", cdc."Energy Intensity", ' \
    r'cdc."Manufacturing %%", cdc."Trade Openness", ' \
    r'cdc."Renewable Energy Consumption Share", ' \
    r'cdc."Percent of Environment Patent" from consolidated_data_csv cdc'
add_df = pd.read_sql(sql_select, db.engine)
add_df = add_df[add_df['Country Code'].ne('WLD')]
add_key = ['Country Code', 'Year']

db_table = 'owid_co2_greenhouse_gas_emissions'
owid_df = pd.read_sql(db_table, db.engine)
owid_df = owid_df.dropna(subset = ['iso_code']).copy()
owid_df = owid_df[~owid_df['iso_code'].isin(core.excluded_features)].copy()
owkd_key = ['iso_code', 'year']

final_df = owid_df.merge(add_df, how = 'left',  left_on= owkd_key, right_on= add_key, validate = '1:1').fillna(0)
final_df = final_df.drop (['Country Code', 'Year'], axis = 1)
print("Data loaded")

Data loaded


In [3]:

# Clean up the feature names
new_col = []
for col in final_df.columns:
    col = col.lower()
    col = col.replace(' ', '_')
    col = col.replace(r'%', "_pct")
    new_col.append(col)

final_df= final_df[[core.base_features]]


KeyError: "None of [Index([('iso_code', 'year', 'co2', 'consumption_co2', 'trade_co2', 'coal_co2', 'cement_co2', 'flaring_co2', 'gas_co2', 'oil_co2', 'other_industry_co2', 'methane', 'nitrous_oxide', 'population', 'gdp', 'primary_energy_consumption', 'urban_population_percent', 'constant_gdp_per_capita', 'energy_intensity', 'manufacturing__pct', 'trade_openness', 'renewable_energy_consumption_share', 'percent_of_environment_patent')], dtype='object')] are in the [columns]"

In [None]:
source = list(final_df.columns)
for feature in core.base_features:
    if feature not in source:
        print('Not in database ', feature)


In [None]:
list(final_df.columns)

# 2. Initialize Setup

In [None]:
from pycaret.clustering import *
clu1 = setup(data, ignore_features = ['Country Name'], session_id=123, log_experiment=True, log_plots = True, 
             experiment_name='health1')

# 3. Create Model

In [None]:
models()

In [None]:
kmeans = create_model('kmeans', num_clusters = 4)

In [None]:
kmodes = create_model('kmodes', num_clusters = 4)

# 4. Assign Labels

In [None]:
kmeans_results = assign_model(kmeans)
kmeans_results.head()

# 5. Analyze Model

In [None]:
plot_model(kmeans)

In [None]:
plot_model(kmeans, feature = 'Country Name', label=True)

In [None]:
plot_model(kmeans, plot = 'tsne')

In [None]:
plot_model(kmeans, plot = 'elbow')

In [None]:
plot_model(kmeans, plot = 'silhouette')

In [None]:
plot_model(kmeans, plot = 'distance')

In [None]:
plot_model(kmeans, plot = 'distribution')

# 6. Predict Model

In [None]:
pred_new = predict_model(kmeans, data=data)
pred_new.head()

# 7. Save / Load Model

In [None]:
save_model(kmeans, model_name='kmeans')

In [None]:
loaded_kmeans = load_model('kmeans')
print(loaded_kmeans)

In [None]:
from sklearn import set_config
set_config(display='diagram')
loaded_kmeans[0]

In [None]:
from sklearn import set_config
set_config(display='text')

# 8. Deploy Model

In [None]:
deploy_model(kmeans, model_name = 'kmeans-aws', authentication = {'bucket' : 'pycaret-test'})

# 9. Get Config / Set Config

In [None]:
X = get_config('X')
X.head()

In [None]:
get_config('seed')

In [None]:
from pycaret.clustering import set_config
set_config('seed', 999)

In [None]:
get_config('seed')

# 10. MLFlow UI

In [None]:
!mlflow ui

# End
Thank you. For more information / tutorials on PyCaret, please visit https://www.pycaret.org