In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.cluster import DBSCAN, OPTICS
from sklearn.manifold import TSNE
from sklearn import metrics

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the three input tables (paths are relative to the notebook's working directory).
profiles, details, events = map(
    pd.read_csv,
    (
        "CSV Files/customer_profiles.csv",
        "CSV Files/ad_details.csv",
        "CSV Files/ad_events.csv",
    ),
)

In [None]:
# Integer-encode every object-typed column of `profiles`, leaving the id,
# numeric and membership-date columns untouched. The frame is modified in place.
candidate_columns = profiles.drop(
    columns=['id', 'age', 'income', 'became_member_on (year month day)']
).columns

for col in candidate_columns:
    if profiles[col].dtype != object:
        continue  # only object-typed columns are label-encoded
    le = LabelEncoder()
    profiles[col] = le.fit_transform(profiles[col])

In [None]:
# Standardize every column except the customer identifier.
scaler = StandardScaler()
feature_frame = profiles.drop(columns=['id'])
data = scaler.fit(feature_frame).transform(feature_frame)

In [None]:
# Embed the standardized features in two dimensions with t-SNE (fixed seed).
model = TSNE(n_components=2, random_state=0)
tsne_data = model.fit_transform(data)

# Scatter the two embedding coordinates on a 7x7 inch figure.
tsne_x, tsne_y = tsne_data[:, 0], tsne_data[:, 1]
fig, ax = plt.subplots(figsize=(7, 7))
ax.scatter(tsne_x, tsne_y)
plt.show()

In [None]:
# Alternative (disabled): cluster with DBSCAN instead of OPTICS.
# model = DBSCAN(eps=1.9, min_samples=10)
# segments = model.fit_predict(data)

In [None]:
# Cluster the standardized features with OPTICS.
# fit_predict() both fits the estimator and returns the per-sample labels, so
# a separate .fit() call beforehand would only run the clustering a second
# time without changing the result.
model = OPTICS(min_samples=2)
segments = model.fit_predict(data)

In [None]:
# Boolean mask marking the core samples of the fitted clusterer.
# DBSCAN exposes `core_sample_indices_`; OPTICS does not, so for OPTICS a
# sample is treated as core when its core distance is finite (points that can
# never be core have an infinite core distance).
if hasattr(model, "core_sample_indices_"):
    core_samples_mask = np.zeros_like(model.labels_, dtype=bool)
    core_samples_mask[model.core_sample_indices_] = True
else:
    core_samples_mask = np.isfinite(model.core_distances_)

In [None]:
# Per-sample cluster labels of the fitted model; later cells count label -1 as noise.
labels = model.labels_

In [None]:
# Number of clusters found (the noise label -1 is not a cluster) and the
# number of samples flagged as noise.
distinct_labels = set(labels)
n_clusters_ = len(distinct_labels - {-1})
n_noise_ = int(np.count_nonzero(np.asarray(labels) == -1))

In [None]:
# Report the cluster count and the noise count, one per line.
print(n_clusters_, n_noise_, sep="\n")

In [None]:
# Show the t-SNE embedding coloured by cluster assignment.
plt.figure(figsize=(7, 7))
# x and y must be passed by keyword: recent seaborn releases accept only
# `data` positionally and raise TypeError for positional coordinates.
sns.scatterplot(x=tsne_data[:, 0], y=tsne_data[:, 1], hue=segments)
plt.show()

In [None]:
# Silhouette coefficient of the clustering on the standardized features.
# The score is only defined for 2 <= n_labels <= n_samples - 1; outside that
# range scikit-learn raises ValueError, so report it instead of crashing.
# Note: noise points (label -1) are scored as if they formed one cluster.
n_distinct_labels = len(set(labels))
if 2 <= n_distinct_labels <= len(labels) - 1:
    sc = metrics.silhouette_score(data, labels)
    print("Silhouette Coefficient:%0.4f" % sc)
else:
    sc = float("nan")
    print("Silhouette Coefficient is undefined for %d distinct label(s)" % n_distinct_labels)