In [1]:
import pandas as pd
from sklearn.cluster import AgglomerativeClustering, OPTICS


import predictive_clustering,utils

## Generating the Ordered Documents for the best clusters of each type


In [2]:
# Load the four Excel workbooks this notebook works from, in a fixed order:
# raw baseline data, questionnaire scales, scaled baseline features, and the
# scaled/transformed count features.
baseline, scales, scaled_df, scaled_transformed_df = (
    pd.read_excel(workbook)
    for workbook in (
        "RETOS_BEBRASK_Baseline.xlsx",
        "filled_scales_BEBRASK_RETOS.xlsx",
        "baseline_scaled.xlsx",
        "count_scaled_transformed.xlsx",
    )
)

In [3]:
# Column groups from the scales workbook that are evaluated for the BEBRASK and
# RETOS datasets. The remaining scales were not used or contain too many NaNs.
# Each list holds the column names belonging to one questionnaire.
PANAS = ["PA", "NA."]
ERQ = ["ERQ_CR", "ERQ_ES"]
UPPSP = [
    "UPPSP_NU",
    "UPPSP_PU",
    "UPPSP_SS",
    "UPPSP_PMD",
    "UPPSP_PSV",
]
BIS_BAS = ["BIS", "BAS_D", "BAS_RR", "BAS_FS"]
TEPS = ["TEPS_AF", "TEPS_CF"]
SHS = ["SHS"]
FS = ["FS"]
LOTR = ["LOT_R"]
RRQ = ["RRQ_Rum", "RRQ_Ref"]
ASI3 = ["ASI_P", "ASI_C", "ASI_S"]
SPQ = ["SPQ", "SPQ_IR"]
MSSB = ["MSSB_POS", "MSSB_NEG", "MSSB_DES"]

# All groups, in the order they are handed to utils.create_word.
list_metrics = [
    PANAS,
    ERQ,
    UPPSP,
    BIS_BAS,
    TEPS,
    SHS,
    FS,
    LOTR,
    RRQ,
    ASI3,
    SPQ,
    MSSB,
]
#### Importing the TimeSeries Dataset to use it for analysis later on

### Baseline Hierarchical 4 clusters

In [5]:
# Remove the excluded rows from both baseline frames and renumber them 0..n-1 so
# the cluster labels line up positionally with baseline["Subject"].
# NOTE: this cell reassigns `scaled_df` and `baseline` (later cells rely on the
# reduced frames), so it is NOT idempotent. Run it once per kernel session;
# re-running drops a different set of rows, or fails if those labels are gone.
baseline_excluded_rows = [57, 90, 123, 96, 5, 133, 43, 81]
scaled_df = scaled_df.drop(baseline_excluded_rows).reset_index(drop=True)
baseline = baseline.drop(baseline_excluded_rows).reset_index(drop=True)

# Complete-linkage agglomerative clustering on the scaled baseline features.
# The same number feeds the estimator and the output file names.
baseline_hier_n_clusters = 4
data_clusters = predictive_clustering.clustering(
    scaled_df,
    AgglomerativeClustering,
    {"n_clusters": baseline_hier_n_clusters, "linkage": "complete"},
    fit=True,
)
df_cluster = pd.concat([baseline["Subject"], pd.Series(data_clusters)], axis=1)
df_cluster.columns = ["Subject", "clusters"]

# One Word document per ordering criterion; the value is the cluster order
# passed to utils.create_word for that document.
baseline_hier_report_orders = {
    "avg_rating": [3, 1, 0, 2],
    "correlation": [3, 0, 1, 2],
    "dif_match": [1, 3, 0, 2],
}

for report_name, report_cluster_order in baseline_hier_report_orders.items():
    name_output = f"baseline_hierarchical_{baseline_hier_n_clusters}_clusters_{report_name}.docx"
    # Rebuilt for every report so each document starts from a fresh frame; the
    # utils helpers are not visible here and may modify what they receive.
    new_data = scales.merge(
        df_cluster, left_on="EPRIME_CODE", right_on="Subject"
    ).drop(columns="Subject")
    df = utils.filter_data(new_data)
    utils.create_word(
        df,
        list_metrics,
        name_output,
        df_scales=scales,
        cluster_order=report_cluster_order,
    )
#### Baseline OPTICS

### OPTICS Baseline (min_samples = 6)

In [ ]:
# OPTICS clustering on the scaled baseline features. `scaled_df` and `baseline`
# are used as they currently stand in the kernel namespace.
# The number in the output file names is the min_samples setting.
baseline_optics_min_samples = 6
data_clusters = predictive_clustering.clustering(
    scaled_df,
    OPTICS,
    {
        "min_samples": baseline_optics_min_samples,
        "metric": "euclidean",
        "algorithm": "auto",
    },
    fit=True,
)
df_cluster = pd.concat([baseline["Subject"], pd.Series(data_clusters)], axis=1)
df_cluster.columns = ["Subject", "clusters"]

# One Word document per ordering criterion; the value is the cluster order
# passed to utils.create_word for that document.
# NOTE(review): scikit-learn's OPTICS labels noise samples as -1, while these
# orders list 0..6 -- confirm how predictive_clustering.clustering and
# utils.create_word map the labels onto these entries.
baseline_optics_report_orders = {
    "avg_rating": [1, 0, 2, 5, 3, 6, 4],
    "correlation": [5, 3, 1, 2, 4, 0, 6],
    "dif_match": [0, 5, 1, 6, 3, 2, 4],
}

for report_name, report_cluster_order in baseline_optics_report_orders.items():
    name_output = f"baseline_optics_{baseline_optics_min_samples}_clusters_{report_name}.docx"
    # Rebuilt for every report so each document starts from a fresh frame; the
    # utils helpers are not visible here and may modify what they receive.
    new_data = scales.merge(
        df_cluster, left_on="EPRIME_CODE", right_on="Subject"
    ).drop(columns="Subject")
    df = utils.filter_data(new_data)
    utils.create_word(
        df,
        list_metrics,
        name_output,
        df_scales=scales,
        cluster_order=report_cluster_order,
    )

### Hierarchical Counts 6 clusters

In [ ]:
# Complete-linkage agglomerative clustering on the scaled/transformed count
# features. "Subject" is an identifier, so it is left out of the clustering
# input. The same number feeds the estimator and the output file names.
count_hier_n_clusters = 6
data_clusters = predictive_clustering.clustering(
    scaled_transformed_df.drop(columns="Subject"),
    AgglomerativeClustering,
    {"n_clusters": count_hier_n_clusters, "linkage": "complete"},
    fit=True,
)
df_cluster = pd.concat(
    [scaled_transformed_df["Subject"], pd.Series(data_clusters)], axis=1
)
df_cluster.columns = ["Subject", "clusters"]

# One Word document per ordering criterion; the value is the cluster order
# passed to utils.create_word for that document.
count_hier_report_orders = {
    "happy_0": [1, 0, 5, 2, 4, 3],
    "happy_1": [4, 2, 0, 5, 1, 3],
    "sad_0": [1, 0, 2, 4, 5, 3],
    "sad_1": [4, 2, 0, 5, 1, 3],
    "fear_0": [1, 0, 2, 5, 4, 3],
    "fear_1": [2, 4, 0, 5, 1, 3],
    "correlation": [5, 2, 1, 4, 0, 3],
    "dif_match": [4, 2, 5, 0, 1, 3],
}

for report_name, report_cluster_order in count_hier_report_orders.items():
    name_output = f"count_hierarchical_{count_hier_n_clusters}_clusters_{report_name}.docx"
    # Rebuilt for every report so each document starts from a fresh frame; the
    # utils helpers are not visible here and may modify what they receive.
    new_data = scales.merge(
        df_cluster, left_on="EPRIME_CODE", right_on="Subject"
    ).drop(columns="Subject")
    df = utils.filter_data(new_data)
    utils.create_word(
        df,
        list_metrics,
        name_output,
        df_scales=scales,
        cluster_order=report_cluster_order,
    )


#### OPTICS Counts (min_samples = 6)

In [ ]:
# OPTICS clustering on the scaled/transformed count features. "Subject" is an
# identifier, so it is left out of the clustering input.
# The number in the output file names is the min_samples setting.
count_optics_min_samples = 6
data_clusters = predictive_clustering.clustering(
    scaled_transformed_df.drop(columns="Subject"),
    OPTICS,
    {
        "min_samples": count_optics_min_samples,
        "metric": "euclidean",
        "algorithm": "auto",
    },
    fit=True,
)
df_cluster = pd.concat(
    [scaled_transformed_df["Subject"], pd.Series(data_clusters)], axis=1
)
df_cluster.columns = ["Subject", "clusters"]

# One Word document per ordering criterion; the value is the cluster order
# passed to utils.create_word for that document.
# NOTE(review): scikit-learn's OPTICS labels noise samples as -1, while these
# orders list 0..4 -- confirm how predictive_clustering.clustering and
# utils.create_word map the labels onto these entries.
count_optics_report_orders = {
    "happy_0": [2, 1, 0, 3, 4],
    "happy_1": [0, 4, 3, 1, 2],
    "sad_0": [2, 1, 0, 3, 4],
    "sad_1": [4, 0, 3, 1, 2],
    "fear_0": [2, 0, 1, 3, 4],
    "fear_1": [0, 4, 3, 1, 2],
    "correlation": [3, 1, 4, 2, 0],
    "dif_match": [4, 0, 3, 1, 2],
}

for report_name, report_cluster_order in count_optics_report_orders.items():
    name_output = f"count_optics_{count_optics_min_samples}_clusters_{report_name}.docx"
    # Rebuilt for every report so each document starts from a fresh frame; the
    # utils helpers are not visible here and may modify what they receive.
    new_data = scales.merge(
        df_cluster, left_on="EPRIME_CODE", right_on="Subject"
    ).drop(columns="Subject")
    df = utils.filter_data(new_data)
    utils.create_word(
        df,
        list_metrics,
        name_output,
        df_scales=scales,
        cluster_order=report_cluster_order,
    )

