In [25]:
import dagshub
import feast
import mlflow
import pandas as pd
from feast import (
    FeatureStore,
    Entity,
    FeatureService,
    FeatureView,
    Field,
    FileSource
)
from feast.types import Float32, Float64, Int64, String
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import (
    silhouette_score,
    davies_bouldin_score,
    calinski_harabasz_score,
    adjusted_rand_score,
    normalized_mutual_info_score,
    accuracy_score,
    classification_report,
    confusion_matrix
)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler



In [26]:
# Read the two raw CSV files that the rest of the notebook works on.
# `Stresslevel` holds the question-per-column survey; `stress` holds the
# dataset whose target column is `stress_level`.
Stresslevel = pd.read_csv(filepath_or_buffer="../data/Stress_Dataset.csv")
stress = pd.read_csv(filepath_or_buffer="../data/StressLevelDataset.csv")

In [27]:
# Add a 1-based surrogate key as the first column. The guard keeps the cell
# idempotent: DataFrame.insert raises ValueError when the column already
# exists, which is what happened on every re-run of this cell.
if "Id" not in Stresslevel.columns:
    Stresslevel.insert(0, "Id", range(1, len(Stresslevel) + 1))

# Show the distinct target answers. This has to be the last expression of the
# cell, otherwise the notebook discards the result without displaying it.
Stresslevel['Which type of stress do you primarily experience?'].unique()

In [28]:
# Replace the long survey answers of the target column with short class labels.
# Values that do not match any key are left unchanged by `replace`.
Stresslevel["Which type of stress do you primarily experience?"] = Stresslevel[
    "Which type of stress do you primarily experience?"
].replace(
    to_replace={
        "Eustress (Positive Stress) - Stress that motivates and enhances performance.": "Eustress",
        "No Stress - Currently experiencing minimal to no stress.": "No stress",
        "Distress (Negative Stress) - Stress that causes anxiety and impairs well-being.": "Distress",
    }
)

In [29]:
Stresslevel

Unnamed: 0,Id,Gender,Age,Have you recently experienced stress in your life?,Have you noticed a rapid heartbeat or palpitations?,Have you been dealing with anxiety or tension recently?,Do you face any sleep problems or difficulties falling asleep?,Have you been dealing with anxiety or tension recently?.1,Have you been getting headaches more often than usual?,Do you get irritated easily?,...,Are you facing any difficulties with your professors or instructors?,Is your working environment unpleasant or stressful?,Do you struggle to find time for relaxation and leisure activities?,Is your hostel or home environment causing you difficulties?,Do you lack confidence in your academic performance?,Do you lack confidence in your choice of academic subjects?,Academic and extracurricular activities conflicting for you?,Do you attend classes regularly?,Have you gained/lost weight?,Which type of stress do you primarily experience?
0,1,0,20,3,4,2,5,1,2,1,...,3,1,4,1,2,1,3,1,2,Eustress
1,2,0,20,2,3,2,1,1,1,1,...,3,2,1,1,3,2,1,4,2,Eustress
2,3,0,20,5,4,2,2,1,3,4,...,2,2,2,1,4,1,1,2,1,Eustress
3,4,1,20,3,4,3,2,2,3,4,...,1,1,2,1,2,1,1,5,3,Eustress
4,5,0,20,3,3,3,2,2,4,4,...,2,3,1,2,2,4,2,2,2,Eustress
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
838,839,0,21,3,4,2,3,5,1,5,...,2,3,3,3,4,1,2,2,2,Eustress
839,840,1,19,3,2,1,2,2,1,2,...,1,1,1,3,2,1,2,3,1,No stress
840,841,1,19,4,4,3,4,3,2,2,...,2,2,2,2,3,1,4,5,3,Eustress
841,842,0,20,5,4,3,4,3,4,4,...,2,2,1,4,3,5,4,5,1,Eustress


In [34]:
# Split the survey into the target (y) and the predictors (X).
# `drop` returns a new frame, so the columns added to X below do not modify
# Stresslevel.
df = Stresslevel
y = df["Which type of stress do you primarily experience?"]
X = df.drop(columns=["Which type of stress do you primarily experience?"])

# Columns averaged into `symptom_mean`: every question except the demographics
# (Gender, Age). The surrogate key "Id" is excluded as well - it is a row
# counter, not an answer, and including it would skew the mean.
symptom_cols = [column for column in X.columns if column not in ["Gender", "Age", "Id"]]

# Possible psychological factor.
# For all three factor lists below, names that are absent from X are skipped
# silently and the resulting order follows X.columns.
psiquis_factor = [
    column for column in X.columns
    if column in [
        "Have you recently experienced stress in your life?",
        "Have you been dealing with anxiety or tension recently?",
        "Do you get irritated easily?",
        "Do you have trouble concentrating on your academic tasks?",
        "Do you lack confidence in your academic performance?",
    ]
]

# Possible biological factor.
biological_factor = [
    column for column in X.columns
    if column in [
        "Have you noticed a rapid heartbeat or palpitations?",
        "Do you face any sleep problems or difficulties falling asleep?",
        "Have you been getting headaches more often than usual?",
        "Have you gained/lost weight?",
    ]
]

# Possible personal / environment factor.
persona_factor = [
    column for column in X.columns
    if column in [
        "Are you facing any difficulties with your professors or instructors?",
        "Is your working environment unpleasant or stressful?",
        "Do you struggle to find time for relaxation and leisure activities?",
        "Is your hostel or home environment causing you difficulties?",
        "Academic and extracurricular activities conflicting for you?",
        "Do you attend classes regularly?",
    ]
]

# Row-wise mean over all symptom questions.
X["symptom_mean"] = X[symptom_cols].mean(axis=1)

# Modelling table without the gender flag and the surrogate key
# (it still contains Age and symptom_mean).
no_gender_data = X.drop(["Gender", "Id"], axis="columns")

# Every column except the key; kept under its original name.
cols_copy = [column for column in X.columns if column not in ["Id"]]

# Reduced table: the key plus one averaged score per factor.
# Selecting ["Id"] directly is equivalent to dropping every other column.
reducing_data = X[["Id"]].copy()

reducing_data["psicological_feature"] = X[psiquis_factor].mean(axis=1)
reducing_data["biological_feature"] = X[biological_factor].mean(axis=1)
reducing_data["personal_feature"] = X[persona_factor].mean(axis=1)



In [46]:
### The reduced dataset (X_reducing) will be passed in here
class Scaler_process(Feast_use):
    """Scale a table and project it onto its leading principal components.

    NOTE(review): the base class ``Feast_use`` is defined in a later cell of
    this notebook; that cell must have been executed before this one.
    """

    def scaler(self, data: pd.DataFrame, scale_method, pipeline_name: str, n_components=2):
        """Fit a ``scaler -> PCA`` pipeline on ``data`` and return the projection.

        Args:
            data: Table to transform.
            scale_method: Transformer instance used as the first pipeline step
                (for example ``StandardScaler()``).
            pipeline_name: Prefix used for the names of the output columns.
            n_components: Forwarded to ``PCA(n_components=...)``. Defaults to 2,
                the value that used to be hard-coded.

        Returns:
            A DataFrame with one column per principal component, named
            ``"<pipeline_name>_feat_<k>"`` with ``k`` starting at 1.

        Side effects:
            Stores the arguments, the PCA object and, after fitting,
            ``self.explained_variance`` on the instance.
        """
        self.scale_method = scale_method
        self.data = data
        self.pipeline_name = pipeline_name
        self.n_components = n_components
        self.pca = PCA(n_components=self.n_components)

        scaler_pipeline = Pipeline(
            steps=[
                ("scaler", self.scale_method),
                ("PCA", self.pca),
            ]
        )
        components = scaler_pipeline.fit_transform(self.data)

        # explained_variance_ratio_ only exists once the PCA has been fitted,
        # so it is read after fit_transform (reading it earlier raised
        # AttributeError).
        self.explained_variance = scaler_pipeline.named_steps["PCA"].explained_variance_ratio_

        # Name the columns from the actual output width rather than from an
        # attribute that was never assigned.
        return pd.DataFrame(
            components,
            columns=[f"{self.pipeline_name}_feat_{i+1}" for i in range(components.shape[1])],
        )

    # Intended usage (sketch):
    #   scaler_table = <instance>.scaler(data=no_gender_data,
    #                                    scale_method=StandardScaler(),  # or MinMaxScaler(), RobustScaler()
    #                                    pipeline_name=...)

    def plot_variance(self):
        """Plot the explained-variance ratio of each fitted component.

        Raises:
            RuntimeError: if ``scaler`` has not been called yet, because there
                is no fitted PCA to read the ratios from.
        """
        if not hasattr(self, "explained_variance"):
            raise RuntimeError("call scaler() before plot_variance()")

        # NOTE(review): `plt` (presumably matplotlib.pyplot) is not imported in
        # the notebook's imports cell - confirm it is in scope.
        plt.plot(range(1, len(self.explained_variance)+1), self.explained_variance, marker="o")
        plt.title("Explained Variance by Components")
        plt.xlabel("Component")
        plt.ylabel("Explained Variance Ratio")
        plt.show()

# Intended usage (sketch): <instance>.plot_variance()  - after scaler() has run

In [None]:
class Processing:
    """Placeholder for the processing pipeline; nothing is implemented yet."""

    def pipeline_process(self):
        """Run the processing pipeline.

        The original cell ended right after the ``def`` line, which is a
        SyntaxError. An explicit stub keeps the notebook runnable and makes
        the missing implementation obvious to callers.

        Raises:
            NotImplementedError: always, until the pipeline is written.
        """
        raise NotImplementedError("Processing.pipeline_process is not implemented yet")

### FEATURE STORE

In [1]:
class Feast_use:
    """Holds the settings of the feature-store helper (data path and add-ons)."""

    def __init__(self, filepath = "../feast_service/fs_dsrp_mle2_jul9/feature_repo/data", addons = None):
        """Store the configuration on the instance.

        Args:
            filepath: Directory of the feature repository data.
            addons: Extra configuration; stored as given. It defaults to
                ``None`` because a parameter without a default cannot follow
                one with a default (the original signature did not parse).
                Positional calls ``Feast_use(path, addons)`` work as before.
        """
        self.filepath = filepath
        self.addons = addons

    def table_creator(self):
        """Placeholder; not implemented yet.

        ``self`` was added to the signature so the method can be called on an
        instance.
        """
        pass
        




        

In [21]:
X

Unnamed: 0,Gender,Age,Have you recently experienced stress in your life?,Have you noticed a rapid heartbeat or palpitations?,Have you been dealing with anxiety or tension recently?,Do you face any sleep problems or difficulties falling asleep?,Have you been dealing with anxiety or tension recently?.1,Have you been getting headaches more often than usual?,Do you get irritated easily?,Do you have trouble concentrating on your academic tasks?,...,Are you facing any difficulties with your professors or instructors?,Is your working environment unpleasant or stressful?,Do you struggle to find time for relaxation and leisure activities?,Is your hostel or home environment causing you difficulties?,Do you lack confidence in your academic performance?,Do you lack confidence in your choice of academic subjects?,Academic and extracurricular activities conflicting for you?,Do you attend classes regularly?,Have you gained/lost weight?,symptom_mean
0,0,20,3,4,2,5,1,2,1,2,...,3,1,4,1,2,1,3,1,2,2.260870
1,0,20,2,3,2,1,1,1,1,4,...,3,2,1,1,3,2,1,4,2,2.000000
2,0,20,5,4,2,2,1,3,4,2,...,2,2,2,1,4,1,1,2,1,2.521739
3,1,20,3,4,3,2,2,3,4,3,...,1,1,2,1,2,1,1,5,3,2.521739
4,0,20,3,3,3,2,2,4,4,4,...,2,3,1,2,2,4,2,2,2,2.391304
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
838,0,21,3,4,2,3,5,1,5,4,...,2,3,3,3,4,1,2,2,2,2.826087
839,1,19,3,2,1,2,2,1,2,3,...,1,1,1,3,2,1,2,3,1,1.826087
840,1,19,4,4,3,4,3,2,2,3,...,2,2,2,2,3,1,4,5,3,2.652174
841,0,20,5,4,3,4,3,4,4,4,...,2,2,1,4,3,5,4,5,1,3.173913


In [None]:

# Open an MLflow run and build (but not yet fit or log) an
# imputer -> random-forest pipeline.
with mlflow.start_run(run_name="Random Forest") as run:
    algorithm = RandomForestClassifier()
    pipeline = Pipeline(
            steps=[
                ("imputer", SimpleImputer(strategy="mean")),
                ("rf", algorithm)
            ]
        )

# TODO (pseudo-code note, kept as a comment because `<->` is not valid Python
# and made the whole cell a SyntaxError):
#   experimento = experiment_definition() <-> ensamble_
# NOTE(review): `experiment_definition` is not defined in any visible cell.
# There is a problem with the kernel.


In [None]:


# I want to apply clustering to see how the data and the classification behave.
# NOTE(review): `X_scaled` and `labels` are not defined in any visible cell;
# they must be created (scaled features and cluster assignments) before this
# cell can run on a fresh kernel.
silhouette = silhouette_score(X_scaled, labels)
dbi = davies_bouldin_score(X_scaled, labels)
chi = calinski_harabasz_score(X_scaled, labels)
ari = adjusted_rand_score(y, labels)   # compares the clusters with the true labels
nmi = normalized_mutual_info_score(y, labels)


print(f"Silhouette Score: {silhouette:.3f}")
print(f"Davies-Bouldin Index: {dbi:.3f}")
print(f"Calinski-Harabasz Index: {chi:.3f}")
print(f"Adjusted Rand Index {ari:.3f}")
print(f"Normalized Mutual Info: {nmi:.3f}")

# UMAP projection.
# The original call left `n_neighbors=` and `n_components=` empty, which is a
# SyntaxError. n_components=2 matches the two embedding columns used by the
# plotting code below; n_neighbors=15 is assumed to be the umap-learn default
# - TODO confirm / tune.
# NOTE(review): `UMAP` and `plt` are not imported in the notebook's imports
# cell - confirm they are in scope.
embedding = UMAP(n_neighbors=15, min_dist=0.1, n_components=2, random_state=42).fit_transform(X_scaled)

fig, ax = plt.subplots(1, 2, figsize=(12, 5))

# Disabled plotting code. It is kept as comments instead of a bare string
# literal, which would be echoed as the cell's output.
# NOTE(review): `sns` is not imported in the notebook's imports cell.
#
# sns.scatterplot(x=embedding[:,0], y=embedding[:,1], hue=y, ax=ax[0], palette="Set1")
# ax[0].set_title("UMAP - Etiquetas reales")
#
# sns.scatterplot(x=embedding[:,0], y=embedding[:,1], hue=labels, ax=ax[1], palette="Set2")
# ax[1].set_title("UMAP - Clusters KMeans")
#
# plt.show()

In [17]:
Stresslevel.info()
# Working notes on candidate columns. They are kept as comments: the original
# bare strings ended in a `->` line, which is a SyntaxError.
#   'Have you been dealing with anxiety or tension recently? ',
#   'Have you been dealing with anxiety or tension recently?.1',
#   'Have you noticed a rapid heartbeat or palpitations? ',
#   'Do you face any sleep problems or difficulties falling asleep? ',
#   'Do you feel overwhelmed with your academic workload? ',
#   'Do you have trouble concentrating on your academic tasks? ',
#   'Do you get irritated easily?',
#   '',
#   '',
#   "exogena" -> 'Have you gained/lost weight?'

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 843 entries, 0 to 842
Data columns (total 26 columns):
 #   Column                                                                Non-Null Count  Dtype 
---  ------                                                                --------------  ----- 
 0   Gender                                                                843 non-null    int64 
 1   Age                                                                   843 non-null    int64 
 2   Have you recently experienced stress in your life?                    843 non-null    int64 
 3   Have you noticed a rapid heartbeat or palpitations?                   843 non-null    int64 
 4   Have you been dealing with anxiety or tension recently?               843 non-null    int64 
 5   Do you face any sleep problems or difficulties falling asleep?        843 non-null    int64 
 6   Have you been dealing with anxiety or tension recently?.1             843 non-null    int64 
 7   Have you

In [15]:
stress

Unnamed: 0,anxiety_level,self_esteem,mental_health_history,depression,headache,blood_pressure,sleep_quality,breathing_problem,noise_level,living_conditions,...,basic_needs,academic_performance,study_load,teacher_student_relationship,future_career_concerns,social_support,peer_pressure,extracurricular_activities,bullying,stress_level
0,14,20,0,11,2,1,2,4,2,3,...,2,3,2,3,3,2,3,3,2,1
1,15,8,1,15,5,3,1,4,3,1,...,2,1,4,1,5,1,4,5,5,2
2,12,18,1,14,2,1,2,2,2,2,...,2,2,3,3,2,2,3,2,2,1
3,16,12,1,15,4,3,1,3,4,2,...,2,2,4,1,4,1,4,4,5,2
4,16,28,0,7,2,3,5,1,3,2,...,3,4,3,1,2,1,5,0,5,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1095,11,17,0,14,3,1,3,2,2,2,...,3,2,2,2,3,3,2,3,3,1
1096,9,12,0,8,0,3,0,0,0,1,...,4,0,1,1,1,1,3,4,3,2
1097,4,26,0,3,1,2,5,2,2,3,...,4,5,1,4,1,3,1,2,1,0
1098,21,0,1,19,5,3,1,4,3,1,...,1,2,5,1,4,1,4,4,4,2


In [35]:
# Map the numeric stress codes to readable labels (0 -> Low, 1 -> Medium,
# 2 -> High). Values that are not one of these codes are left unchanged.
stress["stress_level"] = stress["stress_level"].replace(
    to_replace={0: "Low", 1: "Medium", 2: "High"}
)


In [36]:
stress
# Working notes on candidate columns. They are kept as comments: the original
# bare strings ended in `->` lines, which are a SyntaxError.
#   'anxiety_level',
#   'self_esteem',
#   'sleep_quality',
#   'social_support',
#   'depression',
#   'bullying',
#   'mental_health_history',
#   "exogena1" -> 'safety'
#   "exogena2" -> 'living_conditions'

Unnamed: 0,anxiety_level,self_esteem,mental_health_history,depression,headache,blood_pressure,sleep_quality,breathing_problem,noise_level,living_conditions,...,basic_needs,academic_performance,study_load,teacher_student_relationship,future_career_concerns,social_support,peer_pressure,extracurricular_activities,bullying,stress_level
0,14,20,0,11,2,1,2,4,2,3,...,2,3,2,3,3,2,3,3,2,Medium
1,15,8,1,15,5,3,1,4,3,1,...,2,1,4,1,5,1,4,5,5,High
2,12,18,1,14,2,1,2,2,2,2,...,2,2,3,3,2,2,3,2,2,Medium
3,16,12,1,15,4,3,1,3,4,2,...,2,2,4,1,4,1,4,4,5,High
4,16,28,0,7,2,3,5,1,3,2,...,3,4,3,1,2,1,5,0,5,Medium
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1095,11,17,0,14,3,1,3,2,2,2,...,3,2,2,2,3,3,2,3,3,Medium
1096,9,12,0,8,0,3,0,0,0,1,...,4,0,1,1,1,1,3,4,3,High
1097,4,26,0,3,1,2,5,2,2,3,...,4,5,1,4,1,3,1,2,1,Low
1098,21,0,1,19,5,3,1,4,3,1,...,1,2,5,1,4,1,4,4,4,High


In [20]:
stress.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1100 entries, 0 to 1099
Data columns (total 21 columns):
 #   Column                        Non-Null Count  Dtype
---  ------                        --------------  -----
 0   anxiety_level                 1100 non-null   int64
 1   self_esteem                   1100 non-null   int64
 2   mental_health_history         1100 non-null   int64
 3   depression                    1100 non-null   int64
 4   headache                      1100 non-null   int64
 5   blood_pressure                1100 non-null   int64
 6   sleep_quality                 1100 non-null   int64
 7   breathing_problem             1100 non-null   int64
 8   noise_level                   1100 non-null   int64
 9   living_conditions             1100 non-null   int64
 10  safety                        1100 non-null   int64
 11  basic_needs                   1100 non-null   int64
 12  academic_performance          1100 non-null   int64
 13  study_load                    110