# ML

### Librerías

In [2]:
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
import re

In [3]:
# Load the preprocessed dataset from the project's data/processed folder.
# NOTE(review): this is an absolute path on one machine; the notebook will not
# run elsewhere until it is replaced by a path relative to the project root.
dataset_path = r'C:\Users\nuria\OneDrive\Escritorio\ML_student_depression\data\processed\dataset_limpio_2.csv'
df = pd.read_csv(dataset_path)

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,Gender,Age,Academic Pressure,CGPA,Study Satisfaction,Sleep Duration,Dietary Habits,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Family History of Mental Illness,Depression
0,1,33.0,5.0,8.97,2.0,0,0,1,3.0,1.0,0,1
1,0,24.0,2.0,5.9,5.0,0,1,0,3.0,2.0,1,0
2,1,31.0,3.0,7.03,5.0,2,0,0,9.0,1.0,1,0
3,0,28.0,3.0,5.59,2.0,1,1,1,4.0,5.0,1,1
4,0,25.0,4.0,8.13,3.0,0,1,1,1.0,1.0,0,0


In [4]:
from sklearn.model_selection import train_test_split

# Build the feature matrix and the target vector.
# Besides the target, drop "Unnamed: 0": in the preview it simply counts rows
# (0, 1, 2, ...), i.e. it looks like the index written out when the CSV was
# saved. Leaving it in would feed a meaningless, large-scale row counter to
# every model (distance/margin based ones such as KNN and SVC in particular).
# errors="ignore" keeps the cell working if the CSV is later loaded with
# index_col=0 and the column no longer exists.
x = df.drop(columns=["Depression", "Unnamed: 0"], errors="ignore")
y = df["Depression"]

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

In [5]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from lightgbm import LGBMClassifier

In [6]:
# Candidate classifiers, all with default hyper-parameters except where shown.
# Every estimator that draws random numbers during fitting gets a fixed
# random_state (same seed as the train/test split) so the accuracy ranking
# printed below is reproducible after a kernel restart.
model_list = [
    LogisticRegression(),
    DecisionTreeClassifier(ccp_alpha=0.001, random_state=42),
    RandomForestClassifier(random_state=42),
    GradientBoostingClassifier(random_state=42),
    AdaBoostClassifier(random_state=42),
    SVC(),
    GaussianNB(),
    KNeighborsClassifier(),
    LinearDiscriminantAnalysis(),
    QuadraticDiscriminantAnalysis(),
    LGBMClassifier(verbose=-1, random_state=42),
]

# Fit each candidate on the training split and record its test accuracy,
# keyed by the estimator's class name.
results = {}
for model in model_list:
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    results[type(model).__name__] = accuracy

# Report accuracies as percentages.
for model_name, accuracy in results.items():
    print(model_name, accuracy * 100)

LogisticRegression 84.24322064269502
DecisionTreeClassifier 82.06904790347629
RandomForestClassifier 83.07251224465416
GradientBoostingClassifier 84.19543662644845
AdaBoostClassifier 84.11181459801696
SVC 84.17154461832517
GaussianNB 83.55035240711982
KNeighborsClassifier 79.40508899773026
LinearDiscriminantAnalysis 84.0640305817704
QuadraticDiscriminantAnalysis 83.59813642336638
LGBMClassifier 84.11181459801696


In [7]:
# Pick the (name, accuracy) pair with the highest accuracy; on ties the first
# entry inserted into `results` wins.
max_classifier, max_accuracy = max(results.items(), key=lambda entry: entry[1])

print(
    "El clasificador con mayor accuracy es:",
    max_classifier,
    "con un accuracy de:",
    max_accuracy,
)

El clasificador con mayor accuracy es: LogisticRegression con un accuracy de: 0.8424322064269502


In [8]:
# Ensemble of all the candidate classifiers. VotingClassifier is left at its
# default voting mode. The base estimators that use random numbers get a fixed
# random_state so the ensemble's predictions are reproducible across runs.
model = VotingClassifier(estimators=[
    ("logistic", LogisticRegression()),
    ("decision_tree", DecisionTreeClassifier(ccp_alpha=0.001, random_state=42)),
    ("random_forest", RandomForestClassifier(random_state=42)),
    ("gradient_boosting", GradientBoostingClassifier(random_state=42)),
    ("ada_boost", AdaBoostClassifier(random_state=42)),
    ("svc", SVC()),
    ("gaussian_nb", GaussianNB()),
    ("k_neighbors", KNeighborsClassifier()),
    ("lda", LinearDiscriminantAnalysis()),
    ("qda", QuadraticDiscriminantAnalysis()),
    ("lgbm", LGBMClassifier(verbose=-1, random_state=42)),
])

# Fit on the training split; the fitted estimator is the cell's output.
model.fit(x_train, y_train)

In [9]:
# Evaluate the current `model` on the held-out test split.
y_pred = model.predict(x_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Show the fraction of correctly classified test rows as the cell output.
accuracy

0.8425516664675666

In [1]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression


In [10]:
# Base estimator for the grid search below. The 'sag' solver shuffles the
# training data, so random_state is pinned to make the fit (and therefore the
# cross-validation scores) reproducible.
# NOTE(review): scikit-learn documents that 'sag' only converges quickly on
# features with similar scale; the columns previewed above (e.g. Age vs. CGPA)
# are not scaled - consider a StandardScaler pipeline or a larger max_iter.
model = LogisticRegression(solver='sag', random_state=42)

In [None]:
# Hyper-parameter grid handed to GridSearchCV. Each entry currently lists a
# single value, so the "search" evaluates exactly one combination.
param_grid = dict(
    C=[1],
    penalty=['l2'],
    max_iter=[100],
)

In [14]:
# 5-fold cross-validated grid search over `param_grid`, scored by accuracy and
# fitted on the training split only.
grid_search = GridSearchCV(
    model,
    param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(x_train, y_train)

# Report the winning parameter combination and its mean cross-validation score.
for label, value in (
    ("Mejores parámetros:", grid_search.best_params_),
    ("Mejor puntuación:", grid_search.best_score_),
):
    print(label, value)



Mejores parámetros: {'C': 1, 'max_iter': 100, 'penalty': 'l2'}
Mejor puntuación: 0.8480286738351255




In [15]:
# Take the best estimator found by the grid search and score it on the
# held-out test split.
best_model = grid_search.best_estimator_
test_score = best_model.score(X=x_test, y=y_test)

print("Puntuación en el conjunto de prueba:", test_score)

Puntuación en el conjunto de prueba: 0.8423127463863338
