In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.dummy import DummyClassifier

In [2]:
# Read the user-behaviour CSV into a DataFrame; later cells derive features and target from it.
df = pd.read_csv(filepath_or_buffer='/datasets/users_behavior.csv')

In [3]:
# Target is the 'is_ultra' column; every other column is kept as a feature.
y = df['is_ultra']
x = df.drop(columns=['is_ultra'])

In [4]:
# Two-stage split with a fixed seed: first hold out 40% of the rows,
# then cut that hold-out in half to obtain the validation and test sets.
x_train, x_temp, y_train, y_temp = train_test_split(
    x, y, test_size=0.4, random_state=12345
)
x_valid, x_test, y_valid, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=12345
)

In [5]:

# Shared tracking state for the model-selection cells below:
#   results       - maps a model label to its validation accuracy
#   best_accuracy - highest validation accuracy seen so far (starts at 0)
#   best_model    - fitted estimator that achieved best_accuracy (None until one is found)
results = {}
best_accuracy = 0
best_model = None


In [6]:
# Decision tree: fit one tree per max_depth in 1..10, record its validation
# accuracy, and keep the model only when it strictly beats the best so far.
for depth in range(1, 11):
    model = DecisionTreeClassifier(random_state=12345, max_depth=depth).fit(x_train, y_train)
    predictions = model.predict(x_valid)
    acc = accuracy_score(y_true=y_valid, y_pred=predictions)
    results[f'DesicionTree_depth_{depth}'] = acc
    if acc > best_accuracy:
        best_accuracy, best_model = acc, model

In [7]:
# Random forest: fit one forest per ensemble size, record its validation
# accuracy, and keep the model only when it strictly beats the best so far.
for est in (10, 50, 100, 200):
    model = RandomForestClassifier(random_state=12345, n_estimators=est).fit(x_train, y_train)
    predictions = model.predict(x_valid)
    acc = accuracy_score(y_true=y_valid, y_pred=predictions)
    results[f'RandomForest_{est}'] = acc
    if acc > best_accuracy:
        best_accuracy, best_model = acc, model

In [8]:
# Logistic regression: a single fit with the lbfgs solver (up to 1000 iterations),
# scored on the validation split and compared against the best model so far.
model = LogisticRegression(
    solver='lbfgs',
    max_iter=1000,
    random_state=12345,
).fit(x_train, y_train)
predictions = model.predict(x_valid)
acc = accuracy_score(y_true=y_valid, y_pred=predictions)
results['LogisticRegression'] = acc
if acc > best_accuracy:
    best_accuracy, best_model = acc, model

In [9]:
# Print every recorded validation accuracy, one label per line, to four decimal places.
print("Resultados en validacion:")
for name in results:
    acc = results[name]
    print(f'{name}: {acc:.4f}')

Resultados en validacion:
DesicionTree_depth_1: 0.7543
DesicionTree_depth_2: 0.7823
DesicionTree_depth_3: 0.7854
DesicionTree_depth_4: 0.7792
DesicionTree_depth_5: 0.7792
DesicionTree_depth_6: 0.7838
DesicionTree_depth_7: 0.7823
DesicionTree_depth_8: 0.7792
DesicionTree_depth_9: 0.7823
DesicionTree_depth_10: 0.7745
RandomForest_10: 0.7854
RandomForest_50: 0.7916
RandomForest_100: 0.7854
RandomForest_200: 0.7869
LogisticRegression: 0.7107


In [10]:
# Evaluate the model that scored best on validation against the held-out test split,
# then report its class name alongside both accuracies.
test_predictions = best_model.predict(x_test)
test_accuracy = accuracy_score(y_true=y_test, y_pred=test_predictions)

print("\nMejor modelo:", type(best_model).__name__)
print("Accuracy en validación:", best_accuracy)
print("Accuracy en prueba:", test_accuracy)


Mejor modelo: RandomForestClassifier
Accuracy en validación: 0.7916018662519441
Accuracy en prueba: 0.7931570762052877


In [14]:
# Sanity-check baseline: a "most_frequent" dummy classifier fitted on the training
# split and scored on the test split, printed next to the selected model's test accuracy.
dummy = DummyClassifier(strategy="most_frequent").fit(x_train, y_train)
dummy_predictions = dummy.predict(x_test)
dummy_accuracy = accuracy_score(y_true=y_test, y_pred=dummy_predictions)

print("Accuracy del modelo dummy:", dummy_accuracy)
print("Accuracy del mejor modelo:", test_accuracy)

Accuracy del modelo dummy: 0.6842923794712286
Accuracy del mejor modelo: 0.7931570762052877
