In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [13]:
# Load the user-behaviour table; later cells read it through the name `df`.
df = pd.read_csv('/datasets/users_behavior.csv')

# DataFrame.info() writes its report straight to stdout and returns None,
# so wrapping it in print() only appended a stray "None" line to the output.
df.info()

# describe() returns a DataFrame of summary statistics; print it as plain text.
print(df.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3214 entries, 0 to 3213
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   calls     3214 non-null   float64
 1   minutes   3214 non-null   float64
 2   messages  3214 non-null   float64
 3   mb_used   3214 non-null   float64
 4   is_ultra  3214 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 125.7 KB
None
             calls      minutes     messages       mb_used     is_ultra
count  3214.000000  3214.000000  3214.000000   3214.000000  3214.000000
mean     63.038892   438.208787    38.281269  17207.673836     0.306472
std      33.236368   234.569872    36.148326   7570.968246     0.461100
min       0.000000     0.000000     0.000000      0.000000     0.000000
25%      40.000000   274.575000     9.000000  12491.902500     0.000000
50%      62.000000   430.600000    30.000000  16943.235000     0.000000
75%      82.000000   571.927500    57.000000  21424.700000  

In [6]:
# Label column on one side, every remaining column as predictors on the other.
target = df['is_ultra']
features = df.drop(columns=['is_ultra'])

# First cut: keep 60% of the rows for training and hold out the other 40%.
(features_train, features_temp,
 target_train, target_temp) = train_test_split(
    features, target, test_size=0.4, random_state=42
)

# Second cut: halve the held-out rows into validation and test parts,
# which yields a 60/20/20 train/validation/test partition overall.
(features_valid, features_test,
 target_valid, target_test) = train_test_split(
    features_temp, target_temp, test_size=0.5, random_state=42
)


In [7]:
# Sweep max_depth over 1..20 for a decision tree and remember the depth that
# gives the highest accuracy on the validation split.
melhor_score, melhor_profundidade = 0, 0

for depth in range(1, 21):
    # fit() returns the estimator itself, so construction and training chain.
    model = DecisionTreeClassifier(random_state=42, max_depth=depth).fit(
        features_train, target_train
    )
    predicoes = model.predict(features_valid)
    score = accuracy_score(y_true=target_valid, y_pred=predicoes)

    # Strict comparison: on a tie the earlier (shallower) depth is kept.
    if score > melhor_score:
        melhor_score, melhor_profundidade = score, depth

print(f"Melhor profundidade: {melhor_profundidade}, Acurácia: {melhor_score:.4f}")

Melhor profundidade: 8, Acurácia: 0.7963


In [9]:
# Sweep n_estimators over 10, 20, ..., 100 for a random forest and remember
# the value that gives the highest accuracy on the validation split.
melhor_score, melhor_estimador = 0, 0

for est in range(10, 101, 10):
    # fit() returns the estimator itself, so construction and training chain.
    model = RandomForestClassifier(random_state=42, n_estimators=est).fit(
        features_train, target_train
    )
    predicoes = model.predict(features_valid)
    score = accuracy_score(y_true=target_valid, y_pred=predicoes)

    # Strict comparison: on a tie the earlier (smaller) forest is kept.
    if score > melhor_score:
        melhor_score, melhor_estimador = score, est

print(f"Melhor n_estimators: {melhor_estimador}, Acurácia: {melhor_score:.4f}")

Melhor n_estimators: 90, Acurácia: 0.8025


In [10]:
# Logistic regression (liblinear solver) trained on the training split and
# scored by accuracy on the validation split.
model = LogisticRegression(random_state=42, solver='liblinear').fit(
    features_train, target_train
)
predicoes = model.predict(features_valid)
score = accuracy_score(y_true=target_valid, y_pred=predicoes)

print(f"Acurácia da Regressão Logística: {score:.4f}")

Acurácia da Regressão Logística: 0.7076


In [11]:
# Final model: a random forest sized by `melhor_estimador` (assigned by the
# n_estimators sweep cell), trained on the training split only.
modelo_final = RandomForestClassifier(
    random_state=42,
    n_estimators=melhor_estimador,
)
modelo_final.fit(features_train, target_train)

# Score once on the held-out test split, which no earlier cell fits or scores on.
predicoes_teste = modelo_final.predict(features_test)
score_final = accuracy_score(y_true=target_test, y_pred=predicoes_teste)

print(f"Acurácia no conjunto de teste: {score_final:.4f}")

Acurácia no conjunto de teste: 0.8134
