In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [None]:
# Load the dataset: prefer a copy next to the notebook and fall back to the
# shared /datasets location when the local file does not exist.
try:
    data = pd.read_csv("users_behavior.csv")
except FileNotFoundError:
    # Only the "file is missing" case triggers the fallback; parse errors,
    # permission problems or a KeyboardInterrupt are no longer silently masked.
    data = pd.read_csv("/datasets/users_behavior.csv")

In [None]:
# Plain-text preview of the first ten rows.
print(data.head(n=10))

In [None]:
# Summary of the frame: column names, non-null counts and dtypes.
data.info()

In [None]:
# Number of rows whose `is_ultra` label equals 0.
print((data["is_ultra"] == 0).sum())

In [None]:
# Pairwise correlation matrix of the columns (shown as the cell output).
data.corr()

In [None]:
# Separate the label column from the predictor columns.
target = data['is_ultra']
features = data.drop(columns=['is_ultra'])

In [None]:
# Two-stage stratified split.
# Stage 1 holds out 30% of the rows; stage 2 carves 30% of that hold-out off as
# the test set, so the final proportions are roughly 70% train / 21% valid / 9% test.
features_train, features_holdout, target_train, target_holdout = train_test_split(
    features, target, test_size=0.30, random_state=12345, stratify=target)
features_valid, features_test, target_valid, test_target = train_test_split(
    features_holdout, target_holdout, test_size=0.3, random_state=12345,
    stratify=target_holdout)

In [None]:
# Shapes of the train, validation and test feature sets, in that order.
for split_features in (features_train, features_valid, features_test):
    print(split_features.shape)

In [None]:
# Count of class-0 labels in the train, validation and test targets, in that order.
for split_target in (target_train, target_valid, test_target):
    print((split_target == 0).sum())

In [None]:
# Trackers for the best decision tree and its validation accuracy.
best_model_dtc, best_acc_dtc = None, 0

In [None]:
# Sweep max_depth from 1 to 5 and keep the tree with the highest validation accuracy.
for depth in range(1, 6):
    candidate = DecisionTreeClassifier(max_depth=depth, random_state=12345)
    candidate.fit(features_train, target_train)
    candidate_acc = accuracy_score(target_valid, candidate.predict(features_valid))

    # Strict comparison: on a tie the earlier (shallower) tree is kept.
    if candidate_acc > best_acc_dtc:
        best_model_dtc, best_acc_dtc = candidate, candidate_acc

In [None]:
# Best validation accuracy reached by the decision-tree sweep.
print(best_acc_dtc)

In [None]:
# Draw the decision tree selected by the depth sweep.
plot_tree(best_model_dtc)

In [None]:
# Trackers for the best random forest and its validation accuracy.
best_model_rfc, best_acc_rfc = None, 0

In [None]:
%%time
# Grid search over forest size (1-49 trees) and tree depth (1-4), keeping the
# forest with the highest accuracy on the validation set.
for n_trees in range(1, 50):
    for depth in range(1, 5):
        model = RandomForestClassifier(random_state=12345, n_estimators=n_trees, max_depth=depth)
        # Fit on the training split only. Fitting on the full `features`/`target`
        # would let the model see the validation and test rows, leaking labels
        # into both model selection and the final test score.
        model.fit(features_train, target_train)
        predictions_valid = model.predict(features_valid)
        acc = accuracy_score(target_valid, predictions_valid)

        # Strict comparison: on a tie the earlier (smaller) configuration is kept.
        if best_acc_rfc < acc:
            best_model_rfc = model
            best_acc_rfc = acc

In [None]:
# Score the selected random forest on the held-out test split.
prediction = best_model_rfc.predict(features_test)
accuracy = accuracy_score(y_true=test_target, y_pred=prediction)
print(accuracy)

In [None]:
# Trackers for the best logistic regression and its validation accuracy.
best_model_lr, best_acc_lr = None, 0

In [None]:
# Try iteration caps from 10000 to 90000 (step 10000) and keep the logistic
# regression with the highest validation accuracy.
for iteration_cap in range(10000, 100000, 10000):
    model = LogisticRegression(max_iter=iteration_cap, solver="lbfgs", random_state=12345)
    model.fit(features_train, target_train)
    predictions_valid = model.predict(features_valid)
    acc = accuracy_score(target_valid, predictions_valid)

    # Strict comparison: on a tie the earlier (smaller) cap is kept.
    if acc > best_acc_lr:
        best_model_lr, best_acc_lr = model, acc

In [None]:
# Best validation accuracy reached by the logistic-regression sweep.
print(best_acc_lr)