# Megaline Mobile Legacy to New Plan Conversion

## Stage 1 - Load and Explore Data

### Import the needed libraries

In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

### Load Source Data, Define Features and Target, Verify the Target Is Binary

In [None]:
# Load the source data set
df = pd.read_csv('datasets/users_behavior.csv')

# Features are every column except the target. `columns=` is required here:
# DataFrame.drop defaults to axis=0 and would look for a *row* labelled
# 'is_ultra', raising KeyError.
features = df.drop(columns=['is_ultra'])
target = df['is_ultra']

# Inspect Unique Values in Target
print(target.unique())

# Count Class Distribution
print(target.value_counts())

# Data Type Check
print(target.dtype)

## Stage 2 - Split Data
>*  Split Ratio = 3:1:1
>*  Random State = 12345

In [None]:
# Split Source Data into a Training set of 60% and a Val/Test set of 40%
features_train, features_val_test, target_train, target_val_test = train_test_split(
    features, target, test_size=0.40, random_state=12345
)

# Split the Val/Test set in half: Validation set of 20% and Test set of 20%.
# The results are named features_val / features_test because those are the
# names the model-training and final-test cells read.
features_val, features_test, target_val, target_test = train_test_split(
    features_val_test, target_val_test, test_size=0.5, random_state=12345
)

## Stage 3 - Train & Validate Baseline Models
>* Decision Tree Classifier
>* Random Forest Classifier
>* Looking for Validation Accuracy >= 0.75

In [None]:
# DecisionTreeClassifier: sweep max_depth from 1 to 5 and report validation accuracy
for depth in range(1, 6):
    # fit a fresh tree of this depth on the training set
    model = DecisionTreeClassifier(max_depth=depth, random_state=12345).fit(
        features_train, target_train
    )
    # score the tree's predictions on the validation set
    predictions_val = model.predict(features_val)
    accuracy = accuracy_score(target_val, predictions_val)
    print("Tree depth:", depth, "Validation Accuracy:", accuracy)

In [None]:
# RandomForestClassifier: sweep n_estimators (10..50 step 10) x max_depth (1..5)
# and report validation accuracy for each combination
for trees in range(10, 51, 10):
    for depth in range(1, 6):
        # fit a fresh forest with this size/depth on the training set
        model = RandomForestClassifier(
            n_estimators=trees,
            max_depth=depth,
            random_state=12345,
        ).fit(features_train, target_train)
        # score the forest's predictions on the validation set
        predictions_val = model.predict(features_val)
        accuracy = accuracy_score(target_val, predictions_val)
        print("Trees:", trees, "Depth:", depth, "Validation Accuracy:", accuracy)

## Stage 4 - Select Best Model and Final Test
>* Final Test Accuracy >= 0.75

In [None]:
# Final Model Test

# Rebuild the candidate grid explored in Stage 3 (trees of depth 1-5, forests
# of 10-50 estimators x depth 1-5) so the selection below is reproducible
# from this cell instead of being copied by hand from printed output.
candidates = [
    DecisionTreeClassifier(random_state=12345, max_depth=depth)
    for depth in range(1, 6)
]
candidates += [
    RandomForestClassifier(random_state=12345, max_depth=depth, n_estimators=trees)
    for trees in range(10, 51, 10)
    for depth in range(1, 6)
]

# Pick the candidate with the highest validation accuracy. Models are fitted
# on the training set only; the test set is never used for fitting or
# selection, otherwise the final accuracy would not measure generalization.
final_model = None
best_val_accuracy = -1.0
for candidate in candidates:
    candidate.fit(features_train, target_train)
    val_accuracy = accuracy_score(target_val, candidate.predict(features_val))
    # strict '>' keeps the earliest (simplest) candidate on ties
    if val_accuracy > best_val_accuracy:
        final_model = candidate
        best_val_accuracy = val_accuracy

print("Selected Model:", final_model, "Validation Accuracy:", best_val_accuracy)

# Single, final evaluation on the held-out test set
final_predictions = final_model.predict(features_test)
final_accuracy = accuracy_score(target_test, final_predictions)
print("Final Model Test Accuracy:", final_accuracy)