In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [5]:
# Read the combined dataset from CSV into a DataFrame.
# The path is relative to the notebook's working directory; change it if the
# file is stored somewhere else.
file_path = 'combined.csv'
combined = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Name of the label column the model learns to predict.
target = 'converted'

# Input columns used by the model. 'country' is intentionally left out:
# per the original note, the data only contains USA, so it carries no signal.
features = [
    'lat',
    'long',
    'source',
    'device',
    'operative_system',
    'price',
    'test',
]

# Separate the label column (y) from the feature matrix (X).
y = combined[target]
X = combined[features]

In [7]:
# Define preprocessing for categorical features.
# NOTE: 'country' must NOT be listed here. It is excluded from `features`, so
# it is not a column of X, and ColumnTransformer raises when asked to select a
# column that is missing from the data it is fitted on.
categorical_features = ['source', 'device', 'operative_system']
preprocessor = ColumnTransformer(
    transformers=[
        # One-hot encode each categorical column; drop='first' omits the first
        # category of every feature so the dummy columns are not redundant.
        ('cat', OneHotEncoder(drop='first'), categorical_features)
    ],
    remainder='passthrough'  # Pass the remaining columns through unchanged
)

In [8]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=10,
    test_size=0.2,
)

In [9]:
# Chain the column preprocessing and a logistic-regression classifier so the
# same transformations are applied at fit time and at prediction time.
model = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(random_state=50, max_iter=1000)),
])

# Fit on the training split, then measure mean accuracy on the held-out split
# (fit returns the fitted pipeline itself, so the calls can be chained).
accuracy = model.fit(X_train, y_train).score(X_test, y_test)
print(f"Model accuracy: {accuracy:.2f}")

Model accuracy: 0.98


In [23]:
# Function to predict optimal price
def predict_optimal_price(model, input_data, preprocessor):
    """
    Predicts the price (39 or 59) that maximizes the probability of 'converted = 1' for a given input.

    Two copies of the input row are scored: one with test=0 / price=39 and one
    with test=1 / price=59. The price whose copy gets the higher predicted
    probability is returned. The caller's `input_data` is never modified.

    Parameters:
    - model: Trained classifier exposing `predict_proba`; it receives the
      output of `preprocessor.transform`. Column 1 of `predict_proba` is taken
      as the probability of 'converted = 1' (assumes the classes are ordered
      [0, 1] -- confirm against the fitted classifier).
    - input_data: Single-row DataFrame containing features for prediction
    - preprocessor: Fitted preprocessing step exposing `transform`

    Returns:
    - Optimal price (39 or 59). 39 is returned only when its probability is
      strictly higher; a tie resolves to 59.

    Raises:
    - ValueError: if `input_data` does not contain exactly one row.
    """
    # Enforce the single-row contract explicitly. Comparing multi-row
    # probability arrays in a boolean context would otherwise fail with an
    # opaque "truth value of an array is ambiguous" error.
    n_rows = len(input_data)
    if n_rows != 1:
        raise ValueError(
            f"input_data must contain exactly one row, got {n_rows}"
        )

    def _conversion_probability(test_flag, price):
        # Score a copy of the row with the given test flag / price applied.
        candidate = input_data.copy()
        candidate['test'] = test_flag
        candidate['price'] = price
        processed = preprocessor.transform(candidate)
        # Row 0 is the only row; column 1 is the positive-class probability.
        return float(model.predict_proba(processed)[0, 1])

    prob_price_39 = _conversion_probability(0, 39)
    prob_price_59 = _conversion_probability(1, 59)

    # Choose the price with the higher probability (ties go to 59)
    return 39 if prob_price_39 > prob_price_59 else 59

# Example usage: take one row of the test set by position (position 504, not
# the first row) and ask which price the fitted classifier favours for it.
# The double brackets keep the selection a one-row DataFrame rather than a Series.
sample_input = X_test.iloc[[504]].copy()
optimal_price = predict_optimal_price(
    model=model.named_steps['classifier'],
    input_data=sample_input,
    preprocessor=model.named_steps['preprocessor'],
)

print(f"Optimal price: {optimal_price}")


Optimal price: 39
