In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

# Load the dataset
data = pd.read_csv('customers.csv')

# Define features (X) and target variable (y)
X = data[['Gender', 'Age', 'City_Location']]  # Features: Gender, Age, City_Location
y = data['Frequent_Losers']  # Target variable

# Define preprocessing steps (OneHotEncoder for categorical features, StandardScaler for numerical features)
numeric_features = ['Age']
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

categorical_features = ['Gender', 'City_Location']
categorical_transformer = Pipeline(steps=[
    ('encoder', OneHotEncoder(drop='first'))
])

In [2]:
# Combine preprocessing steps with column transformation
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Initialize logistic regression classifier within a pipeline
logreg_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(max_iter=1000))
])

In [3]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the classifier
logreg_pipeline.fit(X_train, y_train)

# Predict frequent losers for the test set
y_pred = logreg_pipeline.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9998


In [4]:
# Example prediction for a new customer
new_customer_data = pd.DataFrame({
    'Gender': ['Male'],
    'Age': [55],
    'City_Location': ['Denver']
})

# Ensure new customer data matches the same feature set used in training
predicted_frequent_loser = logreg_pipeline.predict(new_customer_data)
print("Predicted frequent loser for the new customer:", predicted_frequent_loser[0])

Predicted frequent loser for the new customer: True
