In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Load the call-detail records; the path is relative to the notebook's working directory.
file_path = 'CDR-Call-Details.csv'
df = pd.read_csv(file_path)

# Feature engineering: collapse the per-period usage columns (day / evening /
# night / international) into one total per measure.
df['Total Mins'] = df['Day Mins'] + df['Eve Mins'] + df['Night Mins'] + df['Intl Mins']

df['Total Calls'] = df['Day Calls'] + df['Eve Calls'] + df['Night Calls'] + df['Intl Calls']
df['Total Charge'] = df['Day Charge'] + df['Eve Charge'] + df['Night Charge'] + df['Intl Charge']

# Model inputs and target ('Churn' is cast to 0/1 integers).
feature_columns = ['Account Length', 'VMail Message', 'Total Mins', 'Total Calls', 'Total Charge', 'CustServ Calls']
X = df[feature_columns]
y = df['Churn'].astype(int)

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# mean/variance of the held-out rows leak into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on training rows only, then apply the same transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full scaled matrix, kept under its original name for any cell that still expects it.
X_scaled = scaler.transform(X)

# Model building: random forest with a fixed seed for repeatable results.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out 20%.
accuracy = model.score(X_test, y_test)
print(f"Model Accuracy: {accuracy:.2f}")

Model Accuracy: 0.97


In [3]:
import pandas as pd
import numpy as np

# Pin NumPy's global random state so any random draws made after this point repeat.
np.random.seed(42)

# Catalogue of 25 tiered telecom plans. Every attribute is an evenly spaced
# ramp (np.linspace) truncated to whole numbers.
plans = pd.DataFrame({'Plan ID': range(1, 26)}).assign(**{
    'Monthly Cost': np.linspace(20, 200, 25).astype(int),       # 20 up to 200
    'Included Mins': np.linspace(200, 1500, 25).astype(int),    # 200 up to 1500
    'Included Calls': np.linspace(100, 500, 25, dtype=int),     # 100 up to 500
    'Included Intl Mins': np.linspace(1, 30, 25).astype(int),   # 1 up to 30
})

# One human-readable sentence per plan, built from that plan's own columns.
plans['Plan Description'] = [
    f"Plan {plan_id} includes {included_mins} minutes"
    f" and {included_intl_mins} international minutes."
    for plan_id, included_mins, included_intl_mins in zip(
        plans['Plan ID'], plans['Included Mins'], plans['Included Intl Mins']
    )
]

# Show the catalogue (head(25) covers all 25 rows).
print(plans.head(25))

    Plan ID  Monthly Cost  Included Mins  Included Calls  Included Intl Mins  \
0         1            20            200             100                   1   
1         2            27            254             116                   2   
2         3            35            308             133                   3   
3         4            42            362             150                   4   
4         5            50            416             166                   5   
5         6            57            470             183                   7   
6         7            65            525             200                   8   
7         8            72            579             216                   9   
8         9            80            633             233                  10   
9        10            87            687             250                  11   
10       11            95            741             266                  13   
11       12           102            795

In [5]:
import pandas as pd
import numpy as np

def check_for_outliers(user_data, mins_threshold=2000, calls_threshold=1000, charge_threshold=200):
    """Return True when the user's usage exceeds any outlier threshold.

    Only the first row of ``user_data`` is inspected.

    Args:
        user_data: DataFrame with 'Total Mins', 'Total Calls' and
            'Total Charge' columns and at least one row.
        mins_threshold: Total minutes above which the user is an outlier.
        calls_threshold: Total calls above which the user is an outlier.
        charge_threshold: Total charge above which the user is an outlier.

    Returns:
        bool: True if any of the three values is strictly greater than its
        threshold, otherwise False.

    Raises:
        KeyError: If one of the required columns is missing.
        IndexError: If ``user_data`` has no rows.
    """
    # Read all three values up front so a missing column always surfaces,
    # regardless of which comparison would short-circuit.
    user_mins = user_data['Total Mins'].values[0]
    user_calls = user_data['Total Calls'].values[0]
    user_charge = user_data['Total Charge'].values[0]

    # bool(...) turns the NumPy boolean produced by the comparisons into a plain Python bool.
    return bool(
        user_mins > mins_threshold
        or user_calls > calls_threshold
        or user_charge > charge_threshold
    )

def _plan_records(plan_frame):
    """Turn each plan row into a dict holding its ID, description and monthly cost."""
    # iterrows() is kept on purpose (instead of to_dict) so the values keep
    # the same per-row types the callers were already receiving.
    return [
        {
            'Plan ID': plan['Plan ID'],
            'Plan Description': plan['Plan Description'],
            'Monthly Cost': plan['Monthly Cost'],
        }
        for _, plan in plan_frame.iterrows()
    ]


def recommend_plans(user_data, plans, top_n=3):
    """Rank the plan catalogue against one user's usage.

    Only the first row of ``user_data`` is used. Outlier users (as decided by
    ``check_for_outliers``) are pointed at a single unlimited plan, with the
    whole catalogue listed as alternatives. Everyone else gets the ``top_n``
    plans with the lowest mismatch score, followed by the remaining plans in
    score order.

    The mismatch score of a plan is::

        |Included Mins - user mins|
        + max(0, user calls  - Included Calls)
        + max(0, user charge - Monthly Cost)

    ``plans`` is not modified: the score is computed on a copy, so repeated
    calls never depend on state left behind by an earlier call.

    Args:
        user_data: DataFrame with 'Total Mins', 'Total Calls' and
            'Total Charge' columns and at least one row.
        plans: DataFrame with 'Plan ID', 'Plan Description', 'Monthly Cost',
            'Included Mins' and 'Included Calls' columns.
        top_n: Number of plans to return as recommendations.

    Returns:
        tuple: ``(user_mins, user_calls, user_charge, recommendations,
        other_plans, is_outlier)`` where the two plan entries are lists of
        dicts with 'Plan ID', 'Plan Description' and 'Monthly Cost' keys.
    """
    user_mins = user_data['Total Mins'].values[0]
    user_calls = user_data['Total Calls'].values[0]
    user_charge = user_data['Total Charge'].values[0]

    if check_for_outliers(user_data):
        unlimited = [{
            'Plan ID': 'UNLIMITED',
            'Plan Description': 'Unlimited Postpaid Plan',
            'Monthly Cost': 'As Per Usage',
        }]
        # Every catalogue plan is still offered as an alternative.
        return user_mins, user_calls, user_charge, unlimited, _plan_records(plans), True

    # Vectorised mismatch score; clip(lower=0) plays the role of max(0, ...).
    mismatch = (
        (plans['Included Mins'] - user_mins).abs()
        + (user_calls - plans['Included Calls']).clip(lower=0)
        + (user_charge - plans['Monthly Cost']).clip(lower=0)
    )

    # assign() returns a new frame, leaving the caller's catalogue untouched.
    # Sort once and slice it for both result lists.
    ranked = plans.assign(**{'Expected Cost': mismatch}).sort_values('Expected Cost')

    recommendations = _plan_records(ranked.head(top_n))
    other_plan_details = _plan_records(ranked.iloc[top_n:])

    return user_mins, user_calls, user_charge, recommendations, other_plan_details, False

def _print_plan_section(title, plan_records, with_separator):
    """Print a heading followed by one ID / description / cost entry per plan dict."""
    print(title)
    for plan in plan_records:
        print(f"\nPlan ID: {plan['Plan ID']}")
        print(f"Description: {plan['Plan Description']}")
        cost = plan['Monthly Cost']
        if isinstance(cost, (int, float)):
            print(f"Monthly Cost: ${int(cost)}")  # whole dollars
        else:
            # Non-numeric cost, e.g. the 'As Per Usage' text of the unlimited plan.
            print(f"Monthly Cost: {cost}")
        if with_separator:
            print("---")


def user_interface(df, model, scaler, plans):
    """Prompt for a phone number and print plan recommendations.

    If the number is found in ``df``, that customer's recorded usage drives
    the recommendation. Otherwise the user is asked for their minutes and
    international minutes, and those are used instead.

    Args:
        df: Customer DataFrame with 'Phone Number', 'Total Mins',
            'Total Calls' and 'Total Charge' columns.
        model: Accepted for interface compatibility; not used here.
        scaler: Accepted for interface compatibility; not used here.
        plans: Plan catalogue passed through to ``recommend_plans``.

    Returns:
        None. All results are printed.
    """
    phone_number = input("Enter your phone number: ").strip()
    if not phone_number:
        print("No phone number entered.")
        return

    # input() always returns text, while the CSV column may have been parsed
    # as numbers; comparing both sides as strings lets either form match.
    known_numbers = df['Phone Number'].astype(str).str.strip()
    user_data = df[known_numbers == phone_number]

    if not user_data.empty:
        user_mins, user_calls, user_charge, recommended_plans, other_plans, is_outlier = recommend_plans(user_data, plans)

        print(f"User History for {phone_number}:")
        print(f"Total Mins: {user_mins:.2f}")

        # NOTE(review): call volume is only shown for outlier users; confirm
        # whether it should be printed for everyone.
        if is_outlier:
            print(f"Total Calls: {user_calls:.2f}")

        print(f"Total Charge: {user_charge:.2f}")

        _print_plan_section("## Recommended Plans", recommended_plans, with_separator=False)
        _print_plan_section("\n## Other Available Plans", other_plans, with_separator=False)
        return

    print("Phone number doesn't exist in the data. Please provide your usage details.")

    try:
        user_mins = float(input("Enter the number of minutes: "))
        user_intl_mins = float(input("Enter the number of international minutes: "))
    except ValueError:
        # Non-numeric text would otherwise abort the cell with a traceback.
        print("Usage must be entered as a number.")
        return

    # Written as a positive test so NaN input is rejected as well as negatives.
    if not (user_mins >= 0 and user_intl_mins >= 0):
        print("Usage values must be non-negative numbers.")
        return

    # The dataset's 'Total Mins' includes international minutes, so they are
    # folded into the total here instead of being stored as a call count.
    # Calls and charge are not asked for, so both use a neutral 0.
    user_data = pd.DataFrame({
        'Total Mins': [user_mins + user_intl_mins],
        'Total Calls': [0],
        'Total Charge': [0],
    })

    _, _, _, recommended_plans, other_plans, _ = recommend_plans(user_data, plans)

    _print_plan_section("## Recommended Plans", recommended_plans, with_separator=True)
    _print_plan_section("## Other Available Plans", other_plans, with_separator=True)

# Start the interactive prompt, using the customer data, trained model,
# fitted scaler and plan catalogue built in the earlier cells.
user_interface(df=df, model=model, scaler=scaler, plans=plans)

Phone number doesn't exist in the data. Please provide your usage details.
## Recommended Plans

Plan ID: 2
Description: Plan 2 includes 254 minutes and 2 international minutes.
Monthly Cost: $27
---

Plan ID: 1
Description: Plan 1 includes 200 minutes and 1 international minutes.
Monthly Cost: $20
---

Plan ID: 3
Description: Plan 3 includes 308 minutes and 3 international minutes.
Monthly Cost: $35
---
## Other Available Plans

Plan ID: 4
Description: Plan 4 includes 362 minutes and 4 international minutes.
Monthly Cost: $42
---

Plan ID: 5
Description: Plan 5 includes 416 minutes and 5 international minutes.
Monthly Cost: $50
---

Plan ID: 6
Description: Plan 6 includes 470 minutes and 7 international minutes.
Monthly Cost: $57
---

Plan ID: 7
Description: Plan 7 includes 525 minutes and 8 international minutes.
Monthly Cost: $65
---

Plan ID: 8
Description: Plan 8 includes 579 minutes and 9 international minutes.
Monthly Cost: $72
---

Plan ID: 9
Description: Plan 9 includes 633 mi