In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.cluster import MiniBatchKMeans
import json
import time
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

class EnhancedPatternBasedRecommender:
    def __init__(self, csv_file, time_limit_minutes=2, batch_size=1000):
        self.time_limit = time_limit_minutes * 60
        self.start_time = time.time()
        self.end_time = self.start_time + self.time_limit
        self.batch_size = batch_size

        # Updated product catalog to exactly match the provided catalog
        self.product_catalog = {
            'network_plans': {
                'Fiber 500': {
                    'speed': 500,
                    'price': 45.00,
                    'features': ['500Mbps Connection', 'Includes one standard WIFI router']
                },
                'Fiber 1 Gig': {
                    'speed': 1000,
                    'price': 65.00,
                    'features': ['1Gbps Connection', 'Includes one standard WIFI router']
                },
                'Fiber 2 Gig': {
                    'speed': 2000,
                    'price': 99.00,
                    'features': ['2Gbps Connection', 'Includes one upgraded WIFI router and one extender']
                },
                'Fiber 5 Gig': {
                    'speed': 5000,
                    'price': 129.00,
                    'features': ['5Gbps Connection', 'Includes one premium router']
                },
                'Fiber 7 Gig': {
                    'speed': 7000,
                    'price': 299.00,
                    'features': ['7Gbps Connection', 'Includes one premium router and and an extender at no charge']
                }
            },
            'add_ons': {
                'Additional Extender': {
                    'price': 5.00,
                    'features': ['Additional extender for above products'],
                    'category': 'hardware'
                },
                'Whole-Home Wi-Fi': {
                    'price': 10.00,
                    'features': [
                        'Get the latest generation router with up to two additional extenders provided to Fiber 2 Gig speeds and below and 1 extender for 7 and 5 Gig',
                        'Helps with dead spots, drop-offs and buffering',
                        'Devices are in addition to the free device included with internet service'
                    ],
                    'category': 'connectivity'
                },
                'Unbreakable Wi-Fi': {
                    'price': 25.00,
                    'features': [
                        'Backup internet during unexpected Frontier fiber network outages',
                        'Automatic switchover during network outages',
                        'Easily managed through the mobile app',
                        'Self-install via mobile app or free tech installation',
                        'Includes 130GB of 4G LTE cellular data per month'
                    ],
                    'category': 'connectivity'
                },
                'Battery back-up for Unbreakable Wi-Fi': {
                    'price': 130.00,
                    'features': ['Optional Battery Backup Unit (power pack) offers up to 4 hours of power during outages'],
                    'category': 'hardware',
                    'one_time': True
                },
                'Wi-Fi Security': {
                    'price': 5.00,
                    'features': [
                        'Advanced security managed via the app',
                        'Protects devices connected on the home network',
                        'Parental controls restrict duration, timing, and content',
                        'Ad blocking to improve browsing experience',
                        'Internet activity reports by day, week, month'
                    ],
                    'category': 'security'
                },
                'Wi-Fi Security Plus': {
                    'price': 10.00,
                    'features': [
                        'Includes Wi-Fi Security, Multi-Device Security, VPN & Password Manager',
                        'Advanced security protects devices connected to home network and up to 3 devices',
                        'Parental controls restrict duration, timing, and content',
                        'Ad blocking to stop pop-ups',
                        'Internet activity reports',
                        'Guardian VPN for up to 5 devices',
                        'Password Manager for 1 master password'
                    ],
                    'category': 'security'
                },
                'Total Shield': {
                    'price': 10.00,
                    'features': [
                        'Security for up to 10 devices',
                        'Browsing protection',
                        'Parental controls',
                        'VPN protection',
                        'Password Manager'
                    ],
                    'category': 'security'
                },
                'My Premium Tech Pro': {
                    'price': 10.00,
                    'features': [],
                    'category': 'support'
                },
                'Identity Protection': {
                    'price': 10.00,
                    'features': [
                        'Personal information monitoring',
                        'Up to $1M in identity theft insurance',
                        'Family Add-On includes 1 additional user'
                    ],
                    'category': 'security'
                },
                'Identity Protection Family Add-On': {
                    'price': 5.00,
                    'features': ['Additional user for Identity Protection'],
                    'category': 'security'
                },
                'YouTube TV': {
                    'price': 79.99,
                    'features': [
                        '100+ live channels',
                        'No costly set-top-box',
                        '3 simultaneous streams',
                        'Unlimited DVR storage'
                    ],
                    'category': 'entertainment'
                }
            }
        }

        print(f"Initializing with {time_limit_minutes} minute time limit...")
        self.load_and_prepare_data(csv_file)
        self.recommendations = {}
        self.processed_count = 0
        self.error_count = 0

    def load_and_prepare_data(self, csv_file):
        print("Loading data...")
        self.data = pd.read_csv(csv_file)
        print("Preparing features...")
        self.prepare_features()
        print("Creating customer segments...")
        self.create_customer_segments()
        print(f"Initialized with {len(self.user_mapping)} users.")

    def prepare_features(self):
        required_columns = ['acct_id', 'rx_avg_bps', 'tx_avg_bps', 'wireless_clients_count', 
                          'wired_clients_count', 'rssi_mean', 'extenders', 'network_speed', 
                          'city', 'state']
        
        for col in required_columns:
            if col not in self.data.columns:
                raise ValueError(f"Required column '{col}' not found in data")

        self.user_mapping = {user: idx for idx, user in enumerate(self.data['acct_id'].unique())}
        
        self.user_features = self.data.groupby('acct_id').agg({
            'rx_avg_bps': 'mean',
            'tx_avg_bps': 'mean',
            'wireless_clients_count': 'mean',
            'wired_clients_count': 'mean',
            'rssi_mean': 'mean',
            'extenders': lambda x: x.fillna(0).max(),
            'network_speed': 'first',
            'city': 'first',
            'state': 'first'
        }).reset_index()

        self.user_features['network_speed'] = self.user_features['network_speed'].fillna('500M')
        self.user_features['current_speed'] = pd.to_numeric(
            self.user_features['network_speed'].str.rstrip('M'),
            errors='coerce'
        ).fillna(500)

        self.user_features['total_bandwidth'] = (
            self.user_features['rx_avg_bps'].fillna(0) + 
            self.user_features['tx_avg_bps'].fillna(0)
        )
        self.user_features['total_devices'] = (
            self.user_features['wireless_clients_count'].fillna(0) + 
            self.user_features['wired_clients_count'].fillna(0)
        )
        self.user_features['wireless_ratio'] = (
            self.user_features['wireless_clients_count'].fillna(0) / 
            (self.user_features['total_devices'] + 1)
        )
        self.user_features['bandwidth_utilization'] = (
            self.user_features['total_bandwidth'] / 
            (self.user_features['current_speed'] * 1e6)
        ).clip(0, 1)

        numeric_features = ['total_bandwidth', 'total_devices', 'rssi_mean', 
                          'bandwidth_utilization', 'wireless_ratio']
        scaler = StandardScaler()
        self.user_features[numeric_features] = scaler.fit_transform(
            self.user_features[numeric_features].fillna(0)
        )

    def create_customer_segments(self):
        cluster_features = ['total_bandwidth', 'total_devices', 'rssi_mean', 
                          'bandwidth_utilization', 'wireless_ratio']
        kmeans = MiniBatchKMeans(n_clusters=5, batch_size=self.batch_size)
        self.user_features['segment'] = kmeans.fit_predict(self.user_features[cluster_features])
        X = self.user_features[cluster_features]
        y = self.user_features['segment']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        self.classifier = RandomForestClassifier(n_estimators=100)
        self.classifier.fit(X_train, y_train)
        y_pred = self.classifier.predict(X_test)
        print(f"Classifier accuracy: {accuracy_score(y_test, y_pred)}")

    def recommend_plan(self, current_speed, utilization, total_devices):
        speed_options = sorted([plan['speed'] for plan in self.product_catalog['network_plans'].values()])
        required_speed = current_speed * max(1.0, utilization) * np.log1p(total_devices)
        
        for speed in speed_options:
            if speed >= required_speed:
                plan_name = f"Fiber {speed if speed >= 1000 else str(speed)}"
                if speed >= 1000:
                    plan_name = plan_name.replace('1000', '1 Gig').replace('2000', '2 Gig').replace('5000', '5 Gig').replace('7000', '7 Gig')
                plan_data = self.product_catalog['network_plans'][plan_name]
                return {
                    'name': plan_name,
                    'speed': speed,
                    'price': plan_data['price'],
                    'features': plan_data['features']
                }
        
        # If no plan meets requirements, recommend highest available
        plan_name = 'Fiber 7 Gig'
        plan_data = self.product_catalog['network_plans'][plan_name]
        return {
            'name': plan_name,
            'speed': 7000,
            'price': plan_data['price'],
            'features': plan_data['features']
        }

    def recommend_addons(self, total_devices, signal_strength, wireless_ratio, city, city_data):
        try:
            recommendations = []
            
            total_devices = float(total_devices) if pd.notnull(total_devices) else 0
            signal_strength = float(signal_strength) if pd.notnull(signal_strength) else -70
            wireless_ratio = float(wireless_ratio) if pd.notnull(wireless_ratio) else 0

            if wireless_ratio > 0.3 or signal_strength < -60:
                recommendations.append({
                    'product': 'Whole-Home Wi-Fi',
                    'reason': "Improve coverage and eliminate dead spots",
                    'price': self.product_catalog['add_ons']['Whole-Home Wi-Fi']['price'],
                    'features': self.product_catalog['add_ons']['Whole-Home Wi-Fi']['features']
                })

            if total_devices > 5:
                recommendations.append({
                    'product': 'Wi-Fi Security Plus',
                    'reason': "Protect multiple devices and enhance security",
                    'price': self.product_catalog['add_ons']['Wi-Fi Security Plus']['price'],
                    'features': self.product_catalog['add_ons']['Wi-Fi Security Plus']['features']
                })
            elif total_devices > 0:
                recommendations.append({
                    'product': 'Wi-Fi Security',
                    'reason': "Basic protection for your devices",
                    'price': self.product_catalog['add_ons']['Wi-Fi Security']['price'],
                    'features': self.product_catalog['add_ons']['Wi-Fi Security']['features']
                })

            if wireless_ratio > 0.5 and total_devices > 3:
                recommendations.append({
                    'product': 'Unbreakable Wi-Fi',
                    'reason': "Ensure uninterrupted connectivity",
                    'price': self.product_catalog['add_ons']['Unbreakable Wi-Fi']['price'],
                    'features': self.product_catalog['add_ons']['Unbreakable Wi-Fi']['features']
                })

            recommendations.append({
                'product': 'My Premium Tech Pro',
                'reason': "24/7 technical support",
                'price': self.product_catalog['add_ons']['My Premium Tech Pro']['price'],
                'features': self.product_catalog['add_ons']['My Premium Tech Pro']['features']
            })

            return recommendations[:3]

        except Exception as e:
            print(f"Error in recommend_addons: {str(e)}")
            return [{
                'product': 'My Premium Tech Pro',
                'reason': "24/7 technical support",
                'price': self.product_catalog['add_ons']['My Premium Tech Pro']['price'],
                'features': self.product_catalog['add_ons']['My Premium Tech Pro']['features']
            }]

    def get_recommendations(self, user_id):
        try:
            user_data = self.user_features[self.user_features['acct_id'] == user_id]
            if user_data.empty:
                raise ValueError(f"No data found for user {user_id}")
            
            user_data = user_data.iloc[0]
            
            segment_features = ['total_bandwidth', 'total_devices', 'rssi_mean', 
                              'bandwidth_utilization', 'wireless_ratio']
            segment_data = user_data[segment_features].fillna(0)
            segment = int(self.classifier.predict([segment_data])[0])
            
            recommended_plan = self.recommend_plan(
                current_speed=float(user_data['current_speed']),
                utilization=float(user_data['bandwidth_utilization']),
                total_devices=float(user_data['total_devices'])
            )
            
            recommended_addons = self.recommend_addons(
                total_devices=user_data['total_devices'],
                signal_strength=user_data['rssi_mean'],
                wireless_ratio=user_data['wireless_ratio'],
                city=user_data['city'],
                city_data=self.user_features
            )
            
            return {
                "id": str(user_id),
                "insights": [{
                    "timestamp": datetime.now().isoformat(),
                    "type": "recommendation",
                    "data": {
                        "user_profile": {
                            "segment": segment,
                            "bandwidth_usage_mbps": float(user_data['bandwidth_utilization']),
                            "total_devices": int(user_data['total_devices']),
                            "current_speed": float(user_data['current_speed']),
                            "current_extenders": int(user_data['extenders']),
                            "signal_strength": float(user_data['rssi_mean']),
                            "location": {
                                "city": str(user_data['city']),
                                "state": str(user_data['state'])
                            }
                        },
                        "recommended_plan": recommended_plan,
                        "recommended_add_ons": recommended_addons
                    }
                }]
            }
        except Exception as e:
            print(f"Error generating recommendations for {user_id}: {str(e)}")
            return None

    def process_users(self):
        print("Processing users...")
        for user_id in self.user_features['acct_id']:
            if time.time() > self.end_time:
                print("Time limit exceeded, stopping processing.")
                break
            try:
                recommendations = self.get_recommendations(user_id)
                if recommendations:
                    self.recommendations[user_id] = recommendations
                self.processed_count += 1
            except Exception as e:
                self.error_count += 1
                print(f"Error processing user {user_id}: {e}")
        print(f"Processed {self.processed_count} users with {self.error_count} errors.")

    def save_recommendations(self, file_name="recommendations.json"):
        """Save all recommendations to a JSON file."""
        try:
            print("Preparing recommendations for saving...")
            # Filter out None values and ensure all data is JSON serializable
            valid_recommendations = {
                str(k): {
                    "user_id": str(k),
                    "timestamp": v['insights'][0]['timestamp'],
                    "user_profile": {
                        "segment": v['insights'][0]['data']['user_profile']['segment'],
                        "bandwidth_usage_mbps": v['insights'][0]['data']['user_profile']['bandwidth_usage_mbps'],
                        "total_devices": v['insights'][0]['data']['user_profile']['total_devices'],
                        "current_speed": v['insights'][0]['data']['user_profile']['current_speed'],
                        "current_extenders": v['insights'][0]['data']['user_profile']['current_extenders'],
                        "signal_strength": v['insights'][0]['data']['user_profile']['signal_strength'],
                        "location": {
                            "city": v['insights'][0]['data']['user_profile']['location']['city'],
                            "state": v['insights'][0]['data']['user_profile']['location']['state']
                        }
                    },
                    "recommendations": {
                        "plan": {
                            "name": v['insights'][0]['data']['recommended_plan']['name'],
                            "speed": v['insights'][0]['data']['recommended_plan']['speed'],
                            "price": v['insights'][0]['data']['recommended_plan']['price'],
                            "features": v['insights'][0]['data']['recommended_plan']['features']
                        },
                        "add_ons": [{
                            "product": addon['product'],
                            "reason": addon['reason'],
                            "price": addon['price'],
                            "features": addon['features']
                        } for addon in v['insights'][0]['data']['recommended_add_ons']]
                    }
                }
                for k, v in self.recommendations.items() 
                if v is not None
            }
            
            print(f"Saving {len(valid_recommendations)} recommendations...")
            with open(file_name, "w") as file:
                json.dump(valid_recommendations, file, indent=4, default=str)
            print(f"Recommendations saved to {file_name}")
            
        except Exception as e:
            print(f"Error saving recommendations: {str(e)}")
            # Attempt to save to a backup file
            try:
                backup_file = f"backup_{int(time.time())}.json"
                with open(backup_file, "w") as file:
                    json.dump(valid_recommendations, file, default=str)
                print(f"Backup saved to {backup_file}")
            except:
                print("Failed to save backup file")


def main():
    try:
        recommender = EnhancedPatternBasedRecommender("output.csv", time_limit_minutes=5)
        recommender.process_users()
        recommender.save_recommendations("recommendations.json")
    except Exception as e:
        print(f"Error in main: {str(e)}")


if __name__ == "__main__":
    main()

Initializing with 5 minute time limit...
Loading data...
Preparing features...
Creating customer segments...
Classifier accuracy: 0.9962951936753663
Initialized with 151152 users.
Processing users...
Time limit exceeded, stopping processing.
Processed 7745 users with 0 errors.
Preparing recommendations for saving...
Saving 7745 recommendations...
Recommendations saved to recommendations.json
