In [23]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.cluster import MiniBatchKMeans
import json
import time
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

class EnhancedPatternBasedRecommender:
    """Recommend network plans and add-ons from per-account usage data.

    On construction the recommender loads a CSV, aggregates it to one row per
    ``acct_id``, derives usage features, clusters the accounts into segments
    and trains a classifier on those segments. ``process_users`` then fills
    ``self.recommendations`` (keyed by account id) until every account has
    been handled or the time limit expires.

    Raw feature values stay in their original columns of ``user_features``;
    standardized copies live in separate ``<name>_scaled`` columns and are
    used only for clustering and segment prediction. Business rules (device
    count thresholds, signal strength thresholds, plan sizing) therefore
    operate on real-world values rather than z-scores.
    """

    # Features that drive clustering / segment classification.
    _SEGMENT_FEATURES = ['total_bandwidth', 'total_devices', 'rssi_mean',
                         'bandwidth_utilization', 'wireless_ratio']
    # Suffix of the columns holding the standardized copies of those features.
    _SCALED_SUFFIX = '_scaled'

    def __init__(self, csv_file, time_limit_minutes=5, batch_size=1000):
        """Load ``csv_file`` and build features, segments and the classifier.

        Args:
            csv_file: Path (or buffer) accepted by ``pandas.read_csv``.
            time_limit_minutes: Processing budget; the clock starts here, so
                it also covers data loading and model fitting.
            batch_size: Mini-batch size passed to ``MiniBatchKMeans``.

        Raises:
            ValueError: If a required column is missing from the data.
        """
        self.time_limit = time_limit_minutes * 60
        self.start_time = time.time()
        self.end_time = self.start_time + self.time_limit
        self.batch_size = batch_size

        # Product catalog (keeping the existing catalog)
        self.product_catalog = {
            'network_plans': {
                '100M': {'speed': 100, 'price': 35.00, 'category': 'basic'},
                '150M': {'speed': 150, 'price': 39.00, 'category': 'basic'},
                '200M': {'speed': 200, 'price': 40.00, 'category': 'basic'},
                '300M': {'speed': 300, 'price': 42.00, 'category': 'standard'},
                '500M': {'speed': 500, 'price': 45.00, 'category': 'standard'},
                '1000M': {'speed': 1000, 'price': 65.00, 'category': 'premium'},
                '2000M': {'speed': 2000, 'price': 99.00, 'category': 'premium'},
                '5000M': {'speed': 5000, 'price': 129.00, 'category': 'ultra'},
                '7000M': {'speed': 7000, 'price': 299.00, 'category': 'ultra'}
            },
            'add_ons': {
                'Whole-Home Wi-Fi': {'price': 10.00, 'category': 'connectivity'},
                'Additional Extender': {'price': 5.00, 'category': 'connectivity'},
                'Wi-Fi Security': {'price': 5.00, 'category': 'security'},
                'Wi-Fi Security Plus': {'price': 10.00, 'category': 'security'},
                'Total Shield': {'price': 10.00, 'category': 'security'},
                'My Premium Tech Pro': {'price': 10.00, 'category': 'support'},
                'YouTube TV': {'price': 79.99, 'category': 'entertainment'}
            }
        }

        print(f"Initializing with {time_limit_minutes} minute time limit...")
        self.load_and_prepare_data(csv_file)
        self.recommendations = {}
        self.processed_count = 0
        self.error_count = 0

    def _scaled_feature_columns(self):
        """Return the names of the standardized segment-feature columns."""
        return [f"{name}{self._SCALED_SUFFIX}" for name in self._SEGMENT_FEATURES]

    def load_and_prepare_data(self, csv_file):
        """Read the CSV, build per-account features and fit the segment model."""
        print("Loading data...")
        self.data = pd.read_csv(csv_file)
        print("Preparing features...")
        self.prepare_features()
        print("Creating customer segments...")
        self.create_customer_segments()
        print(f"Initialized with {len(self.user_mapping)} users.")

    def prepare_features(self):
        """Aggregate ``self.data`` to one row per account and derive features.

        Populates ``self.user_mapping`` (account id -> running index) and
        ``self.user_features``. Raw features keep their natural scale; their
        standardized copies are written to ``<name>_scaled`` columns.

        Raises:
            ValueError: If a required column is missing from ``self.data``.
        """
        required_columns = ['acct_id', 'rx_avg_bps', 'tx_avg_bps', 'wireless_clients_count',
                            'wired_clients_count', 'rssi_mean', 'extenders', 'network_speed',
                            'city', 'state']

        for col in required_columns:
            if col not in self.data.columns:
                raise ValueError(f"Required column '{col}' not found in data")

        self.user_mapping = {user: idx for idx, user in enumerate(self.data['acct_id'].unique())}

        self.user_features = self.data.groupby('acct_id').agg({
            'rx_avg_bps': 'mean',
            'tx_avg_bps': 'mean',
            'wireless_clients_count': 'mean',
            'wired_clients_count': 'mean',
            'rssi_mean': 'mean',
            'extenders': lambda x: x.fillna(0).max(),  # Handle NaN in extenders
            'network_speed': 'first',
            'city': 'first',
            'state': 'first'
        }).reset_index()

        # Missing or unparsable plan names fall back to the 100M plan.
        # astype(str) keeps purely numeric speeds (e.g. 500) parseable instead
        # of letting the .str accessor turn them into NaN.
        self.user_features['network_speed'] = self.user_features['network_speed'].fillna('100M')
        self.user_features['current_speed'] = pd.to_numeric(
            self.user_features['network_speed'].astype(str).str.rstrip('M'),
            errors='coerce'
        ).fillna(100)

        # Derived features, with NaN inputs treated as zero.
        self.user_features['total_bandwidth'] = (
            self.user_features['rx_avg_bps'].fillna(0) +
            self.user_features['tx_avg_bps'].fillna(0)
        )
        self.user_features['total_devices'] = (
            self.user_features['wireless_clients_count'].fillna(0) +
            self.user_features['wired_clients_count'].fillna(0)
        )
        # The +1 in the denominator avoids division by zero for idle accounts.
        self.user_features['wireless_ratio'] = (
            self.user_features['wireless_clients_count'].fillna(0) /
            (self.user_features['total_devices'] + 1)
        )
        self.user_features['bandwidth_utilization'] = (
            self.user_features['total_bandwidth'] /
            (self.user_features['current_speed'] * 1e6)  # plan speed in Mbps -> bps
        ).clip(0, 1)  # Clip to reasonable range

        # Standardize into separate columns so the raw values remain available
        # to the threshold-based recommendation rules.
        scaler = StandardScaler()
        scaled_values = scaler.fit_transform(
            self.user_features[self._SEGMENT_FEATURES].fillna(0)
        )
        for position, column in enumerate(self._scaled_feature_columns()):
            self.user_features[column] = scaled_values[:, position]

    def create_customer_segments(self):
        """Cluster accounts into five segments and train a classifier on them.

        Adds a ``segment`` column to ``self.user_features`` and stores the
        fitted classifier in ``self.classifier``. Both models are seeded with
        the same value as the train/test split so results are reproducible.
        """
        scaled_columns = self._scaled_feature_columns()
        X = self.user_features[scaled_columns]
        kmeans = MiniBatchKMeans(n_clusters=5, batch_size=self.batch_size, random_state=42)
        self.user_features['segment'] = kmeans.fit_predict(X)
        y = self.user_features['segment']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        self.classifier = RandomForestClassifier(n_estimators=100, random_state=42)
        self.classifier.fit(X_train, y_train)
        y_pred = self.classifier.predict(X_test)
        print(f"Classifier accuracy: {accuracy_score(y_test, y_pred)}")

    def recommend_plan(self, current_speed, utilization, total_devices):
        """Pick the slowest catalog plan that covers the estimated demand.

        Demand is ``current_speed * max(1, utilization) * log1p(total_devices)``.

        Args:
            current_speed: Current plan speed in Mbps.
            utilization: Fraction of the current plan in use.
            total_devices: Number of connected devices; negative or NaN
                values are treated as zero so the estimate never becomes NaN.

        Returns:
            dict with ``name``, ``speed``, ``price`` and ``category``. When
            no catalog plan is fast enough, a "Custom Ultra Plan" priced like
            the fastest catalog plan is returned.
        """
        plans = self.product_catalog['network_plans']
        speed_options = sorted(float(name.rstrip('M')) for name in plans)

        # `x > 0` is False for NaN as well, so both bad cases collapse to 0.
        device_count = total_devices if total_devices > 0 else 0
        required_speed = current_speed * max(1.0, utilization) * np.log1p(device_count)

        for speed in speed_options:
            if speed >= required_speed:
                plan_name = f"{int(speed)}M"
                plan = plans[plan_name]
                return {
                    'name': f"Fiber {plan_name}",
                    'speed': speed,
                    'price': plan['price'],
                    'category': plan['category']
                }

        top_plan = plans[f"{int(speed_options[-1])}M"]
        return {
            'name': "Custom Ultra Plan",
            'speed': speed_options[-1],
            'price': top_plan['price'],
            'category': top_plan['category']
        }

    def recommend_addons(self, total_devices, signal_strength, wireless_ratio, city, city_data):
        """Recommend up to three add-ons from usage patterns and city trends.

        Args:
            total_devices: Device count; null is treated as 0.
            signal_strength: Mean RSSI; null is treated as -70.
            wireless_ratio: Share of wireless devices; null is treated as 0.
            city: City name used to look up similar users (ignored unless it
                is a string).
            city_data: DataFrame with a ``city`` column and, optionally, an
                ``add_ons`` column listing the add-on each user has.

        Returns:
            A non-empty list of dicts with ``product``, ``reason`` and
            ``price``. Any unexpected error yields a single default
            recommendation instead of propagating.
        """
        try:
            addon_catalog = self.product_catalog['add_ons']
            recommendations = []

            # Ensure parameters are numeric and within reasonable ranges
            total_devices = float(total_devices) if pd.notnull(total_devices) else 0
            signal_strength = float(signal_strength) if pd.notnull(signal_strength) else -70
            wireless_ratio = float(wireless_ratio) if pd.notnull(wireless_ratio) else 0

            if wireless_ratio > 0.3 or signal_strength < -60:
                recommendations.append({
                    'product': 'Whole-Home Wi-Fi',
                    'reason': "For better coverage",
                    'price': addon_catalog['Whole-Home Wi-Fi']['price']
                })

            if total_devices > 3:
                recommendations.append({
                    'product': 'Wi-Fi Security Plus',
                    'reason': "For enhanced security with multiple devices",
                    'price': addon_catalog['Wi-Fi Security Plus']['price']
                })

            # City-based suggestion: most common add-on among users in the same city.
            if isinstance(city, str) and not city_data.empty:
                similar_users = city_data[city_data['city'] == city]
                if not similar_users.empty and 'add_ons' in similar_users.columns:
                    addon_modes = similar_users['add_ons'].mode()
                    # mode() is empty when every value is NaN; skip rather than
                    # raise and lose the recommendations gathered so far.
                    if not addon_modes.empty:
                        most_common_addon = addon_modes.iloc[0]
                        if most_common_addon in addon_catalog:
                            recommendations.append({
                                'product': most_common_addon,
                                'reason': "Popular in your city",
                                'price': addon_catalog[most_common_addon]['price']
                            })

            # Ensure we always have at least one recommendation
            if not recommendations:
                recommendations.append({
                    'product': 'My Premium Tech Pro',
                    'reason': "To enhance your network security and support",
                    'price': addon_catalog['My Premium Tech Pro']['price']
                })

            return recommendations[:3]  # Limit to top 3 recommendations

        except Exception as e:
            # Deliberate best-effort: add-on advice must never abort a user.
            print(f"Error in recommend_addons: {str(e)}")
            return [{
                'product': 'My Premium Tech Pro',
                'reason': "Recommended for all users",
                'price': self.product_catalog['add_ons']['My Premium Tech Pro']['price']
            }]

    def process_users(self):
        """Generate recommendations for every account until time runs out.

        ``processed_count`` counts accounts that were attempted without an
        exception reaching this loop; ``error_count`` counts accounts for
        which no recommendation could be produced (either an exception was
        raised or ``get_recommendations`` returned ``None``).
        """
        print("Processing users...")
        for user_id in self.user_features['acct_id']:
            if time.time() > self.end_time:
                print("Time limit exceeded, stopping processing.")
                break
            try:
                recommendations = self.get_recommendations(user_id)
            except Exception as e:
                self.error_count += 1
                print(f"Error processing user {user_id}: {e}")
                continue

            self.processed_count += 1
            if recommendations:
                self.recommendations[user_id] = recommendations
            else:
                # get_recommendations reports its own failures by returning None.
                self.error_count += 1
        print(f"Processed {self.processed_count} users with {self.error_count} errors.")

    def get_recommendations(self, user_id):
        """Build the recommendation payload for one account.

        Args:
            user_id: Value of ``acct_id`` to look up in ``self.user_features``.

        Returns:
            A dict with ``id`` and a one-element ``insights`` list holding the
            user profile, the recommended plan and the recommended add-ons,
            or ``None`` if anything goes wrong (the error is printed).
        """
        try:
            matches = self.user_features[self.user_features['acct_id'] == user_id]
            if matches.empty:
                raise ValueError(f"No data found for user {user_id}")

            user_row = matches.iloc[[0]]   # one-row frame, keeps column names
            user_data = matches.iloc[0]

            # The classifier was trained on the standardized columns.
            segment_input = user_row[self._scaled_feature_columns()].fillna(0)
            segment = int(self.classifier.predict(segment_input)[0])

            # Business rules below work on raw (unscaled) values.
            recommended_plan = self.recommend_plan(
                current_speed=float(user_data['current_speed']),
                utilization=float(user_data['bandwidth_utilization']),
                total_devices=float(user_data['total_devices'])
            )

            recommended_addons = self.recommend_addons(
                total_devices=user_data['total_devices'],
                signal_strength=user_data['rssi_mean'],
                wireless_ratio=user_data['wireless_ratio'],
                city=user_data['city'],
                city_data=self.user_features
            )

            return {
                "id": str(user_id),
                "insights": [{
                    "timestamp": datetime.now().isoformat(),
                    "type": "recommendation",
                    "data": {
                        "user_profile": {
                            "segment": segment,
                            # total_bandwidth is rx+tx in bps; report it in Mbps.
                            "bandwidth_usage_mbps": float(user_data['total_bandwidth']) / 1e6,
                            "total_devices": int(user_data['total_devices']),
                            "current_speed": float(user_data['current_speed']),
                            "current_extenders": int(user_data['extenders']),
                            "signal_strength": float(user_data['rssi_mean']),
                            "location": {
                                "city": str(user_data['city']),
                                "state": str(user_data['state'])
                            }
                        },
                        "recommended_plan": recommended_plan,
                        "recommended_add_ons": recommended_addons
                    }
                }]
            }
        except Exception as e:
            print(f"Error generating recommendations for {user_id}: {str(e)}")
            return None
# NOTE: The triple-quoted string below is a disabled JSON-writing variant of
# save_recommendations. It is an inert string literal (never executed) kept
# only for reference; the active implementation is the module-level
# save_recommendations function that follows it.
'''
    def save_recommendations(self, file_name="recommendations.json"):
        """Save all recommendations to a JSON file."""
        try:
            # Filter out None values and ensure all data is JSON serializable
            valid_recommendations = {
                str(k): v for k, v in self.recommendations.items() 
                if v is not None
            }
            
            with open(file_name, "w") as file:
                json.dump(valid_recommendations, file, indent=4, default=str)
            print(f"Recommendations saved to {file_name}")
            
        except Exception as e:
            print(f"Error saving recommendations: {str(e)}")
            # Attempt to save to a backup file
            try:
                backup_file = f"backup_{int(time.time())}.json"
                with open(backup_file, "w") as file:
                    json.dump(valid_recommendations, file)
                print(f"Backup saved to {backup_file}")
            except:
                print("Failed to save backup file")
'''

def save_recommendations(self, file_name="recommendations.json"):
    """Flatten all recommendations to one row per user and save them as CSV.

    This is a module-level function: pass the recommender (any object with a
    ``recommendations`` dict) as ``self``.

    Each row holds the user profile, the recommended plan
    (``recommended_plan_*`` columns) and up to N add-ons
    (``addon_<i>_name`` / ``_reason`` / ``_price``). Users with fewer add-ons
    get empty cells in the unused add-on columns.

    Args:
        self: Object exposing ``recommendations`` (user id -> payload or None).
        file_name: Destination path. NOTE: the content is CSV even though the
            default name ends in ``.json``; the default is kept unchanged for
            backward compatibility.

    If writing fails, the error is printed and the rows collected so far are
    written to ``backup_<unix time>.csv`` on a best-effort basis.
    """
    # Defined before the try so the backup path can always reference it.
    flattened_data = []
    try:
        for user_id, recommendation in self.recommendations.items():
            if recommendation is None:
                continue

            insight = recommendation['insights'][0]
            payload = insight['data']
            user_profile = payload['user_profile']
            recommended_plan = payload['recommended_plan']

            row = {
                'user_id': user_id,
                'timestamp': insight['timestamp'],
                'segment': user_profile['segment'],
                'bandwidth_usage_mbps': user_profile['bandwidth_usage_mbps'],
                'total_devices': user_profile['total_devices'],
                'current_speed': user_profile['current_speed'],
                'current_extenders': user_profile['current_extenders'],
                'signal_strength': user_profile['signal_strength'],
                'city': user_profile['location']['city'],
                'state': user_profile['location']['state'],
                # The plan is the main output of the recommender; previously it
                # was read from the payload but never written to the file.
                'recommended_plan_name': recommended_plan.get('name'),
                'recommended_plan_speed': recommended_plan.get('speed'),
                'recommended_plan_price': recommended_plan.get('price'),
                'recommended_plan_category': recommended_plan.get('category'),
            }

            # One column triple per add-on, numbered from 1.
            for idx, addon in enumerate(payload['recommended_add_ons'], start=1):
                row[f'addon_{idx}_name'] = addon['product']
                row[f'addon_{idx}_reason'] = addon['reason']
                row[f'addon_{idx}_price'] = addon['price']

            flattened_data.append(row)

        if flattened_data:
            pd.DataFrame(flattened_data).to_csv(file_name, index=False)
            print(f"Recommendations saved to {file_name}")
        else:
            print("No valid recommendations to save")

    except Exception as e:
        print(f"Error saving recommendations: {str(e)}")
        # Attempt to save whatever was collected to a backup file.
        try:
            backup_file = f"backup_{int(time.time())}.csv"
            if flattened_data:
                pd.DataFrame(flattened_data).to_csv(backup_file, index=False)
                print(f"Backup saved to {backup_file}")
        except Exception as backup_error:
            # Narrower than a bare except: lets KeyboardInterrupt/SystemExit through.
            print("Failed to save backup file")
            print(f"Backup error: {backup_error}")

def main():
    """Build the recommender from ``output.csv``, process users and save results.

    Any exception is reported on stdout instead of propagating, so the script
    always exits normally.
    """
    try:
        recommender = EnhancedPatternBasedRecommender("output.csv", time_limit_minutes=5)
        recommender.process_users()
        # save_recommendations is defined at module level (not as a method of
        # the class), so it must be called as a function with the recommender
        # passed explicitly; recommender.save_recommendations() would raise
        # AttributeError.
        save_recommendations(recommender)
    except Exception as e:
        print(f"Error in main: {str(e)}")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Initializing with 5 minute time limit...
Loading data...
Error in main: [Errno 2] No such file or directory: 'output.csv'
