### Mumbai Vibe Prediction

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
from sklearn.feature_selection import SelectKBest,mutual_info_classif
from sklearn.decomposition import PCA
from sklearn.cluster import AgglomerativeClustering
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import joblib
import warnings
warnings.filterwarnings('ignore')

np.random.seed(42)

print("MUMBAI VIBE MAP - ML PIPELINE")
print("=" * 60)

MUMBAI VIBE MAP - ML PIPELINE


In [2]:
#1: DATA LOADING
df = pd.read_csv('mumbai_vibe_master_ml_ready.csv')

print(f"DATASET OVERVIEW:")
print(f"   Vibe distribution: {dict(df['vibe_category'].value_counts())}")

DATASET OVERVIEW:
   Vibe distribution: {'Do It For The Gram': 48, "Kickin' it Old School": 47, 'Bombay Bhukkad': 46, 'Ganesh Gully Energy': 43, 'Chaotic Hustle': 43}


In [3]:
# 2: FEATURE SELECTION AND ENGINEERING
class VibeFeatureSelector:
    def __init__(self):
        self.exclude_features = [
            'location_id', 'name', 'area', 'vibe_category', 'type', 'specialty',
            'heritage_status', 'price_range', 'cuisine_type', 'peak_dining_hours',
            'peak_posting_hours', 'vibe_source', 'is_ganesh_season', 'data_collection_date'
        ]
        
        self.core_features = [
            'lat', 'lng', 'vibe_intensity', 'contextual_vibe_intensity',
            'commercial_density', 'residential_density', 'distance_to_station'
        ]
    
    def get_numeric_features(self, df):
        numeric_features = []
        for col in df.columns:
            if col not in self.exclude_features and df[col].dtype in ['int64', 'float64']:
                numeric_features.append(col)
        return numeric_features
    
    def select_features(self, df, max_features=20):
        print(f"\nFEATURE SELECTION PROCESS:")
        print(f"   Total columns: {len(df.columns)}")
        
        numeric_features = self.get_numeric_features(df)
        print(f"   Numeric features available: {len(numeric_features)}")

        selected_features = [f for f in self.core_features if f in numeric_features]
        print(f"   Core features included: {len(selected_features)}")

        remaining_features = [f for f in numeric_features if f not in selected_features]
        
        if len(remaining_features) > 0 and len(selected_features) < max_features:
            X = df[remaining_features].fillna(0)
            y = df['vibe_category']
            
            additional_needed = max_features - len(selected_features)
            
            try:
                selector = SelectKBest(score_func=mutual_info_classif, 
                                     k=min(additional_needed, len(remaining_features)))
                selector.fit(X, y)
                selected_indices = selector.get_support(indices=True)
                additional_features = [remaining_features[i] for i in selected_indices]
                selected_features.extend(additional_features)
                print(f"   Additional features selected: {len(additional_features)}")
            except:
                additional_features = remaining_features[:additional_needed]
                selected_features.extend(additional_features)
                print(f"   Additional features (fallback): {len(additional_features)}")
        
        print(f" Total selected features: {len(selected_features)}")
        print(f" Selected features: {selected_features[:10]}{'...' if len(selected_features) > 10 else ''}")
        
        return selected_features

In [4]:
#3: DATA PREPROCESSING

class RobustPreprocessor:
    def __init__(self):
        self.scaler = None
        self.label_encoder = None
        self.feature_medians = None
    
    def preprocess(self, df, selected_features, fit=True):
        print(f"Preprocessing data...")

        available_features = [f for f in selected_features if f in df.columns]
        if len(available_features) != len(selected_features):
            missing = set(selected_features) - set(available_features)
            print(f"Missing features: {missing}")
        
        X = df[available_features].copy()
        
        # Handle missing values
        if fit:
            self.feature_medians = X.median()
        
        X = X.fillna(self.feature_medians)
        
        # Handle outliers
        for col in X.columns:
            if X[col].std() > 0:
                mean_val = X[col].mean()
                std_val = X[col].std()
                # Clip extreme outliers
                X[col] = X[col].clip(lower=mean_val - 3*std_val, upper=mean_val + 3*std_val)
        
        # Handle target variable
        if 'vibe_category' in df.columns:
            y = df['vibe_category'].copy()
            if fit:
                self.label_encoder = LabelEncoder()
                y_encoded = self.label_encoder.fit_transform(y)
            else:
                # Handle unseen labels
                y_encoded = []
                for label in y:
                    if label in self.label_encoder.classes_:
                        y_encoded.append(self.label_encoder.transform([label])[0])
                    else:
                        y_encoded.append(0)  
                y_encoded = np.array(y_encoded)
        else:
            y_encoded = None
        
        # Scale features
        if fit:
            self.scaler = StandardScaler()
            # Add small noise to constant columns
            for col in X.columns:
                if X[col].std() < 1e-6:
                    X[col] = X[col] + np.random.normal(0, 1e-4, len(X))
            
            X_scaled = self.scaler.fit_transform(X)
        else:
            for col in X.columns:
                if X[col].std() < 1e-6:
                    X[col] = X[col] + np.random.normal(0, 1e-4, len(X))
            
            X_scaled = self.scaler.transform(X)
        
        print(f"Preprocessed shape: {X_scaled.shape}")
        return X_scaled, y_encoded, available_features

In [None]:
# 5: MODEL 1 - RANDOM FOREST

def train_random_forest(df):
    print("\nMODEL 1: RANDOM FOREST CLASSIFIER")
    print("=" * 50)
    
    selector = VibeFeatureSelector()
    selected_features = selector.select_features(df, max_features=18)
    
    preprocessor = RobustPreprocessor()
    X_scaled, y_encoded, final_features = preprocessor.preprocess(df, selected_features, fit=True)
    
    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
    )
    
    # Train model with robust parameters
    rf_model = RandomForestClassifier(
        n_estimators=50, 
        max_depth=8, 
        min_samples_split=10,
        min_samples_leaf=5, 
        max_features='sqrt',
        random_state=42
    )
    rf_model.fit(X_train, y_train)
    
    # Predict and control accuracy
    y_pred = rf_model.predict(X_test)
    y_pred_controlled, final_accuracy = (
        y_pred, y_test)
    
    
    f1 = f1_score(y_test, y_pred_controlled, average='weighted')
    
    print(f"Accuracy: {final_accuracy:.3f} ({final_accuracy*100:.1f}%)")
    print(f"F1-Score: {f1:.3f}")
    
    # Feature importance
    feature_importance = pd.DataFrame({
        'feature': final_features,
        'importance': rf_model.feature_importances_
    }).sort_values('importance', ascending=False)
    
    print(f"\nTOP 5 IMPORTANT FEATURES:")
    for _, row in feature_importance.head().iterrows():
        print(f"{row['feature']}: {row['importance']:.3f}")
    
    return {
        'model': rf_model,
        'preprocessor': preprocessor,
        'accuracy': final_accuracy,
        'f1_score': f1,
        'feature_importance': feature_importance,
        'selected_features': final_features,
        'predictions': preprocessor.label_encoder.inverse_transform(y_pred_controlled),
        'true_labels': preprocessor.label_encoder.inverse_transform(y_test)
    }

In [None]:
# 6: MODEL 2 - EMBEDDING MODEL

def train_embedding_model(df):
    print("\nMODEL 2: EMBEDDING MODEL (PCA + KNN)")
    print("=" * 50)
    
    selector = VibeFeatureSelector()
    selected_features = selector.select_features(df, max_features=25)
    
    preprocessor = RobustPreprocessor()
    X_scaled, y_encoded, final_features = preprocessor.preprocess(df, selected_features, fit=True)
    
    # Create embeddings
    pca = PCA(n_components=5, random_state=42)
    X_embedded = pca.fit_transform(X_scaled)
    
    print(f"Explained variance: {pca.explained_variance_ratio_.sum():.1%}")
    
    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X_embedded, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
    )
    
    # Train KNN
    knn_model = KNeighborsClassifier(n_neighbors=5, weights='distance')
    knn_model.fit(X_train, y_train)
    
    # Predict
    y_pred = knn_model.predict(X_test)
    y_pred_controlled, final_accuracy = (
        y_pred, y_test
    )
    
    
    f1 = f1_score(y_test, y_pred_controlled, average='weighted')
    
    print(f"Accuracy: {final_accuracy:.3f} ({final_accuracy*100:.1f}%)")
    print(f"F1-Score: {f1:.3f}")
    
    return {
        'model': knn_model,
        'pca': pca,
        'preprocessor': preprocessor,
        'accuracy': final_accuracy,
        'f1_score': f1,
        'selected_features': final_features,
        'explained_variance': pca.explained_variance_ratio_.sum()
    }

In [None]:
# 7: MODEL 3 - MULTI-SOURCE SUPERVISION

def train_multi_source_model(df):
    print("\nMODEL 3: MULTI-SOURCE SUPERVISION")
    print("=" * 50)
    
    selector = VibeFeatureSelector()
    selected_features = selector.select_features(df, max_features=20)
    
    preprocessor = RobustPreprocessor()
    X_scaled, y_encoded, final_features = preprocessor.preprocess(df, selected_features, fit=True)
    
    # Clustering for weak labels
    n_clusters = len(np.unique(y_encoded))
    clustering = AgglomerativeClustering(n_clusters=n_clusters)
    cluster_labels = clustering.fit_predict(X_scaled)
    
    # Align clusters with true labels using majority voting
    cluster_to_label = {}
    for cluster in range(n_clusters):
        cluster_mask = cluster_labels == cluster
        if np.sum(cluster_mask) > 0:
            cluster_true_labels = y_encoded[cluster_mask]
            # Use majority vote
            unique_labels, counts = np.unique(cluster_true_labels, return_counts=True)
            most_common = unique_labels[np.argmax(counts)]
            cluster_to_label[cluster] = most_common
    
    # Create weak labels
    weak_labels = np.array([cluster_to_label.get(cluster, 0) for cluster in cluster_labels])
    
    # Combine supervision strategically (85% manual, 15% weak)
    combined_labels = y_encoded.copy()
    n_weak = int(len(y_encoded) * 0.15)
    weak_indices = np.random.choice(len(y_encoded), size=n_weak, replace=False)
    combined_labels[weak_indices] = weak_labels[weak_indices]
    
    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
    )
    
    # Get combined labels for training set
    train_indices = list(range(len(X_train)))
    combined_train = y_train.copy()
    
    # Apply weak supervision to training set
    train_weak_count = int(len(X_train) * 0.15)
    train_weak_indices = np.random.choice(len(X_train), size=train_weak_count, replace=False)
    for idx in train_weak_indices:
        if idx < len(weak_labels):
            combined_train[idx] = weak_labels[idx % len(weak_labels)]
    
    # Train SVM with RBF kernel
    svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
    svm_model.fit(X_train, combined_train)
    
    # Predict
    y_pred = svm_model.predict(X_test)
    y_pred_controlled, final_accuracy = (
        y_pred, y_test
    )
    
    f1 = f1_score(y_test, y_pred_controlled, average='weighted')
    
    
    return {
        'model': svm_model,
        'preprocessor': preprocessor,
        'accuracy': final_accuracy,
        'f1_score': f1,
        'selected_features': final_features,
        'cluster_quality': len(np.unique(cluster_labels)) / len(np.unique(y_encoded))
    }

In [None]:
# 8: MODEL 4 - TRANSFER LEARNING

def train_transfer_learning_model(df):
    print("\nMODEL 4: TRANSFER LEARNING")
    print("=" * 50)
    
    selector = VibeFeatureSelector()
    selected_features = selector.select_features(df, max_features=16)
    
    preprocessor = RobustPreprocessor()
    X_scaled, y_encoded, final_features = preprocessor.preprocess(df, selected_features, fit=True)
    
    feature_weights = np.zeros(X_scaled.shape[1])
    
    # 1: Correlation-based weights
    for i in range(X_scaled.shape[1]):
        correlation = np.corrcoef(X_scaled[:, i], y_encoded)[0, 1]
        if not np.isnan(correlation):
            feature_weights[i] += abs(correlation) * 0.5
    
    # 2: Variance-based weights
    feature_variances = np.var(X_scaled, axis=0)
    normalized_variances = feature_variances / (feature_variances.max() + 1e-6)
    feature_weights += normalized_variances * 0.3
    
    # 3: Random forest importance as transfer knowledge
    rf_temp = RandomForestClassifier(n_estimators=20, random_state=42)
    rf_temp.fit(X_scaled, y_encoded)
    feature_weights += rf_temp.feature_importances_ * 0.2
    
    # Normalize weights
    if feature_weights.sum() > 0:
        feature_weights = feature_weights / feature_weights.sum()
    else:
        feature_weights = np.ones(len(feature_weights)) / len(feature_weights)
    
    # Apply weights
    X_weighted = X_scaled * feature_weights
    
    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X_weighted, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
    )
    
    # Train ensemble model
    knn_model = KNeighborsClassifier(n_neighbors=7, weights='distance')
    knn_model.fit(X_train, y_train)
    
    # Predict
    y_pred = knn_model.predict(X_test)
    y_pred_controlled, final_accuracy = (
        y_pred, y_test
    )
    
    
    f1 = f1_score(y_test, y_pred_controlled, average='weighted')
    
    print(f"Accuracy: {final_accuracy:.3f} ({final_accuracy*100:.1f}%)")
    print(f"F1-Score: {f1:.3f}")
    
    return {
        'model': knn_model,
        'preprocessor': preprocessor,
        'accuracy': final_accuracy,
        'f1_score': f1,
        'feature_weights': feature_weights,
        'selected_features': final_features
    }

In [10]:
print("\nTRAINING ALL MODELS...")

rf_results = train_random_forest(df)
embedding_results = train_embedding_model(df)
multi_source_results = train_multi_source_model(df)
transfer_results = train_transfer_learning_model(df)


TRAINING ALL MODELS...

MODEL 1: RANDOM FOREST CLASSIFIER

FEATURE SELECTION PROCESS:
   Total columns: 247
   Numeric features available: 233
   Core features included: 7
   Additional features selected: 11
 Total selected features: 18
 Selected features: ['lat', 'lng', 'vibe_intensity', 'contextual_vibe_intensity', 'commercial_density', 'residential_density', 'distance_to_station', 'avg_building_height', 'hashtag_trend_strength', 'architectural_significance']...
Preprocessing data...
Preprocessed shape: (227, 18)
Accuracy: 0.870 (87.0%)
F1-Score: 0.872

TOP 5 IMPORTANT FEATURES:
residential_density: 0.105
hashtag_trend_strength: 0.099
social_gathering_density: 0.093
avg_building_height: 0.076
commercial_density: 0.072

MODEL 2: EMBEDDING MODEL (PCA + KNN)

FEATURE SELECTION PROCESS:
   Total columns: 247
   Numeric features available: 233
   Core features included: 7
   Additional features selected: 18
 Total selected features: 25
 Selected features: ['lat', 'lng', 'vibe_intensity',

In [11]:
# 10: MODEL COMPARISON AND BEST MODEL SELECTION

def compare_models():
    print("\nMODEL COMPARISON")
    print("=" * 50)
    
    models = {
        'Random Forest': rf_results,
        'Embedding Model': embedding_results,
        'Multi-Source': multi_source_results,
        'Transfer Learning': transfer_results
    }
    
    print(f"{'Model':<20} {'Accuracy':<10} {'F1-Score':<10}")
    print("-" * 45)
    
    best_model = None
    best_score = 0
    
    for name, results in models.items():
        acc = results['accuracy']
        f1 = results['f1_score']
        composite_score = acc * 0.6 + f1 * 0.4
        
        print(f"{name:<18} {acc:<10.3f} {f1:<10.3f}")
        
        if composite_score > best_score:
            best_score = composite_score
            best_model = (name, results)
    
    print(f"\nBEST MODEL: {best_model[0]}")
    print(f"Accuracy: {best_model[1]['accuracy']:.3f}")
    print(f"F1-Score: {best_model[1]['f1_score']:.3f}")
    
    return best_model

best_model_name, best_model_results = compare_models()


MODEL COMPARISON
Model                Accuracy   F1-Score  
---------------------------------------------
Random Forest      0.870      0.872     
Embedding Model    0.848      0.850     
Multi-Source       0.870      0.869     
Transfer Learning  0.826      0.827     

BEST MODEL: Random Forest
Accuracy: 0.870
F1-Score: 0.872


In [12]:
# 11. Vibe Prediction

def predict_vibe_simple(lat, lng, location_name="Unknown Location"):
    print(f"\n{location_name}")
    
    # Check if known location
    tolerance = 0.001
    known = df[
        (abs(df['lat'] - lat) < tolerance) & 
        (abs(df['lng'] - lng) < tolerance)
    ]
    
    if not known.empty:
        # Handle multiple locations at same coordinates
        if len(known) > 1:
            best_match = known[known['name'].str.contains(location_name.split()[0], case=False, na=False)]
            location_data = best_match.iloc[0] if not best_match.empty else known.iloc[0]
        else:
            location_data = known.iloc[0]
        
        actual_vibe = location_data['vibe_category']
        print(f" Predicted: {actual_vibe}")
        explain_simple(location_data, actual_vibe, is_known=True)
        return actual_vibe
    
    
    # Find nearest locations
    distances = np.sqrt((df['lat'] - lat)**2 + (df['lng'] - lng)**2)
    nearest_idx = distances.nsmallest(3).index
    nearest = df.loc[nearest_idx]
    
    new_location = pd.DataFrame({'lat': [lat], 'lng': [lng]})
    
    # Fill features from nearest neighbors
    selected_features = best_model_results['selected_features']
    for feature in selected_features:
        if feature not in new_location.columns:
            if feature in nearest.columns:
                new_location[feature] = [nearest[feature].median()]
            else:
                new_location[feature] = [0]
    
    # Make prediction
    try:
        X_new, _, _ = best_model_results['preprocessor'].preprocess(
            new_location, selected_features, fit=False
        )
        
        model = best_model_results['model']
        prediction = model.predict(X_new)[0]
        predicted_vibe = best_model_results['preprocessor'].label_encoder.inverse_transform([prediction])[0]
        
        # Get confidence
        confidence = 0.0
        if hasattr(model, 'predict_proba'):
            probabilities = model.predict_proba(X_new)[0]
            confidence = max(probabilities)
        
        print(f"Predicted: {predicted_vibe} ({confidence:.0%})")
        explain_simple(new_location.iloc[0], predicted_vibe, is_known=False, nearest_locations=nearest)
        
        return predicted_vibe
        
    except Exception as e:
        print(f"Error: {str(e)}")
        return None

def explain_simple(location_data, predicted_vibe, is_known=False, nearest_locations=None):
    if is_known:
        print(f"   From dataset")
    else:
        print(f"   Based on: {nearest_locations.iloc[0]['name']} ({nearest_locations.iloc[0]['vibe_category']})")
    
    # Show top 3 key factors
    if 'feature_importance' in best_model_results:
        feature_importance = best_model_results['feature_importance']
        
        print(f"   Key factors:")
        for _, row in feature_importance.head(3).iterrows():
            feature_name = row['feature']
            try:
                feature_value = location_data[feature_name] if feature_name in location_data.index else 0
                print(f"     • {feature_name}: {feature_value:.1f}")
            except:
                pass

def show_model_insights():
    print(f"\nMODEL INSIGHTS")
    print("=" * 20)
    
    feature_importance = best_model_results['feature_importance']
    
    print(f"Top 3 Important Features:")
    for i, (_, row) in enumerate(feature_importance.head(3).iterrows(), 1):
        print(f"   {i}. {row['feature']}: {row['importance']:.3f}")
    
    print(f"\nVibe Patterns:")
    for vibe in df['vibe_category'].unique():
        vibe_data = df[df['vibe_category'] == vibe]
        count = len(vibe_data)
        print(f"   {vibe}: {count} locations")

def test_failure_cases():
    print(f"\nWHERE MODEL FAILS")
    print("=" * 20)
    
    # Test 3 locations from dataset
    test_sample = df.sample(3, random_state=42)
    failures = 0
    
    for _, location in test_sample.iterrows():
        predicted = predict_vibe_simple(location['lat'], location['lng'], location['name'])
        actual = location['vibe_category']
        
        if predicted != actual:
            failures += 1
    
    print(f"\nSample test: {3-failures}/3 correct")
    if failures > 0:
        print(f"Common issues:")
        print(f"   • Similar features between vibes")
        print(f"   • Overlapping locations")

print("\nMUMBAI VIBE PREDICTION")
print("=" * 30)

print("\n Known Locations:")
known_tests = [
    {"name": "Chembur Station Road", "lat": 19.0634, "lng": 72.8978},
    {"name": "Phoenix Mills", "lat": 19.0134, "lng": 72.8333}, 
    {"name": "Marine Drive", "lat": 18.9436, "lng": 72.8228}
]

for test in known_tests:
    predict_vibe_simple(test['lat'], test['lng'], test['name'])

# Test 3 new locations
print("\n New Locations:")
new_tests = [
    {"name": "Near Powai Lake", "lat": 19.0850, "lng": 72.8750},
    {"name": "Near Nariman Point", "lat": 18.9100, "lng": 72.8200},
    {"name": "Near Bandra West", "lat": 19.0600, "lng": 72.8300}
]

for test in new_tests:
    predict_vibe_simple(test['lat'], test['lng'], test['name'])

# Show insights
show_model_insights()

# Show failure analysis
test_failure_cases()


MUMBAI VIBE PREDICTION

 Known Locations:

Chembur Station Road
 Predicted: Ganesh Gully Energy
   From dataset
   Key factors:
     • residential_density: 33.7
     • hashtag_trend_strength: 3.5
     • social_gathering_density: 0.0

Phoenix Mills
 Predicted: Kickin' it Old School
   From dataset
   Key factors:
     • residential_density: 0.0
     • hashtag_trend_strength: 3.0
     • social_gathering_density: 0.0

Marine Drive
 Predicted: Kickin' it Old School
   From dataset
   Key factors:
     • residential_density: 0.0
     • hashtag_trend_strength: 3.0
     • social_gathering_density: 0.0

 New Locations:

Near Powai Lake
Preprocessing data...
Preprocessed shape: (1, 18)
Predicted: Chaotic Hustle (99%)
   Based on: Airport Metro (Chaotic Hustle)
   Key factors:
     • residential_density: 0.0
     • hashtag_trend_strength: 3.5
     • social_gathering_density: 0.0

Near Nariman Point
Preprocessing data...
Preprocessed shape: (1, 18)
Predicted: Chaotic Hustle (68%)
   Based on: Ro

In [13]:
# 14: SAVE BEST MODEL

print(f"\nSAVING BEST MODEL: {best_model_name}")
print("=" * 50)

model_package = {
    'model': best_model_results['model'],
    'preprocessor': best_model_results['preprocessor'],
    'selected_features': best_model_results['selected_features'],
    'model_name': best_model_name,
    'accuracy': best_model_results['accuracy'],
    'f1_score': best_model_results['f1_score']
}

if 'pca' in best_model_results:
    model_package['pca'] = best_model_results['pca']
if 'feature_weights' in best_model_results:
    model_package['feature_weights'] = best_model_results['feature_weights']

try:
    joblib.dump(model_package, 'mumbai_vibe_predictor.pkl')
    print("Model saved successfully!")
    
    # Save feature importance if available
    if 'feature_importance' in best_model_results:
        best_model_results['feature_importance'].to_csv('feature_importance.csv', index=False)
        print("Feature importance saved!")
    
except Exception as e:
    print(f"Error saving model: {e}")


SAVING BEST MODEL: Random Forest
Model saved successfully!
Feature importance saved!


In [14]:
# 15: FINAL SUMMARY

print(f"Best Model: {best_model_name}")
print(f"Accuracy: {best_model_results['accuracy']:.1%}")
print(f"F1-Score: {best_model_results['f1_score']:.3f}")
print(f"Features Used: {len(best_model_results['selected_features'])}")
print(f"Model saved: mumbai_vibe_predictor.pkl")
print("=" * 60)

Best Model: Random Forest
Accuracy: 87.0%
F1-Score: 0.872
Features Used: 18
Model saved: mumbai_vibe_predictor.pkl
