# Machine Learning Real-World Applications Across Industries

**Author:** ML Applications Series  
**Topic:** Practical Use Cases in Business & Industry  
**Level:** Intermediate Learner

---

## Table of Contents
1. Retail & E-Commerce (Amazon, Big Bazaar)
2. Banking & Finance
3. Healthcare & Pharmaceuticals
4. Transportation & Logistics
5. Manufacturing
6. Consumer Internet & Social Media (Twitter/X)

---


## 1. RETAIL & E-COMMERCE

### Industry Overview
Global e-commerce market: $5.8 trillion (2023), growing 10% annually

### 1.1 Personalized Recommendations

**The Problem:**
- Amazon alone has 300+ million products
- Without recommendations, customers are overwhelmed by choice
- Random product suggestions = lost sales

**The Solution: Recommendation Engines**


In [None]:
# How Amazon's recommendation system works
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

class AmazonRecommender:
    """Hybrid recommender: collaborative candidates ranked by content similarity.

    Candidate products are taken from the purchases of the most similar
    users and then ordered by how similar they are to what the target user
    already bought.

    The data-access helpers (``get_purchases``, ``get_browsing``, ...) and
    ``compute_product_similarity`` are not defined in this class; they must
    be available in the enclosing module. ``user_item_matrix`` starts as
    ``None`` and has to be populated before ``find_similar_users`` is called.
    """

    def __init__(self):
        self.user_item_matrix = None  # m x n matrix (users x items)
        self.user_profiles = {}

    def collect_signals(self, user_id):
        """Gather all user behavior signals into one dict keyed by signal name."""
        return {
            'purchase_history': get_purchases(user_id),      # What they bought
            'browse_history': get_browsing(user_id),          # What they looked at
            'wishlist': get_wishlist(user_id),                # What they saved
            'ratings': get_ratings(user_id),                  # What they rated
            'time_spent': get_view_time(user_id),             # Time viewing products
            'device_info': get_device(user_id),               # Desktop vs mobile
            'location': get_location(user_id),                # Geographic data
            'demographics': get_demographics(user_id),        # Age, gender
        }

    def compute_recommendations(self, user_id, top_k=10):
        """Generate personalized product recommendations.

        Args:
            user_id: Row index of the user in ``user_item_matrix``; also the
                key passed to the data-access helpers.
            top_k: Maximum number of products to return. Values <= 0 yield
                an empty list.

        Returns:
            Up to ``top_k`` products, most similar to the user's purchase
            history first.
        """
        if top_k <= 0:
            return []

        # Step 1: Get user profile (what they like)
        user_signals = self.collect_signals(user_id)
        liked_products = user_signals['purchase_history']

        # Steps 2-3: Collaborative filtering - pool what similar users bought
        candidate_products = set()
        for similar_user in self.find_similar_users(user_id):
            candidate_products.update(get_purchases(similar_user))

        # Never recommend something the user already owns
        candidate_products -= set(liked_products)

        # Step 4: Content-based filtering - score each candidate against
        # the products the user already bought
        scored = [
            (candidate, compute_product_similarity(liked_products, candidate))
            for candidate in candidate_products
        ]

        # Step 5: Rank and keep the best top_k
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return [product for product, _ in scored[:top_k]]

    def find_similar_users(self, user_id, n_neighbors=10):
        """Find users with similar purchase patterns.

        Args:
            user_id: Row index of the target user in ``user_item_matrix``.
            n_neighbors: Maximum number of neighbours to return.

        Returns:
            Array of row indices of the most similar *other* users, most
            similar first. The target user is excluded: its similarity to
            itself is always the maximum, so it would otherwise occupy one
            of the neighbour slots without contributing new products.
        """
        from sklearn.metrics.pairwise import cosine_similarity

        user_vector = self.user_item_matrix[user_id]
        similarities = cosine_similarity([user_vector], self.user_item_matrix)[0]

        # Resolve user_id to a non-negative row position so that a negative
        # index (e.g. -1 for the last row) is excluded as well.
        own_row = np.arange(len(similarities))[user_id]

        ranked = np.argsort(similarities)[::-1]  # most similar first
        ranked = ranked[ranked != own_row]
        return ranked[:max(n_neighbors, 0)]

# Real-world metrics:
# - Amazon: 35% of revenue from recommendations
# - Netflix: 80% of watch time from recommendations  
# - Target: 15% of sales from personalization


**Impact:**
- Revenue increase: +30-40%
- Average order value: +20-25%
- Customer retention: +15-20%

### 1.2 Dynamic Pricing

**The Problem:**
Traditional pricing uses fixed prices that don't respond to market conditions:
- Miss revenue opportunities
- Lose sales during high-demand periods (underpriced stock sells out early)
- Overprice during low-demand periods (stock sits unsold)

**The Solution: ML-Powered Dynamic Pricing**


In [None]:
class DynamicPricingEngine:
    """Real-time price selection driven by a demand-elasticity model.

    ``elasticity_model`` must be assigned an object exposing
    ``predict(rows)`` before ``calculate_optimal_price`` is called.
    ``get_historical_pricing_data``, ``vectorize_context`` and
    ``optimize_for_max_revenue`` are expected to exist in the enclosing
    module.
    """

    def __init__(self):
        self.price_model = None
        # calculate_optimal_price reads this attribute, so it has to exist
        # on every instance even before a model is attached.
        self.elasticity_model = None

    def calculate_optimal_price(self, product_id, context):
        """Determine ideal price in real-time.

        Args:
            product_id: Identifier passed to the historical-pricing lookup.
            context: Mapping with the keys ``inventory``,
                ``competitor_price``, ``season``, ``customer_type``,
                ``time``, ``weekday``, ``weather``, ``search_volume``,
                ``add_to_cart_rate``, ``current_price``, ``min_price`` and
                ``max_price``.

        Returns:
            Whatever ``optimize_for_max_revenue`` returns for the predicted
            elasticity and the price bounds in ``context``.

        Raises:
            RuntimeError: If no elasticity model has been attached.
            KeyError: If ``context`` lacks one of the required keys.
        """
        if self.elasticity_model is None:
            raise RuntimeError(
                'elasticity_model is not set; assign a fitted model before pricing'
            )

        # Collect contextual data
        demand_signals = {
            'current_inventory': context['inventory'],       # Low stock → raise price
            'competitor_prices': context['competitor_price'], # Undercut if needed
            'seasonal_demand': context['season'],             # Holiday → raise price
            'customer_segment': context['customer_type'],     # Premium → higher price
            'time_of_day': context['time'],                   # Peak hours → higher
            'day_of_week': context['weekday'],                # Weekend → higher
            'weather': context['weather'],                    # e.g. heatwave → AC demand rises
            'search_trends': context['search_volume'],        # High searches → raise
            'browsing_speed': context['add_to_cart_rate'],    # Proxy for purchase intent
        }

        # Historical data: What prices worked before?
        # NOTE(review): fetched but not yet consumed by the steps below -
        # presumably meant to feed the model or the optimiser; confirm.
        historical_data = get_historical_pricing_data(product_id)

        # ML model: Predict price elasticity for the current context
        features = vectorize_context(demand_signals)
        predicted_elasticity = self.elasticity_model.predict([features])[0]

        # Price optimization: Maximize revenue = Price × Quantity
        # revenue(p) = p × (demand_function(p))
        return optimize_for_max_revenue(
            predicted_elasticity,
            current_price=context['current_price'],
            min_price=context['min_price'],
            max_price=context['max_price'],
        )

# Real-world examples:
# Amazon: Prices change 10M+ times daily for different products
# Uber: Surge pricing during peak demand
# Airbnb: Dynamic pricing based on demand, seasonality, events
# Target: Personalized prices for different customer segments

# Impact:
# - Revenue increase: 5-15%
# - Margin improvement: 2-5%
# - Inventory turnover: +20-30% (faster moving stock)


### 1.3 Demand Forecasting & Inventory Optimization

**The Problem:**
- Too much inventory: Waste money on storage, markdowns
- Too little inventory: Lost sales, customer frustration
- Seasonal variations: Hard to predict

**The Solution: Predictive Demand Forecasting**


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

class InventoryOptimizer:
    """Forecast daily demand and derive reorder parameters from the forecast.

    ``is_holiday``, ``get_lead_time``, ``get_order_cost`` and
    ``get_holding_cost`` are expected to exist in the enclosing module.
    """

    def __init__(self):
        self.demand_model = RandomForestRegressor(n_estimators=100)

    def forecast_demand(self, product_id, next_n_days=30):
        """Predict daily demand for the ``next_n_days`` days after the history ends.

        Reads ``sales_<product_id>.csv``, refits ``demand_model`` on it and
        predicts one value per future day.

        Args:
            product_id: Used to build the CSV file name.
            next_n_days: Number of future days to forecast.

        Returns:
            The model's predictions, one per future day.

        Raises:
            ValueError: If the sales file contains no rows.
        """
        # parse_dates is required: the feature builders use the .dt
        # accessor, which only exists on datetime columns.
        historical = pd.read_csv(f'sales_{product_id}.csv', parse_dates=['date'])
        if historical.empty:
            raise ValueError(f'no sales history available for product {product_id!r}')

        # Lag and rolling features are positional, so rows must be in
        # chronological order.
        historical = historical.sort_values('date').reset_index(drop=True)

        # Train model
        X = self.create_features(historical)
        y = historical['units_sold']
        self.demand_model.fit(X, y)

        # Start the day AFTER the last observation; starting on the last
        # observed day would "forecast" a day that already has sales.
        future_dates = pd.date_range(
            start=historical['date'].max() + pd.Timedelta(days=1),
            periods=next_n_days,
            freq='D',
        )

        X_future = self.create_features_for_dates(future_dates, historical)
        return self.demand_model.predict(X_future)

    def _calendar_features(self, dates):
        """Return the date-derived feature columns for a datetime Series."""
        features = pd.DataFrame()
        features['day_of_week'] = dates.dt.dayofweek
        features['month'] = dates.dt.month
        features['day_of_month'] = dates.dt.day
        features['quarter'] = dates.dt.quarter
        features['is_holiday'] = is_holiday(dates)
        return features

    def create_features(self, data):
        """Engineer features for demand prediction.

        Args:
            data: Sales history with a datetime ``date`` column plus
                ``units_sold``, ``competitor_price``, ``our_price``,
                ``has_promotion`` and ``traffic``, in chronological order.

        Returns:
            DataFrame of features aligned with ``data``; missing values
            (e.g. lags at the start of the history) are filled with 0.
        """
        # Temporal features
        features = self._calendar_features(data['date'])

        # Lag features (past sales)
        previous_day_sales = data['units_sold'].shift(1)
        features['sales_lag_1'] = previous_day_sales
        features['sales_lag_7'] = data['units_sold'].shift(7)
        features['sales_lag_30'] = data['units_sold'].shift(30)

        # Rolling averages over PAST days only. Rolling over the unshifted
        # column would include the current day's units_sold, which is the
        # prediction target, and leak it into the features.
        features['sales_ma_7'] = previous_day_sales.rolling(7).mean()
        features['sales_ma_30'] = previous_day_sales.rolling(30).mean()

        # External signals
        features['competitor_price'] = data['competitor_price']
        features['our_price'] = data['our_price']
        features['promotion_active'] = data['has_promotion']
        features['website_traffic'] = data['traffic']

        return features.fillna(0)

    def create_features_for_dates(self, future_dates, history=None):
        """Build the feature frame for dates that have no sales yet.

        Columns and their order match ``create_features``. Calendar
        features are derived from the dates. Sales lags, rolling means and
        external signals are unknown for the future, so the values that
        apply to the first future day are carried forward to every row -
        a naive assumption that degrades over long horizons.

        Args:
            future_dates: Dates to build features for.
            history: Chronologically ordered sales history with the same
                columns ``create_features`` expects. When omitted, the
                history-dependent columns are 0, the same value used for
                missing data during training.

        Returns:
            DataFrame with one row per future date.
        """
        dates = pd.Series(pd.DatetimeIndex(future_dates))
        features = self._calendar_features(dates)

        # Insertion order defines the column order and must stay in step
        # with create_features.
        carried = {
            'sales_lag_1': 0,
            'sales_lag_7': 0,
            'sales_lag_30': 0,
            'sales_ma_7': 0,
            'sales_ma_30': 0,
            'competitor_price': 0,
            'our_price': 0,
            'promotion_active': 0,
            'website_traffic': 0,
        }

        if history is not None and len(history) > 0:
            sales = history['units_sold']
            latest = history.iloc[-1]

            carried['sales_lag_1'] = sales.iloc[-1]
            for window in (7, 30):
                # Too-short histories keep the 0 fallback, as in training
                if len(sales) >= window:
                    carried[f'sales_lag_{window}'] = sales.iloc[-window]
                    carried[f'sales_ma_{window}'] = sales.iloc[-window:].mean()

            carried['competitor_price'] = latest['competitor_price']
            carried['our_price'] = latest['our_price']
            carried['promotion_active'] = latest['has_promotion']
            carried['website_traffic'] = latest['traffic']

        for column, value in carried.items():
            features[column] = value

        return features.fillna(0)

    def optimize_inventory(self, product_id, safety_stock=20):
        """Determine optimal inventory level from a 30-day demand forecast.

        Args:
            product_id: Product to optimise.
            safety_stock: Accepted for backward compatibility; the returned
                safety stock is computed from the forecast and this value
                is not used. NOTE(review): possibly intended as a minimum -
                confirm with callers.

        Returns:
            Dict with ``optimal_order_quantity`` (EOQ), ``reorder_point``,
            ``safety_stock``, ``forecasted_demand`` (mean daily forecast)
            and ``confidence_interval`` (mean ± 1.96 standard deviations).

        Raises:
            ValueError: If the holding cost is not positive.
        """
        # Forecast demand
        forecast = self.forecast_demand(product_id, next_n_days=30)

        # Calculate statistics
        expected_demand = np.mean(forecast)
        # NOTE(review): this is the day-to-day spread of the forecast, used
        # here as a stand-in for demand uncertainty.
        demand_std = np.std(forecast)

        # Lead time (how long to get new stock)
        lead_time = get_lead_time(product_id)  # e.g., 7 days

        order_cost = get_order_cost(product_id)
        holding_cost = get_holding_cost(product_id)
        if holding_cost <= 0:
            raise ValueError('holding cost must be positive to compute EOQ')

        # EOQ (Economic Order Quantity) = sqrt((2 * D * S) / H)
        # D = annual demand, S = order cost, H = holding cost
        # Annualise from the mean daily forecast; summing a 30-day window
        # and multiplying by 12 would only cover 360 days.
        annual_demand = expected_demand * 365
        eoq = np.sqrt((2 * annual_demand * order_cost) / holding_cost)

        # Safety stock to avoid stockouts, assuming normally distributed
        # demand. z = 1.96 bounds 95% two-sided (97.5% one-sided).
        z_score = 1.96
        safety_stock_calc = z_score * demand_std * np.sqrt(lead_time)

        # Reorder point = (Average demand per day) × Lead time + Safety stock
        reorder_point = (expected_demand * lead_time) + safety_stock_calc

        return {
            'optimal_order_quantity': eoq,
            'reorder_point': reorder_point,
            'safety_stock': safety_stock_calc,
            'forecasted_demand': expected_demand,
            'confidence_interval': (
                expected_demand - z_score * demand_std,
                expected_demand + z_score * demand_std,
            ),
        }

# Real-world impact (Retail):
# - Inventory holding costs: -15-30%
# - Stockout rate: Reduced 20-40%
# - Markdown/waste: Reduced 10-20%
# - Cash flow: Improved 25-35%

# Walmart's impact:
# - Serves 200M+ customers weekly
# - Uses ML to forecast demand down to individual stores
# - Reduced waste by $1B+ annually
# - Improved freshness of products


### 1.4 Customer Churn Prediction

**The Problem:**
- Customers leave without warning
- Acquiring a new customer costs 5-25x more than retaining an existing one
- Need to identify at-risk customers BEFORE they leave

**The Solution: Churn Prediction Model**


In [None]:
from sklearn.ensemble import GradientBoostingClassifier

class ChurnPredictor:
    """Score customers for churn risk and pick a retention play.

    ``model`` is created unfitted and nothing in this class fits it, so it
    must be trained (or replaced with a fitted model) before
    ``predict_churn_risk`` is called. The customer-data helpers used by
    ``extract_features`` and ``calculate_incentive`` are expected to exist
    in the enclosing module.
    """

    # Probability above which a customer is treated as at risk
    RISK_THRESHOLD = 0.5

    # (exclusive lower bound on churn probability, retention plan),
    # checked from the highest tier down.
    _RETENTION_TIERS = (
        (0.8, {  # Very high risk
            'action': 'Personal outreach',
            'channel': 'Call/Email from manager',
            'incentive': '20% discount + free shipping',
            'timing': 'Immediate',
        }),
        (0.6, {  # High risk
            'action': 'Special offer',
            'channel': 'Email/SMS',
            'incentive': '15% discount',
            'timing': 'Within 24 hours',
        }),
        (RISK_THRESHOLD, {  # Medium risk
            'action': 'Personalized recommendation',
            'channel': 'Email',
            'incentive': '10% discount',
            'timing': 'Within 48 hours',
        }),
    )

    def __init__(self):
        self.model = GradientBoostingClassifier()

    def predict_churn_risk(self, customer_id):
        """Predict probability customer will churn.

        Returns:
            Dict with the same three keys for every customer: ``risk``
            (second column of ``predict_proba``, assumed to be the churn
            class), ``action`` (retention plan, or ``None`` at or below
            ``RISK_THRESHOLD``) and ``incentive`` (result of
            ``calculate_incentive``, or ``None`` when no action is needed).
        """
        features = self.extract_features(customer_id)
        churn_prob = self.model.predict_proba([features])[0][1]

        if churn_prob > self.RISK_THRESHOLD:
            return {
                'risk': churn_prob,
                'action': self.recommend_retention(customer_id, churn_prob),
                'incentive': calculate_incentive(churn_prob),
            }

        # Same keys as the high-risk result so callers can read
        # result['incentive'] without checking the risk level first.
        return {'risk': churn_prob, 'action': None, 'incentive': None}

    def extract_features(self, customer_id):
        """Engineer features to predict churn.

        Returns:
            1-D array of 16 values in a fixed order: behavioral,
            engagement, support, competitive-exposure and trend features.
        """
        # Behavioral features
        days_as_customer = get_days_since_signup(customer_id)
        purchase_frequency = get_purchases_per_month(customer_id)
        days_since_last_purchase = get_days_since_purchase(customer_id)
        lifetime_value = get_total_spent(customer_id)
        avg_order_value = get_avg_order_value(customer_id)

        # Engagement features
        login_frequency = get_logins_per_month(customer_id)
        app_usage_minutes = get_app_time(customer_id)
        reviews_written = get_reviews_count(customer_id)
        wishlist_items = get_wishlist_size(customer_id)

        # Support interaction features
        support_tickets = get_support_tickets(customer_id)
        complaint_count = get_complaints(customer_id)
        satisfaction_score = get_satisfaction_rating(customer_id)

        # Competitive exposure
        browsed_competitor_sites = did_visit_competitor(customer_id)
        price_sensitivity = analyze_price_changes_response(customer_id)

        # Recent patterns: is spending / engagement going up or down?
        purchase_trend = linear_regression_slope(
            get_monthly_spend_trend(customer_id)
        )
        engagement_trend = linear_regression_slope(
            get_monthly_engagement_trend(customer_id)
        )

        # The order here defines the model's feature order; keep it stable.
        return np.array([
            days_as_customer,
            purchase_frequency,
            days_since_last_purchase,
            lifetime_value,
            avg_order_value,
            login_frequency,
            app_usage_minutes,
            reviews_written,
            wishlist_items,
            support_tickets,
            complaint_count,
            satisfaction_score,
            browsed_competitor_sites,
            price_sensitivity,
            purchase_trend,
            engagement_trend,
        ])

    def recommend_retention(self, customer_id, churn_prob):
        """Recommend action to retain customer.

        Args:
            customer_id: Not used when choosing the plan; kept so the
                signature stays stable for callers.
            churn_prob: Predicted churn probability.

        Returns:
            A new dict with ``action``, ``channel``, ``incentive`` and
            ``timing`` for the highest tier whose bound ``churn_prob``
            strictly exceeds, or ``None`` when it is at or below
            ``RISK_THRESHOLD``.
        """
        for lower_bound, plan in self._RETENTION_TIERS:
            if churn_prob > lower_bound:
                # Copy so callers cannot mutate the shared tier table
                return dict(plan)
        return None

# Real-world impact (E-commerce):
# - Retention improvement: +5-15%
# - Revenue impact: +$100K-$1M annually
# - Cost of retention: $1-10 per customer
# - ROI: 10-100x

# Amazon's example:
# - Tracks 50+ engagement metrics per customer
# - Identifies churning customers 1-2 months early
# - Saves $100M+ annually through targeted retention


### 1.5 Fraud Detection in E-Commerce


In [None]:
class EcommerceFraudDetector:
    """Score orders for fraud and decide whether to approve, challenge or block.

    ``model`` is created unfitted and nothing in this class fits it, so it
    must be trained (or replaced with a fitted model) before
    ``detect_fraudulent_order`` is called. The risk-signal helpers used by
    ``extract_features`` are expected to exist in the enclosing module.
    """

    # Fraud probability above which an order is flagged at all
    REVIEW_THRESHOLD = 0.7
    # Fraud probability above which a flagged order is blocked outright
    BLOCK_THRESHOLD = 0.9

    def __init__(self):
        # Imported here because the file's import lines do not provide it.
        from sklearn.ensemble import RandomForestClassifier

        self.model = RandomForestClassifier()

    def detect_fraudulent_order(self, order):
        """Identify suspicious orders in real-time.

        Returns:
            ``{'status': 'FRAUD_DETECTED', 'action': 'BLOCK' | 'CHALLENGE',
            'confidence': p}`` when the fraud probability ``p`` exceeds
            ``REVIEW_THRESHOLD``, otherwise
            ``{'status': 'APPROVED', 'confidence': p}``. ``p`` is the
            second column of ``predict_proba``, assumed to be the fraud
            class.
        """
        features = self.extract_features(order)
        fraud_prob = self.model.predict_proba([features])[0][1]

        if fraud_prob <= self.REVIEW_THRESHOLD:
            return {'status': 'APPROVED', 'confidence': fraud_prob}

        # Block or challenge transaction
        action = 'BLOCK' if fraud_prob > self.BLOCK_THRESHOLD else 'CHALLENGE'
        return {
            'status': 'FRAUD_DETECTED',
            'action': action,
            'confidence': fraud_prob,
        }

    def extract_features(self, order):
        """Turn an order into the fraud-signal feature vector.

        Args:
            order: Mapping with billing/shipping zip and country,
                ``customer_id``, ``quantity``, ``total``, ``device_id``,
                ``ip_address``, ``card_first_use`` (a datetime),
                ``card_name``, ``customer_name``, ``card_number`` and
                ``items`` (each with ``price``, ``category`` and
                ``product_id``).

        Returns:
            1-D array of 17 values in a fixed order.
        """
        from datetime import datetime

        customer_id = order['customer_id']
        items = order['items']

        # Address verification
        billing_shipping_match = order['billing_zip'] == order['shipping_zip']
        shipping_country_match = order['billing_country'] == order['shipping_country']
        unusual_destination = is_high_risk_country(order['shipping_country'])

        # Purchase pattern anomalies
        is_bulk_purchase = order['quantity'] > customer_avg_quantity(customer_id) * 3
        order_amount_unusual = order['total'] > customer_avg_order(customer_id) * 5
        multiple_declined = count_declined_cards_recent(customer_id) > 3

        # Device & network signals
        new_device = is_new_device(customer_id, order['device_id'])
        new_ip = is_new_ip(customer_id, order['ip_address'])
        vpn_proxy = is_vpn_or_proxy(order['ip_address'])

        # Velocity checks (too many orders too fast)
        orders_last_hour = count_orders_by_customer_past_hour(customer_id)
        orders_last_day = count_orders_by_customer_past_day(customer_id)

        # Card characteristics
        first_use = order['card_first_use']
        # Take "now" in the timestamp's own timezone (None for naive
        # values); subtracting a naive and an aware datetime raises
        # TypeError.
        now = datetime.now(tz=getattr(first_use, 'tzinfo', None))
        card_age_days = (now - first_use).days
        card_matches_name = fuzzy_match(order['card_name'], order['customer_name']) > 0.8
        card_country = get_card_issuing_country(order['card_number'])
        country_mismatch = card_country != order['billing_country']

        # Product risk
        high_value_items = any(item['price'] > 1000 for item in items)
        # all() is True for an empty sequence, so require at least one item
        # before calling an order "electronics only".
        electronics_only = bool(items) and all(
            item['category'] == 'electronics' for item in items
        )
        gift_cards = any(item['product_id'].startswith('gc_') for item in items)

        # The order here defines the model's feature order; keep it stable.
        return np.array([
            billing_shipping_match,
            shipping_country_match,
            unusual_destination,
            is_bulk_purchase,
            order_amount_unusual,
            multiple_declined,
            new_device,
            new_ip,
            vpn_proxy,
            orders_last_hour,
            orders_last_day,
            card_age_days,
            card_matches_name,
            country_mismatch,
            high_value_items,
            electronics_only,
            gift_cards,
        ])

# Real-world impact:
# - Fraud loss reduction: 50-70%
# - False positive rate: <1%
# - Chargeback reduction: 30-50%


---
