# Flipkart Customer Service Satisfaction Analysis

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from faker import Faker
import shap
import os

# Seed NumPy's global RNG so the np.random draws below repeat between runs.
# This call seeds NumPy only; other random sources are not affected by it.
np.random.seed(42)

# On GitHub Codespaces / GitHub Actions (detected via environment variables)
# switch matplotlib to the non-interactive 'Agg' backend.
if 'CODESPACES' in os.environ or 'GITHUB_ACTIONS' in os.environ:
    plt.switch_backend('Agg')

## Data Loading & Anonymization

In [None]:
def load_data(n_rows=1000, seed=42):
    """Build a synthetic customer-service interaction dataset.

    Args:
        n_rows: Number of interaction rows to generate (default 1000).
        seed: Seed applied to the Faker instance so ``customer_id`` and
            ``agent_id`` repeat between runs.  Pass ``None`` to leave Faker
            unseeded.  The NumPy-generated columns draw from NumPy's global
            RNG and are therefore governed by ``np.random.seed``.

    Returns:
        A ``pandas.DataFrame`` with ``n_rows`` rows and the columns
        ``customer_id``, ``agent_id``, ``channel``, ``resolution_time``,
        ``first_response``, ``interaction_count``, ``issue_type``,
        ``sentiment_score`` and ``satisfaction`` (0 or 1).
    """
    # In real usage, replace with:
    # return pd.read_csv('customer_data.csv')
    fake = Faker()
    if seed is not None:
        # Faker keeps its own RNG, which np.random.seed() does not touch;
        # without this the two id columns differ on every run.
        fake.seed_instance(seed)

    # The order of the np.random calls is kept fixed so that a given NumPy
    # seed always produces the same column values.
    data = {
        'customer_id': [fake.uuid4() for _ in range(n_rows)],
        'agent_id': [fake.random_int(100, 999) for _ in range(n_rows)],
        'channel': np.random.choice(['Chat', 'Email', 'Phone', 'Social Media'], n_rows),
        'resolution_time': np.random.exponential(10, n_rows),
        'first_response': np.random.exponential(2, n_rows),
        'interaction_count': np.random.poisson(3, n_rows),
        'issue_type': np.random.choice(['Billing', 'Shipping', 'Product', 'Account'], n_rows),
        'sentiment_score': np.random.uniform(-1, 1, n_rows),
        'satisfaction': np.random.choice([0, 1], n_rows, p=[0.35, 0.65]),
    }
    return pd.DataFrame(data)

# Build the dataset and preview its first rows
df = load_data()
df.head()

## Exploratory Data Analysis (EDA)

In [None]:
def save_plot(fig, filename):
    """Save the figure to disk on GitHub environments, otherwise show it.

    Args:
        fig: The matplotlib figure to save and close.
        filename: File name without extension; the figure is written to
            ``plots/<filename>.png``.

    When either the ``CODESPACES`` or ``GITHUB_ACTIONS`` environment variable
    is set, the figure is written as a PNG and then closed.  In any other
    environment ``plt.show()`` is called and nothing is written.
    """
    if 'CODESPACES' in os.environ or 'GITHUB_ACTIONS' in os.environ:
        # Create the target directory here so saving works no matter which
        # caller runs first; savefig does not create missing directories.
        os.makedirs('plots', exist_ok=True)
        fig.savefig(f'plots/{filename}.png')
        plt.close(fig)
    else:
        plt.show()

def perform_eda(df):
    """Draw the four exploratory charts and pass each one to ``save_plot``.

    Charts, in order: count of each ``satisfaction`` value, the ten agents
    with the highest mean satisfaction, mean satisfaction per channel, and
    mean satisfaction per issue type.  The ``plots`` directory is created
    first if it does not already exist.

    Args:
        df: DataFrame with ``satisfaction``, ``agent_id``, ``channel`` and
            ``issue_type`` columns.
    """
    os.makedirs('plots', exist_ok=True)

    def _rate_chart(column, size, title, filename, top_n=None):
        # Bar chart of mean satisfaction for each value of `column`,
        # optionally limited to the `top_n` highest rates.
        fig = plt.figure(figsize=size)
        rates = df.groupby(column)['satisfaction'].mean()
        if top_n is not None:
            rates = rates.nlargest(top_n)
        rates.plot(kind='bar')
        plt.title(title)
        plt.ylabel('Satisfaction Rate')
        save_plot(fig, filename)

    # Distribution of the target variable
    target_fig = plt.figure(figsize=(8, 5))
    sns.countplot(x='satisfaction', data=df)
    plt.title('Customer Satisfaction Distribution')
    save_plot(target_fig, 'satisfaction_distribution')

    # Per-agent, per-channel and per-issue satisfaction rates
    _rate_chart('agent_id', (10, 6), 'Top 10 Agents by Satisfaction Score',
                'agent_performance', top_n=10)
    _rate_chart('channel', (8, 5), 'Satisfaction Rate by Support Channel',
                'channel_analysis')
    _rate_chart('issue_type', (8, 5), 'Satisfaction Rate by Issue Type',
                'issue_analysis')

# Produce the exploratory charts for the loaded dataset
perform_eda(df)

## Feature Engineering

In [None]:
def feature_engineering(df):
    """Return a copy of ``df`` with three derived feature columns added.

    Added columns:
        response_efficiency: ``first_response / resolution_time``.  Rows
            whose ``resolution_time`` is 0 get NaN instead of ``inf``.
        channel_recurrence: number of distinct channels used by the row's
            ``customer_id``.
        agent_experience: number of rows handled by the row's ``agent_id``.

    The input frame is not modified, and its index and row order are kept.
    Calling the function again on its own output overwrites the three
    columns rather than creating suffixed duplicates.

    Args:
        df: DataFrame with ``customer_id``, ``agent_id``, ``channel``,
            ``first_response`` and ``resolution_time`` columns.

    Returns:
        A new DataFrame containing the original columns plus the three
        derived ones.
    """
    # Copy first so the caller's frame never gains columns as a side effect.
    df = df.copy()

    # Response efficiency; mask zero durations so the division yields NaN
    # rather than +/-inf, which scalers and models reject.
    resolution = df['resolution_time']
    df['response_efficiency'] = df['first_response'] / resolution.where(resolution != 0)

    # Channel recurrence; transform broadcasts the per-customer count back
    # onto every row without a merge.
    df['channel_recurrence'] = df.groupby('customer_id')['channel'].transform('nunique')

    # Agent experience
    df['agent_experience'] = df['agent_id'].map(df['agent_id'].value_counts())

    return df

# Add the engineered columns and preview the result
df = feature_engineering(df)
df.head()

## Model Training & Evaluation

In [None]:
def preprocess_data(df):
    """Encode, split and scale the dataset for model training.

    Steps:
        1. Label-encode ``channel`` and ``issue_type`` on a copy of ``df``.
        2. Drop ``customer_id``, ``agent_id`` and ``satisfaction`` to form
           the feature matrix; ``satisfaction`` is the target.
        3. Split 80/20 with ``random_state=42``.
        4. Fit a ``StandardScaler`` on the training rows only and apply it
           to both the training and the test rows.

    Args:
        df: DataFrame produced by ``feature_engineering``.  It is not
            modified.

    Returns:
        ``[X_train, X_test, y_train, y_test]``.
    """
    # Work on a copy so the caller's frame keeps its readable channel and
    # issue names and its unscaled values for later reporting.
    df = df.copy()

    # Encode categorical features (a fresh encoder per column)
    for col in ('channel', 'issue_type'):
        df[col] = LabelEncoder().fit_transform(df[col])

    # Train-test split
    X = df.drop(['customer_id', 'agent_id', 'satisfaction'], axis=1)
    y = df['satisfaction']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Feature scaling: fit on the training rows only so that test-set
    # statistics do not leak into the transformation.
    numeric_cols = ['resolution_time', 'first_response', 'interaction_count',
                    'sentiment_score', 'response_efficiency', 'channel_recurrence',
                    'agent_experience']
    X_train = X_train.copy()
    X_test = X_test.copy()
    scaler = StandardScaler()
    X_train[numeric_cols] = scaler.fit_transform(X_train[numeric_cols])
    X_test[numeric_cols] = scaler.transform(X_test[numeric_cols])

    return [X_train, X_test, y_train, y_test]

# Encode/scale the features and split them into training and test sets
X_train, X_test, y_train, y_test = preprocess_data(df)

# Train model: 100-tree random forest with a fixed random_state
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out test set
y_pred = model.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Confusion matrix drawn as an annotated heatmap of integer counts
plt.figure(figsize=(8, 6))
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
save_plot(plt.gcf(), 'confusion_matrix')

## Business Insights

In [None]:
def generate_insights(df):
    """Print a numbered list of four satisfaction findings.

    The findings are: the channel with the highest mean satisfaction, the
    correlation of ``sentiment_score`` with ``satisfaction``, the
    correlation of ``agent_experience`` with ``satisfaction``, and the two
    issue types with the lowest mean satisfaction.

    Args:
        df: DataFrame with ``channel``, ``issue_type``, ``sentiment_score``,
            ``agent_experience`` and ``satisfaction`` columns.

    Returns:
        None; the findings are written to standard output.
    """
    # Compute every statistic first, then format them in one place.
    by_channel = df.groupby('channel')['satisfaction'].mean()
    satisfaction = df['satisfaction']
    sentiment_r = df['sentiment_score'].corr(satisfaction)
    experience_r = df['agent_experience'].corr(satisfaction)
    weakest_issues = df.groupby('issue_type')['satisfaction'].mean().nsmallest(2)

    insights = [
        f"Highest satisfaction channel: {by_channel.idxmax()} ({by_channel.max():.2%})",
        f"Correlation between sentiment score and satisfaction: {sentiment_r:.2f}",
        f"Agent experience vs satisfaction correlation: {experience_r:.2f}",
        f"Critical issues needing attention: {weakest_issues.index.tolist()}",
    ]

    print("\nKey Business Insights:")
    for position, text in enumerate(insights, start=1):
        print(f"{position}. {text}")

# Print the insight summary for the current DataFrame
generate_insights(df)