In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

# Enable inline plots (for Jupyter Notebooks)
%matplotlib inline

# Load the complaints dataset into a DataFrame.
# low_memory=False asks pandas not to parse the file in internal chunks,
# which avoids mixed-type inference within a column.
# NOTE(review): the path looks like a mounted-drive location - confirm it
# exists in the runtime this is executed in.
df = pd.read_csv('/content/contentdrive/MyDrive/DATA SCIENCE Project/complaints.csv', low_memory=False)

# Data overview (df.info() prints its own summary)
df.info()

# Parse the send date and derive a "YYYY-MM" string column that the monthly
# time-series plots group on.
df["Date sent to company"] = pd.to_datetime(df["Date sent to company"])
df['Month sent to company'] = df["Date sent to company"].dt.strftime("%Y-%m")

# View first few rows. A bare `df.head(5)` is only rendered when it is the
# last expression of a notebook cell; here it sits mid-cell and its result
# was silently discarded, so print it explicitly.
print(df.head(5))

# Count each value of the 'Timely response?' column and draw the counts as
# horizontal bars.
timely_counts = df['Timely response?'].value_counts()
timely_counts.plot.barh(title='Timely Response: Yes vs No')
plt.show()

# Keep only the complaints whose 'Timely response?' value is 'No'; the
# "delayed" charts are all built from this subset.
not_timely = df['Timely response?'] == 'No'
df_badres = df.loc[not_timely]

# Five most frequent products, all complaints vs. not-timely ones.
# sort_values() orders the counts ascending, so tail(5) keeps the five largest
# with the biggest in the last position; the color list is positional and its
# final 'r' marks that last bar.
color = ['b'] * 4 + ['r']

# All complaints
top_products_all = df['Product'].value_counts().sort_values().tail(5)
plt.figure(figsize=(4, 3))
top_products_all.plot.barh(color=color, title='Top 5 Products (All)')
plt.show()

# Complaints without a timely response (same color list reused)
top_products_delayed = df_badres['Product'].value_counts().sort_values().tail(5)
plt.figure(figsize=(4, 3))
top_products_delayed.plot.barh(color=color, title='Top 5 Products (Delayed)')
plt.show()

# Ten most frequent sub-products across all complaints; the last (largest)
# bar is colored red.
top_subproducts_all = df['Sub-product'].value_counts().sort_values().tail(10)
color = ['b'] * 10
color[9] = 'r'
plt.figure(figsize=(5, 3))
top_subproducts_all.plot.barh(color=color, title='Top 10 Sub-products (All)')
plt.show()

# Ten most frequent sub-products among not-timely complaints; here the
# second-to-last position (index 8) is the one colored red.
# NOTE(review): the highlighted position looks hand-picked for a specific
# category - confirm it still matches if the data changes.
top_subproducts_delayed = df_badres['Sub-product'].value_counts().sort_values().tail(10)
color = ['b'] * 10
color[8] = 'r'
plt.figure(figsize=(5, 3))
top_subproducts_delayed.plot.barh(color=color, title='Top 10 Sub-products (Delayed)')
plt.show()

# Pie chart - Sub-products (All)
# Shares of the (up to) 20 most frequent sub-products. The explode list is
# sized from the data instead of being a fixed 20-element literal: pie()
# raises ValueError when 'explode' and the data differ in length, which
# happens as soon as fewer than 20 distinct sub-products are present.
sub_all = df['Sub-product'].value_counts().sort_values().tail(20)
point = [0] * len(sub_all)
if len(point) >= 4:
    # Fourth slice from the end - identical to index 16 when all 20 exist.
    point[-4] = 0.2
sub_all.plot.pie(
    autopct="%1.1f%%", radius=0.8, explode=point, title='Sub-product Distribution (All)')
plt.ylabel("")  # drop the automatic y-axis label
plt.show()

# Pie chart - Sub-products (Not timely responded)
# Same construction on the not-timely subset, which is the more likely one to
# contain fewer than 20 distinct sub-products.
sub_delayed = df_badres['Sub-product'].value_counts().sort_values().tail(20)
point = [0] * len(sub_delayed)
if len(point) >= 2:
    # Second slice from the end - identical to index 18 when all 20 exist.
    point[-2] = 0.2
sub_delayed.plot.pie(
    autopct="%1.1f%%", radius=0.8, explode=point, title='Sub-product Distribution (Delayed)')
plt.ylabel("")  # drop the automatic y-axis label
plt.show()

# Ten most frequent issues across all complaints; the last (largest) bar is
# colored red.
top_issues_all = df['Issue'].value_counts().sort_values().tail(10)
color = ['b'] * 10
color[9] = 'r'
plt.figure(figsize=(4, 3))
top_issues_all.plot.barh(color=color, title='Top 10 Issues (All)')
plt.show()

# Ten most frequent issues among not-timely complaints; positions 6 and 9 of
# the ascending order are colored red.
# NOTE(review): the highlighted positions look hand-picked - confirm they
# still point at the intended issues if the data changes.
top_issues_delayed = df_badres['Issue'].value_counts().sort_values().tail(10)
color = ['b'] * 10
color[6] = 'r'
color[9] = 'r'
plt.figure(figsize=(4, 3))
top_issues_delayed.plot.barh(color=color, title='Top 10 Issues (Delayed)')
plt.show()

# Ten most frequent sub-issues across all complaints; the last (largest) bar
# is colored red.
top_subissues_all = df['Sub-issue'].value_counts().sort_values().tail(10)
color = ['b'] * 10
color[9] = 'r'
plt.figure(figsize=(4, 3))
top_subissues_all.plot.barh(color=color, title='Top 10 Sub-issues (All)')
plt.show()

# Ten most frequent sub-issues among not-timely complaints; the last three
# positions of the ascending order are colored red.
top_subissues_delayed = df_badres['Sub-issue'].value_counts().sort_values().tail(10)
color = ['b'] * 7 + ['r'] * 3
plt.figure(figsize=(4, 3))
top_subissues_delayed.plot.barh(color=color, title='Top 10 Sub-issues (Delayed)')
plt.show()

# How not-timely complaints were submitted, counts in ascending order with
# the last (largest) bar colored red.
# NOTE(review): the color list has exactly seven entries, so it presumably
# assumes seven distinct 'Submitted via' values - confirm against the data.
channel_counts = df_badres['Submitted via'].value_counts().sort_values()
color = ['b'] * 6 + ['r']
plt.figure(figsize=(4, 3))
channel_counts.plot.barh(color=color, title='Submission Channels (Delayed)')
plt.show()

# Number of complaints per 'Month sent to company' value, whole dataset
monthly_all = df.groupby('Month sent to company').size()
plt.figure(figsize=(10, 5))
monthly_all.plot(title='Complaints per Month (All)')
plt.xticks(rotation=45)  # slant the tick labels
plt.tight_layout()
plt.show()

# Same monthly count restricted to complaints without a timely response
monthly_delayed = df_badres.groupby('Month sent to company').size()
plt.figure(figsize=(10, 5))
monthly_delayed.plot(title='Complaints per Month (Delayed)')
plt.xticks(rotation=45)  # slant the tick labels
plt.tight_layout()
plt.show()

# Counts of each 'Company response to consumer' value over all complaints,
# ascending, drawn with the default bar color.
response_all = df['Company response to consumer'].value_counts().sort_values()
plt.figure(figsize=(4, 3))
response_all.plot.barh(title='Company Responses (All)')
plt.show()

# Same counts for not-timely complaints; position 5 of the ascending order is
# colored red.
# NOTE(review): the color list has exactly seven entries and a hand-picked
# highlight position - confirm both against the actual response categories.
response_delayed = df_badres['Company response to consumer'].value_counts().sort_values()
color = ['b'] * 7
color[5] = 'r'
plt.figure(figsize=(4, 3))
response_delayed.plot.barh(color=color, title='Company Responses (Delayed)')
plt.show()
