In [1]:
import pandas as pd
import matplotlib.pyplot as plt

# Load your CSV file
 

# Read data
df = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv")

# Get first 5 rows
first_5 = df.head()

# Create figure
fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')

# Create table
table = ax.table(
    cellText=first_5.values,
    colLabels=first_5.columns,
    cellLoc='center',
    loc='center',
    bbox=[0, 0, 1, 1]
)

table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1, 2.5)

# Style header
for i in range(len(first_5.columns)):
    table[(0, i)].set_facecolor('#4CAF50')
    table[(0, i)].set_text_props(weight='bold', color='white')

# Style rows
for i in range(1, 6):
    for j in range(len(first_5.columns)):
        table[(i, j)].set_facecolor('#f9f9f9' if i % 2 == 0 else 'white')

# Save as PNG
plt.savefig('first_5_rows.png', dpi=300, bbox_inches='tight', facecolor='white')
print("✓ PNG saved as 'first_5_rows.png'")
plt.close()

✓ PNG saved as 'first_5_rows.png'


In [3]:
df.shape

(7043, 21)

In [5]:
import matplotlib.patches as mpatches
# Get column names
columns = df.columns.tolist()
num_columns = len(columns)

# Create figure
fig, ax = plt.subplots(figsize=(12, max(8, num_columns * 0.4)))
ax.axis('off')

# Title
plt.text(0.5, 0.95, f'Dataset Columns ({num_columns} total)', 
         ha='center', va='top', fontsize=18, fontweight='bold',
         transform=ax.transAxes)

# Create a nice list layout
y_start = 0.88
y_step = 0.85 / num_columns

for idx, col in enumerate(columns):
    y_pos = y_start - (idx * y_step)
    
    # Box background
    rect = mpatches.FancyBboxPatch(
        (0.05, y_pos - 0.015), 0.9, 0.03,
        boxstyle="round,pad=0.01",
        facecolor='#4CAF50' if idx % 2 == 0 else '#66BB6A',
        edgecolor='#2E7D32',
        transform=ax.transAxes,
        linewidth=1.5
    )
    ax.add_patch(rect)
    
    # Column number and name
    plt.text(0.08, y_pos, f"{idx + 1}.", 
             ha='left', va='center', fontsize=11, fontweight='bold',
             color='white', transform=ax.transAxes)
    
    plt.text(0.13, y_pos, col, 
             ha='left', va='center', fontsize=11,
             color='white', transform=ax.transAxes)

# Footer
plt.text(0.5, 0.02, f'Total Features: {num_columns}', 
         ha='center', va='bottom', fontsize=10, style='italic',
         transform=ax.transAxes, color='#555')

plt.tight_layout()
plt.savefig('column_names.png', dpi=300, bbox_inches='tight', facecolor='white')
print(f"✓ PNG saved as 'column_names.png'")
print(f"✓ Found {num_columns} columns")
plt.close()

✓ PNG saved as 'column_names.png'
✓ Found 21 columns


In [9]:
 
# Calculate missing values
missing_data = pd.DataFrame({
    'Column': df.columns,
    'Missing_Count': df.isnull().sum(),
    'Missing_Percentage': (df.isnull().sum() / len(df)) * 100
})

# Sort by missing percentage (descending)
missing_data = missing_data.sort_values('Missing_Percentage', ascending=False)

# Filter only columns with missing values
missing_data_filtered = missing_data[missing_data['Missing_Count'] > 0]

# Print summary
print("\n" + "="*60)
print("MISSING VALUES SUMMARY")
print("="*60)
print(f"\nTotal Rows: {len(df)}")
print(f"Total Columns: {len(df.columns)}")
print(f"Columns with Missing Values: {len(missing_data_filtered)}")
print("\nTop Missing Values:")
print(missing_data_filtered.head(10).to_string(index=False))

# Create visualization
if len(missing_data_filtered) > 0:
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, max(6, len(missing_data_filtered) * 0.4)))
    
    # Left plot: Bar chart
    colors = ['#e74c3c' if x > 50 else '#f39c12' if x > 20 else '#3498db' 
              for x in missing_data_filtered['Missing_Percentage']]
    
    bars = ax1.barh(missing_data_filtered['Column'], 
                     missing_data_filtered['Missing_Percentage'],
                     color=colors, edgecolor='black', linewidth=1.2)
    
    ax1.set_xlabel('Missing Percentage (%)', fontsize=12, fontweight='bold')
    ax1.set_ylabel('Columns', fontsize=12, fontweight='bold')
    ax1.set_title('Missing Values by Column', fontsize=14, fontweight='bold', pad=20)
    ax1.grid(axis='x', alpha=0.3, linestyle='--')
    ax1.set_xlim(0, 100)
    
    # Add percentage labels on bars
    for i, (bar, pct) in enumerate(zip(bars, missing_data_filtered['Missing_Percentage'])):
        width = bar.get_width()
        ax1.text(width + 1, bar.get_y() + bar.get_height()/2, 
                f'{pct:.2f}%', 
                ha='left', va='center', fontsize=9, fontweight='bold')
    
    # Right plot: Table
    ax2.axis('off')
    
    table_data = []
    for idx, row in missing_data_filtered.head(15).iterrows():
        table_data.append([
            row['Column'],
            f"{int(row['Missing_Count'])}",
            f"{row['Missing_Percentage']:.2f}%"
        ])
    
    table = ax2.table(
        cellText=table_data,
        colLabels=['Column Name', 'Missing Count', 'Missing %'],
        cellLoc='left',
        loc='center',
        bbox=[0, 0, 1, 1]
    )
    
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1, 2)
    
    # Style header
    for i in range(3):
        table[(0, i)].set_facecolor('#e74c3c')
        table[(0, i)].set_text_props(weight='bold', color='white')
    
    # Style rows with color coding
    for i in range(1, len(table_data) + 1):
        pct = float(table_data[i-1][2].strip('%'))
        if pct > 50:
            color = '#ffcccc'
        elif pct > 20:
            color = '#ffe6cc'
        else:
            color = '#cce6ff'
        
        for j in range(3):
            table[(i, j)].set_facecolor(color)
    
    ax2.set_title('Top Missing Values', fontsize=14, fontweight='bold', pad=20)
    
    # Main title
    fig.suptitle(f'Missing Values Analysis - {len(missing_data_filtered)} Columns Affected', 
                 fontsize=16, fontweight='bold', y=0.98)
    
    plt.tight_layout()
    plt.savefig('missing_values_analysis.png', dpi=300, bbox_inches='tight', facecolor='white')
    print("\n✓ PNG saved as 'missing_values_analysis.png'")
    
else:
    # No missing values - create a success message
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.axis('off')
    
    plt.text(0.5, 0.5, '✓ NO MISSING VALUES FOUND!', 
             ha='center', va='center', fontsize=24, fontweight='bold',
             color='#27ae60', transform=ax.transAxes)
    
    plt.text(0.5, 0.3, f'All {len(df.columns)} columns are complete', 
             ha='center', va='center', fontsize=14,
             color='#555', transform=ax.transAxes)
    
    plt.savefig('missing_values_analysis.png', dpi=300, bbox_inches='tight', facecolor='white')
    print("\n✓ PNG saved as 'missing_values_analysis.png'")
    print("✓ No missing values detected!")

plt.close()


MISSING VALUES SUMMARY

Total Rows: 7043
Total Columns: 21
Columns with Missing Values: 0

Top Missing Values:
Empty DataFrame
Columns: [Column, Missing_Count, Missing_Percentage]
Index: []

✓ PNG saved as 'missing_values_analysis.png'
✓ No missing values detected!
