In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

# Load the host-analysis export; the charts below read its 'isp' and
# 'country' columns.
df = pd.read_csv('data/host-analysis.csv')

# 1. ISP Distribution Pie Chart
plt.figure(figsize=(10, 8))
# value_counts() orders by descending frequency and excludes NaN by default,
# so no separate dropna() is required.
isp_counts = df['isp'].value_counts()

# Keep the `threshold` most frequent ISPs as individual slices and fold every
# remaining ISP into a single 'Other' slice so the chart stays readable.
threshold = 14
if len(isp_counts) > threshold:
    other_count = isp_counts.iloc[threshold:].sum()
    isp_counts = isp_counts.iloc[:threshold].copy()
    # Add to (rather than overwrite) an ISP that is literally named 'Other'.
    isp_counts['Other'] = isp_counts.get('Other', 0) + other_count

# Total number of hosts shown in the pie; computed once instead of once per
# slice label.
total_hosts = int(isp_counts.sum())

# autopct receives each slice's share as a floating-point percentage. The host
# count is recovered from it by rounding to the nearest integer: truncating
# with int() would turn e.g. 6.9999999 into 6 and under-report the slice.
plt.pie(isp_counts.values, labels=isp_counts.index,
        autopct=lambda p: f'{p:.1f}% ({int(round(float(p) * total_hosts / 100))})',
        startangle=140)
plt.title('ISP Distribution')
plt.axis('equal')  # Ensure pie is circular
plt.tight_layout()
plt.show()

# 2. Country Distribution Choropleth Map
# Count hosts per country (missing countries are excluded) and flatten the
# result into a two-column frame: 'country' and 'count'.
country_counts = (
    df['country']
    .value_counts(dropna=True)
    .rename_axis('country')
    .reset_index(name='count')
)

# Colour each country by its host count; country names are matched by plotly's
# 'country names' location mode.
fig = px.choropleth(
    data_frame=country_counts,
    locations='country',
    locationmode='country names',
    color='count',
    hover_name='country',
    color_continuous_scale='Viridis',
    title='Host Country Distribution',
)

# Plain map: no frame, no coastlines, equirectangular projection.
fig.update_layout(
    geo={
        'showframe': False,
        'showcoastlines': False,
        'projection_type': 'equirectangular',
    }
)
fig.show()