### Importing necessary libraries

In [None]:
import pandas as pd
import plotly.express as px
from matplotlib import pyplot as plt

import matplotlib.lines as mlines
import numpy as np

### Loading data

In [None]:
# Path of the TOP500 CSV under the Kaggle input directory.
TOP500_CSV_PATH = "/kaggle/input/top-500-supercomputers/TOP500.csv"

# Read the CSV into the DataFrame that every later cell works from.
top500 = pd.read_csv(TOP500_CSV_PATH)

### Helper functions

In [None]:
def draw_countplot(data, names, values, title, orientation='x'):
    """
        Build a Plotly Express bar chart from pre-computed counts and show it.

        data        -- passed to ``px.bar`` as the data source
        names       -- column with the category labels
        values      -- column with the counts; also used as the bar text
        title       -- chart title
        orientation -- 'y' places the categories on the y-axis and fixes the
                       figure to width 1000 / height 1300; any other value
                       places the categories on the x-axis with no explicit size
    """
    if orientation == 'y':
        # Categories go on the y-axis; the explicit size only applies here.
        axis_kwargs = dict(x=values, y=names, height=1300, width=1000)
    else:
        axis_kwargs = dict(x=names, y=values)

    chart = px.bar(data, text=values, title=title, **axis_kwargs)
    # Put the count labels outside the bars.
    chart.update_traces(textposition='outside')
    chart.show()

In [None]:
def draw_pie(data, names, values, title):
    """
        Build a Plotly Express pie chart and show it.

        data   -- passed to ``px.pie`` as the data source
        names  -- column with the slice labels
        values -- column with the slice sizes
        title  -- chart title
    """
    pie_chart = px.pie(data, names=names, values=values, title=title)
    # Show "percent + label" for each slice, positioned outside the pie.
    pie_chart.update_traces(textinfo='percent+label', textposition='outside')
    pie_chart.show()

In [None]:
def slope_chart_new_line(p1, p2, color='black', ax=None):
    """
        Draw one segment of a slope chart between two points and return it.

        p1, p2 -- indexable pairs ``(x, y)``; ``p1`` is the start of the
                  segment and ``p2`` its end
        color  -- accepted for backward compatibility only: the value is
                  always replaced by the direction-based colour below
        ax     -- Axes to draw on; when omitted the current pyplot Axes
                  (``plt.gca()``) is used, which is the original behaviour

        The segment colour encodes how the y value changes from p1 to p2:
        green when it decreases, blue when it is unchanged, red otherwise.

        Returns the ``Line2D`` that was added to the Axes.
    """
    if ax is None:
        # Fall back to the implicit "current Axes" so existing callers that
        # pass only the two points keep working unchanged.
        ax = plt.gca()

    if p1[1]-p2[1] > 0:
        color = 'green'
    elif p1[1] == p2[1]:
        color = 'blue'
    else:
        color = 'red'

    segment = mlines.Line2D([p1[0], p2[0]], [p1[1], p2[1]], color=color, marker='o', markersize=6)
    ax.add_line(segment)
    return segment

# Supercomputers in the world using Sunburst Chart

**Sunburst Chart**

The sunburst chart is ideal for displaying hierarchical data. Each level of the hierarchy is represented by one ring or circle with the innermost circle as the top of the hierarchy.

In [None]:
asia_supercomputers = top500.groupby(["Country", "Continent"]).size().reset_index(name='Number of Supercomputers')
asia_supercomputers['World'] = 'World'

In [None]:
fig = px.sunburst(asia_supercomputers,
                  path=["World", "Continent", "Country"],
                  values='Number of Supercomputers',
                  width=700, height=700,
                  title="Supercomputers in the world (Click on continent to see its countries)",
                  )
fig.show()

# Percentage of Supercomputers in the continents

In [None]:
# One row per continent with the number of rows of `top500` that fall in it.
continent_supercomputers = (
    top500
    .groupby("Continent")
    .size()
    .reset_index(name='Number of Supercomputers')
)
draw_pie(
    data=continent_supercomputers,
    names='Continent',
    values='Number of Supercomputers',
    title='Percentage of Supercomputers in the continents',
)

# Number of Supercomputers in the Asian countries

In [None]:
# Keep only the rows whose Continent is Asia, then count rows per country.
in_asia = top500["Continent"] == "Asia"
asia_supercomputers = (
    top500[in_asia]
    .groupby("Country")
    .size()
    .reset_index(name='Number of Supercomputers')
)
draw_countplot(
    data=asia_supercomputers,
    names='Country',
    values='Number of Supercomputers',
    title='Number of Supercomputers in the Asian countries',
)

# Number of Supercomputers in the North American countries

In [None]:
# Keep only the rows whose Continent is North America, then count rows per country.
in_north_america = top500["Continent"] == "North America"
north_america_supercomputers = (
    top500[in_north_america]
    .groupby("Country")
    .size()
    .reset_index(name='Number of Supercomputers')
)
draw_countplot(
    data=north_america_supercomputers,
    names='Country',
    values='Number of Supercomputers',
    title='Number of Supercomputers in the North American countries',
)


# Number of Supercomputers in the European countries

In [None]:
# Keep only the rows whose Continent is Europe, then count rows per country.
in_europe = top500["Continent"] == "Europe"
europe_supercomputers = (
    top500[in_europe]
    .groupby("Country")
    .size()
    .reset_index(name='Number of Supercomputers')
)
draw_countplot(
    data=europe_supercomputers,
    names='Country',
    values='Number of Supercomputers',
    title='Number of Supercomputers in the European countries',
)

# Number of Supercomputers in the South American countries

In [None]:
# Keep only the rows whose Continent is South America, then count rows per country.
in_south_america = top500["Continent"] == "South America"
south_america_supercomputers = (
    top500[in_south_america]
    .groupby("Country")
    .size()
    .reset_index(name='Number of Supercomputers')
)
draw_countplot(
    data=south_america_supercomputers,
    names='Country',
    values='Number of Supercomputers',
    title='Number of Supercomputers in the South American countries',
)

# Number of Supercomputers in the Oceanian countries

In [None]:
# Keep only the rows whose Continent is Oceania, then count rows per country.
in_oceania = top500["Continent"] == "Oceania"
oceania_supercomputers = (
    top500[in_oceania]
    .groupby("Country")
    .size()
    .reset_index(name='Number of Supercomputers')
)
draw_countplot(
    data=oceania_supercomputers,
    names='Country',
    values='Number of Supercomputers',
    title='Number of Supercomputers in the Oceanian countries',
)

# Number of Supercomputers in the African countries

In [None]:
# Keep only the rows whose Continent is Africa, then count rows per country.
in_africa = top500["Continent"] == "Africa"
african_supercomputers = (
    top500[in_africa]
    .groupby("Country")
    .size()
    .reset_index(name='Number of Supercomputers')
)
draw_countplot(
    data=african_supercomputers,
    names='Country',
    values='Number of Supercomputers',
    title='Number of Supercomputers in the African countries',
)

# Slopechart: Comparing previous rank and current rank of supercomputer

In [None]:
# Select the four columns first and copy only that slice: this avoids
# duplicating the whole `top500` frame just to keep four columns, and the
# explicit copy keeps later edits to `compare_rank` away from `top500`.
compare_rank = top500[['Name', 'System ID', 'Rank', 'Previous Rank']].copy()
# Number of missing values per column (shown as the cell output).
compare_rank.isna().sum()

Since a significant number (185) of supercomputers don't have a name, we'll fill the missing names using the **System ID** column.


In [None]:
# Rows without a Name take the System ID from the same row instead.
fallback_ids = compare_rank['System ID']
compare_rank['Name'] = compare_rank['Name'].fillna(value=fallback_ids)

Also, 44 supercomputers don't have a previous rank, so we drop them.

In [None]:
# Discard every row that still contains a missing value in any column.
compare_rank = compare_rank.dropna(axis=0, how='any')

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 200))

# Vertical guide lines: x=1 carries the previous ranks, x=3 the current ranks
for rail_x in (1, 3):
    ax.vlines(x=rail_x, ymin=1, ymax=500, color='black', linewidth=1, linestyles='dotted')

# Points: one dot per row on each guide line
n_rows = compare_rank.shape[0]
ax.scatter(x=np.repeat(1, n_rows), y=compare_rank['Previous Rank'], s=10, color='black')
ax.scatter(x=np.repeat(3, n_rows), y=compare_rank['Rank'], s=10, color='black')

# Line segments and annotations
rank_triples = zip(compare_rank['Previous Rank'], compare_rank['Rank'], compare_rank['Name'])
for prev_rank, curr_rank, system_name in rank_triples:
    slope_chart_new_line([1, prev_rank], [3, curr_rank])
    # Left label reads "<name> - <previous rank>", right label "<rank> - <name>"
    ax.text(1-0.05, prev_rank, f"{system_name} - {round(prev_rank)}",
            horizontalalignment='right', verticalalignment='center', fontdict={'size':14})
    ax.text(3+0.05, curr_rank, f"{round(curr_rank)} - {system_name}",
            horizontalalignment='left', verticalalignment='center', fontdict={'size':14})

# 'Previous Rank' and 'Current Rank' column headers, placed at y=0
ax.text(1-0.05, 0, 'Previous Rank',
        horizontalalignment='right', verticalalignment='center', fontdict={'size':16, 'weight':600})
ax.text(3+0.05, 0, 'Current Rank',
        horizontalalignment='left', verticalalignment='center', fontdict={'size':16, 'weight':600})

# Decoration
ax.set_title("Slopechart: Comparing previous rank and current rank of supercomputer", fontdict={'size':20}, pad=45)
ax.set(xlim=(0,4), ylim=(0,500))
ax.set_xticks([1,3])
# Flip the y-axis so that rank 1 is drawn at the top
ax.invert_yaxis()

# Lighten borders
for side in ("top", "bottom", "right", "left"):
    ax.spines[side].set_alpha(.0)
ax.axis('off')
plt.show()

# Market share of Manufacturers

In [None]:
# One row per manufacturer with the number of rows of `top500` attributed to it.
manufacturer_supercomputers = (
    top500
    .groupby("Manufacturer")
    .size()
    .reset_index(name='Number of Supercomputers')
)


Merging manufacturers that have fewer than 10 supercomputers into a single "Other Manufacturers" category

In [None]:
# Relabel every manufacturer whose count is below 10 with one shared name.
below_threshold = manufacturer_supercomputers['Number of Supercomputers'].lt(10)
manufacturer_supercomputers.loc[below_threshold, 'Manufacturer'] = 'Other Manufacturers'

In [None]:
# Pie chart of the per-manufacturer counts.
draw_pie(
    data=manufacturer_supercomputers,
    names='Manufacturer',
    values='Number of Supercomputers',
    title='Market share of Manufacturers',
)

# Scatterplot: Rank vs Total Cores of Manufacturers

In [None]:
# Changing datatype
# Changing datatype: strip the thousands separators and convert to int.
# Casting to str first keeps this cell re-runnable: once the column has
# already been converted to int, calling `.str` on it directly would raise.
# regex=False makes the comma an explicit literal match.
top500['Total Cores'] = (
    top500['Total Cores']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype(int)
)

In [None]:
# Bubble chart: x = Rank, y = Total Cores, marker size also follows Total Cores,
# colour distinguishes manufacturers. A title is set so the figure is labelled
# like the other charts in this notebook.
fig = px.scatter(
    top500,
    x="Rank",
    y="Total Cores",
    size='Total Cores',
    color="Manufacturer",
    title="Rank vs Total Cores of Manufacturers",
)
# Hide the legend.
fig.update_layout(showlegend=False)
fig.show()

# Operating system in supercomputers

In [None]:
# One row per operating system with its row count, ordered by ascending count.
os_supercomputers = (
    top500
    .groupby("Operating System")
    .size()
    .reset_index(name='Number of Supercomputers')
    .sort_values(by='Number of Supercomputers')
)

In [None]:
# Bar chart with the operating systems on the y-axis.
draw_countplot(
    data=os_supercomputers,
    names='Operating System',
    values='Number of Supercomputers',
    title='Operating system in supercomputers',
    orientation='y',
)

# Source

1. Slopechart - https://www.machinelearningplus.com/plots/top-50-matplotlib-visualizations-the-master-plots-python

**Thank you for going through my Exploratory Data Analysis on [Top 500 Supercomputers](https://www.kaggle.com/rowhitswami/top-500-supercomputers).**

**No, this is not it, at all 😁**

**I'll keep updating this notebook with new insights from time to time according to my bandwidth 😉**

**Let me know below if anything (Code/Visualization) could have been better 🎉**

**Namaste 🇮🇳**