# <font color="orange"> ***🎈 Forbes billionaires of <font color="crimson"> 2021***

# *Imports and Loading dataset*

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [None]:
# Path of the CSV file holding the Forbes 2021 billionaires table.
BILLIONAIRES_CSV = "../input/forbes-billionaires-of-2021/Billionaire.csv"

# Raw table; the later cells all start from `df`.
df = pd.read_csv(BILLIONAIRES_CSV)
df

# *Data Exploration*

In [None]:
# Turn the text column "NetWorth" into numbers.
# str.strip removes any of the characters "$", ",", " " and "B" from both
# ends of each entry, leaving only the numeric part.
df_NetWorth = df["NetWorth"].str.strip("$, B")

# Keep the values as floats: an integer cast would truncate the fractional
# part (e.g. 4.5 -> 4) and understate every total computed from this column.
df_NetWorth = pd.to_numeric(df_NetWorth).astype(float)
df_NetWorth

In [None]:
# Every column of the raw table except the text "NetWorth" column; the
# numeric series `df_NetWorth` is joined back on in the following cell.
# (No placeholder column is added here: filling one with the literal string
# "df_NetWorth" carried no data and was never used afterwards.)
NetWorth = df.drop(columns=["NetWorth"])

In [None]:
# Place the numeric NetWorth series beside the remaining columns, then keep
# only the five fields used by the analysis, in this order.
kept_columns = ["Name", "NetWorth", "Country", "Source", "Rank"]
all_NetWorth = pd.concat([df_NetWorth, NetWorth], axis=1)[kept_columns]
all_NetWorth

In [None]:
# Show the dtype of each column of the combined frame.
column_dtypes = all_NetWorth.dtypes
print(column_dtypes)

> # <font color="midnightblue"> The highest NetWorth by Name

In [None]:
# The 15 largest net worths with the matching names.
# The index is reset so it runs 0..14 in ranking order: the lollipop plot of
# this frame uses the index as x positions, and the Country / Source
# summaries reset their index in the same way.
Name_sum = (
    all_NetWorth[["Name", "NetWorth"]]
    .sort_values("NetWorth", ascending=False)
    .head(15)
    .reset_index(drop=True)
)
Name_sum

In [None]:
# Lollipop plot: one stem per person with a marker on top.
fig, ax = plt.subplots(figsize=(16, 8))
x_positions = Name_sum.index
net_worths = Name_sum.NetWorth

# Stems from zero up to each net worth.
ax.vlines(x=x_positions, ymin=0, ymax=net_worths,
          color='tomato', linewidth=5, alpha=0.5)
# Markers at the top of the stems.
ax.scatter(x=x_positions, y=net_worths, s=200, color='red')

# One tick per person, labelled with the upper-cased name.
ax.set_xticks(x_positions)
ax.set_xticklabels(Name_sum.Name.str.upper(), rotation=90, fontsize=15)

ax.grid(color="grey", alpha=0.3)
ax.set_title("The highest NetWorth by Name", fontsize=30, color="blue")
plt.show()

> # <font color="midnightblue"> The highest NetWorth by Country

In [None]:
# Total net worth per country, 15 largest totals first.
# Only the NetWorth column is aggregated: summing the whole grouped frame
# also aggregated Name, Source and Rank, none of which were kept afterwards.
NetWorth_sum = (
    all_NetWorth.groupby("Country")[["NetWorth"]]
    .sum()
    .sort_values("NetWorth", ascending=False)
    .head(15)
    .reset_index()
)
NetWorth_sum

In [None]:
# Lollipop plot of the summed net worth per country.
fig, ax = plt.subplots(figsize=(16, 8))
x_positions = NetWorth_sum.index
country_totals = NetWorth_sum.NetWorth

# Stems from zero up to each country's total.
ax.vlines(x=x_positions, ymin=0, ymax=country_totals,
          color='orange', linewidth=5, alpha=0.5)
# Markers at the top of the stems.
ax.scatter(x=x_positions, y=country_totals, s=200, color='darkorange')

# One tick per country, labelled with the upper-cased country name.
ax.set_xticks(x_positions)
ax.set_xticklabels(NetWorth_sum.Country.str.upper(), rotation=90, fontsize=15)

ax.grid(color="grey", alpha=0.3)
ax.set_title("The highest NetWorth by Country", fontsize=30, color="indigo")
plt.show()

> # <font color="midnightblue"> The highest NetWorth by Source

In [None]:
# Total net worth per source, 15 largest totals first.
# Only the NetWorth column is aggregated: summing the whole grouped frame
# also aggregated Name, Country and Rank, none of which were kept afterwards.
Source_sum = (
    all_NetWorth.groupby("Source")[["NetWorth"]]
    .sum()
    .sort_values("NetWorth", ascending=False)
    .head(15)
    .reset_index()
)
Source_sum

In [None]:
# Lollipop plot of the summed net worth per source.
fig, ax = plt.subplots(figsize=(16, 8))
x_positions = Source_sum.index
source_totals = Source_sum.NetWorth

# Stems from zero up to each source's total.
ax.vlines(x=x_positions, ymin=0, ymax=source_totals,
          color='violet', linewidth=5, alpha=0.5)
# Markers at the top of the stems.
ax.scatter(x=x_positions, y=source_totals, s=200, color='magenta')

# One tick per source, labelled with the upper-cased source name.
ax.set_xticks(x_positions)
ax.set_xticklabels(Source_sum.Source.str.upper(), rotation=90, fontsize=15)

ax.grid(color="grey", alpha=0.3)
ax.set_title("The highest NetWorth by Source", fontsize=30, color="darkgreen")
plt.show()

> # <font color="midnightblue"> The Country Distribution

In [None]:
# Distinct values found in the Country column.
country_column = df["Country"]
country_column.unique()

In [None]:
# Horizontal bar chart: number of rows per country.
plt.figure(figsize=(10, 23))
# countplot draws on the current axes and returns them.
ax = sns.countplot(data=df, y='Country', palette="cool")
ax.set_title('Country Distribution', size=25, color="indigo")
ax.set_facecolor('seashell')
plt.show()

> # <font color="midnightblue"> The Country Distribution on Family

In [None]:
# Rows whose Name contains the text "family".
is_family = df["Name"].str.contains("family")
df_family = df[is_family]
df_family

In [None]:
# Horizontal bar chart: number of "family" rows per country.
plt.figure(figsize=(10, 15))
# countplot draws on the current axes and returns them.
ax = sns.countplot(data=df_family, y='Country', palette="winter")
ax.set_title('The Country Distribution on Family', size=25, color="purple")
plt.yticks(fontsize=12)
ax.set_facecolor('lavenderblush')
plt.show()

> # <font color="midnightblue"> The number of billionaires: Individual vs Family

In [None]:
# Per-country count of rows whose Name contains "family",
# returned as a frame with the columns Country and Fam_count.
family_rows = df[df["Name"].str.contains("family")]
Fam = family_rows[["Country"]].value_counts().reset_index(name="Fam_count")
Fam

In [None]:
# Per-country count of rows whose Name does NOT contain "family",
# returned as a frame with the columns Country and Indiv_count.
individual_rows = df[~df["Name"].str.contains("family")]
Indiv = individual_rows[["Country"]].value_counts().reset_index(name="Indiv_count")
Indiv

In [None]:
# Combine the per-country counts of individuals and families; a country
# present on only one side gets a count of 0 on the other.
Indiv_Fam = pd.merge(Indiv, Fam, on="Country", how="outer").fillna(0)

# Rank explicitly before keeping 15 rows. The row order of an outer merge is
# not guaranteed to follow Indiv's count order (pandas documents outer joins
# as sorting the keys lexicographically), so a bare head(15) could return the
# alphabetically first countries instead of the largest ones.
Indiv_Fam = (
    Indiv_Fam.sort_values("Indiv_count", ascending=False, kind="stable")
    .set_index("Country")
    .head(15)
)
Indiv_Fam

In [None]:
# Stacked bar chart: individuals and families per country.
ax = Indiv_Fam.plot.bar(stacked=True, figsize=(15, 7), color=['lime', 'yellow'])

# Tick label styling.
plt.yticks(fontsize=13)
plt.xticks(fontsize=15, rotation=45)

ax.set_xlabel("Country", fontsize=15)
ax.set_title("The number of billionaires: Individual vs Family",
             fontsize=25, color='darkblue')
ax.grid(color="grey", alpha=0.3)
ax.legend(fontsize=15)
plt.show()

> # <font color="midnightblue"> The Sum of NetWorth: Individual vs Family

In [None]:
# Total net worth per country for rows whose Name contains "family",
# largest total first. Built from `all_NetWorth` because its NetWorth column
# is numeric.
# Only NetWorth is aggregated: summing the whole grouped frame also
# aggregated Name, Source and Rank, none of which were kept afterwards.
family_rows = all_NetWorth[all_NetWorth["Name"].str.contains("family")]
Fam2 = (
    family_rows.groupby("Country")[["NetWorth"]]
    .sum()
    .sort_values("NetWorth", ascending=False)
    .reset_index()
)
Fam2

In [None]:
# Total net worth per country for rows whose Name does NOT contain "family",
# largest total first.
# Only NetWorth is aggregated: summing the whole grouped frame also
# aggregated Name, Source and Rank, none of which were kept afterwards.
individual_rows = all_NetWorth[~all_NetWorth["Name"].str.contains("family")]
Indiv2 = (
    individual_rows.groupby("Country")[["NetWorth"]]
    .sum()
    .sort_values("NetWorth", ascending=False)
    .reset_index()
)
Indiv2

In [None]:
# Combine the per-country totals of individuals and families; a country
# present on only one side gets a total of 0 on the other.
Indiv2_Fam2 = pd.merge(Indiv2, Fam2, on="Country", how="outer").fillna(0)
Indiv2_Fam2 = Indiv2_Fam2.set_index("Country")
# Both inputs call their value column "NetWorth", so the merge suffixes them;
# give the two columns descriptive names (left = individuals, right = families).
Indiv2_Fam2.columns = ["NetWorth_Indiv", "NetWorth_Fam"]

# Rank explicitly before keeping 15 rows: the row order of an outer merge is
# not guaranteed to follow Indiv2's order (pandas documents outer joins as
# sorting the keys lexicographically). Round before the integer cast so a
# fractional total is rounded rather than truncated.
Indiv2_Fam2 = (
    Indiv2_Fam2.sort_values("NetWorth_Indiv", ascending=False, kind="stable")
    .head(15)
    .round()
    .astype(int)
)
Indiv2_Fam2

In [None]:
# Stacked bar chart: summed net worth of individuals and families per country.
ax = Indiv2_Fam2.plot.bar(stacked=True, figsize=(15, 7), color=['dodgerblue', 'aqua'])

# Tick label styling.
plt.yticks(fontsize=13)
plt.xticks(fontsize=15, rotation=45)

ax.set_xlabel("Country", fontsize=15)
ax.set_title("The Sum of NetWorth: Individual vs Family",
             fontsize=25, color='red')
ax.grid(color="grey", alpha=0.3)
ax.legend(fontsize=15)
plt.show()

> # <font color="midnightblue"> The number of the most featured 15 Sources

In [None]:
# The 15 most frequent Source values with their row counts,
# as a frame with the columns Source and Source_count.
source_counts = df[['Source']].value_counts()
df_TopSource = source_counts.head(15).reset_index(name="Source_count")
df_TopSource

In [None]:
# Treemap of the 15 most frequent sources.
import matplotlib
import squarify

fig = plt.figure(figsize=(14, 8))

# One colour per tile, taken at evenly spaced positions of the colormap.
n_tiles = len(df_TopSource["Source"])
colors = [plt.cm.gist_rainbow(i / float(n_tiles)) for i in range(n_tiles)]

# The counts set the tile sizes and are also printed on the tiles.
tile_counts = df_TopSource["Source_count"]
squarify.plot(
    sizes=tile_counts,
    label=df_TopSource["Source"],
    alpha=0.4,
    color=colors,
    text_kwargs={'fontsize': 10},
    value=tile_counts,
)
plt.title("The number of the most featured 15 Sources", fontsize=25, color="darkblue")
plt.axis('off')
plt.show()

> # <font color="midnightblue"> The number of the most featured 15 Countries

In [None]:
# The 15 most frequent Country values with their row counts,
# as a frame with the columns Country and Country_count.
country_counts = df[['Country']].value_counts()
df_TopCountry = country_counts.head(15).reset_index(name="Country_count")
df_TopCountry

In [None]:
# Treemap of the 15 most frequent countries.
fig = plt.figure(figsize=(14, 8))

# One colour per tile, taken at evenly spaced positions of the colormap.
n_tiles = len(df_TopCountry["Country"])
colors = [plt.cm.rainbow(i / float(n_tiles)) for i in range(n_tiles)]

# The counts set the tile sizes and are also printed on the tiles.
tile_counts = df_TopCountry["Country_count"]
squarify.plot(
    sizes=tile_counts,
    label=df_TopCountry["Country"],
    alpha=0.6,
    color=colors,
    text_kwargs={'fontsize': 10},
    value=tile_counts,
)

plt.title("The number of the most featured 15 Countries",
          fontsize=25, color="darkblue")
plt.axis('off')
plt.show()

> # <font color="midnightblue"> The most featured Source distributions of top 4 Countries

In [None]:
# The four most frequent countries and their row counts.
country_frequency = df["Country"].value_counts()
country_frequency.iloc[:4]

In [None]:
def _top_sources(country):
    """Return the five most frequent Source values among the rows of `country`."""
    in_country = df["Country"] == country
    return df.loc[in_country, "Source"].value_counts().head()


# One Series of source counts per country.
df_USA = _top_sources("United States")
df_China = _top_sources("China")
df_India = _top_sources("India")
df_Germany = _top_sources("Germany")

In [None]:
# 2x2 grid of donut charts, one per country.
fig = plt.figure(figsize=(14, 14))
fig.set_facecolor('lightcyan')

# Text printed on each wedge: the percentage with two decimals.
percent_label = lambda p: '{:.2f}%'.format(p)

# (subplot position, panel title, counts to plot, wedge colours)
panels = [
    (221, "USA", df_USA, ["red", "violet", "yellow", "greenyellow", "skyblue"]),
    (222, "China", df_China, ["violet", "blue", "orange", "skyblue", "lime"]),
    (223, "India", df_India, ["blue", "salmon", "palegreen", "violet", "lightgrey"]),
    (224, "Germany", df_Germany, ["salmon", "aqua", "darkgreen", "gold", "wheat"]),
]

for position, panel_title, counts, wedge_colors in panels:
    ax = fig.add_subplot(position)
    # A wedge width of 0.5 leaves the centre empty, which gives the donut shape.
    ax.pie(x=counts, labels=counts.index, colors=wedge_colors,
           autopct=percent_label, wedgeprops=dict(width=0.5))
    ax.set_title(panel_title, fontsize=20)

plt.show()

> # <font color="midnightblue"> The most featured Country distributions of top 4 Sources

In [None]:
# The four most frequent sources and their row counts.
source_frequency = df["Source"].value_counts()
source_frequency.iloc[:4]

In [None]:
def _top_countries(source):
    """Return the five most frequent Country values among the rows of `source`."""
    from_source = df["Source"] == source
    return df.loc[from_source, "Country"].value_counts().head()


# One Series of country counts per source.
real_estate = _top_countries("real estate")
pharmaceuticals = _top_countries("pharmaceuticals")
investments = _top_countries("investments")
diversified = _top_countries("diversified")

In [None]:
# 2x2 grid of donut charts, one per source.
fig = plt.figure(figsize=(14, 14))
fig.set_facecolor('lavenderblush')

# Text printed on each wedge: the percentage with two decimals.
percent_label = lambda p: '{:.2f}%'.format(p)

# (subplot position, panel title, counts to plot, wedge colours)
panels = [
    (221, "Real estate", real_estate,
     ["purple", "darkviolet", "mediumorchid", "plum", "thistle"]),
    (222, "Pharmaceuticals", pharmaceuticals,
     ["darkblue", "mediumblue", "royalblue", "cornflowerblue", "lightsteelblue"]),
    (223, "Investments", investments,
     ["goldenrod", "gold", "yellow", "khaki", "lightyellow"]),
    (224, "Diversified", diversified,
     ["limegreen", "lime", "greenyellow", "lightgreen", "honeydew"]),
]

for position, panel_title, counts, wedge_colors in panels:
    ax = fig.add_subplot(position)
    # A wedge width of 0.5 leaves the centre empty, which gives the donut shape.
    ax.pie(x=counts, labels=counts.index, colors=wedge_colors,
           autopct=percent_label, wedgeprops=dict(width=0.5))
    ax.set_title(panel_title, fontsize=20)

plt.show()

> # <font color="midnightblue"> The highest NetWorth on Unique Sources

In [None]:
# Number of distinct (non-missing) values in the Source column.
source_column = df["Source"]
df_unique = source_column.nunique(dropna=True)
df_unique

In [None]:
# Largest single net worth recorded for each source; keep the 40 highest.
# The NetWorth column is selected explicitly. GroupBy.max() takes
# `numeric_only` as its first positional argument, so max("NetWorth") never
# selected a column -- the string merely acted as a truthy flag, and the
# result then depended on "Rank" being numeric so that it could be dropped.
unique_Source = (
    all_NetWorth.groupby("Source", as_index=False)["NetWorth"]
    .max()
    .sort_values("NetWorth", ascending=False)
    .head(40)
)

In [None]:
# Circular barplot: one bar per source, arranged around a full circle.
plt.figure(figsize=(20, 14))
ax = plt.subplot(111, polar=True)
plt.axis('off')

lowerLimit = 30    # radius at which every bar starts (passed as `bottom`)
labelPadding = 4   # gap between the end of a bar and its label

# Named `max_networth` rather than `max`: assigning to `max` would shadow the
# builtin max() for every cell executed afterwards in the same kernel.
max_networth = unique_Source['NetWorth'].max()
slope = (max_networth - lowerLimit) / max_networth
heights = slope * unique_Source.NetWorth + lowerLimit

# Equal angular width per bar; bar k (counting from 1) sits at angle k * width.
width = 2 * np.pi / len(unique_Source.index)
angles = [position * width for position in range(1, len(unique_Source.index) + 1)]

bars = ax.bar(x=angles, height=heights, width=width, bottom=lowerLimit,
              linewidth=2, edgecolor="white", color="lightskyblue")

for bar, angle, label in zip(bars, angles, unique_Source["Source"]):
    rotation = np.rad2deg(angle)
    if np.pi / 2 <= angle < 3 * np.pi / 2:
        # Between 90 and 270 degrees the label is turned a further 180 degrees
        # and right-aligned (presumably so the text does not read upside down).
        alignment = "right"
        rotation = rotation + 180
    else:
        alignment = "left"

    # Place the label just beyond the outer end of its bar.
    ax.text(
        x=angle, y=lowerLimit + bar.get_height() + labelPadding,
        s=label, ha=alignment, va='center',
        rotation=rotation, rotation_mode="anchor")
plt.title("The highest NetWorth on Unique Sources", fontsize=25, color="blue")
plt.show()

> # <font color="midnightblue"> The highest NetWorth on Unique Sources by Name

In [None]:
# Largest net worth for each (Source, Name) pair; keep the 40 highest.
# The NetWorth column is selected explicitly. GroupBy.max() takes
# `numeric_only` as its first positional argument, so max("NetWorth") never
# selected a column -- the string merely acted as a truthy flag, and the
# result then depended on "Rank" being numeric so that it could be dropped.
Source_name = (
    all_NetWorth.groupby(["Source", "Name"], as_index=False)["NetWorth"]
    .max()
    .sort_values("NetWorth", ascending=False)
    .head(40)
)

In [None]:
# Circular barplot: one bar per (source, name) row, labelled with the name.
plt.figure(figsize=(20, 14))
ax = plt.subplot(111, polar=True)
plt.axis('off')

lowerLimit = 30    # radius at which every bar starts (passed as `bottom`)
labelPadding = 4   # gap between the end of a bar and its label

# Named `max_networth` rather than `max`: assigning to `max` would shadow the
# builtin max() for every cell executed afterwards in the same kernel.
max_networth = Source_name['NetWorth'].max()
slope = (max_networth - lowerLimit) / max_networth
heights = slope * Source_name.NetWorth + lowerLimit

# Equal angular width per bar; bar k (counting from 1) sits at angle k * width.
width = 2 * np.pi / len(Source_name.index)
angles = [position * width for position in range(1, len(Source_name.index) + 1)]

bars = ax.bar(x=angles, height=heights, width=width, bottom=lowerLimit,
              linewidth=2, edgecolor="white", color="greenyellow")

for bar, angle, label in zip(bars, angles, Source_name["Name"]):
    rotation = np.rad2deg(angle)
    if np.pi / 2 <= angle < 3 * np.pi / 2:
        # Between 90 and 270 degrees the label is turned a further 180 degrees
        # and right-aligned (presumably so the text does not read upside down).
        alignment = "right"
        rotation = rotation + 180
    else:
        alignment = "left"

    # Place the label just beyond the outer end of its bar.
    ax.text(
        x=angle, y=lowerLimit + bar.get_height() + labelPadding,
        s=label, ha=alignment, va='center',
        rotation=rotation, rotation_mode="anchor")
plt.title("The highest NetWorth on Unique Sources by Name", fontsize=25,
          color="royalblue")
plt.show()

# <font color="deepskyblue"> Thank <font color="blue">you!

I referred to the site "Python Graph Gallery" when I made this notebook.
The site helped me a lot, so I WANNA share it.
https://www.python-graph-gallery.com/