In [1]:
import pandas as pd

In [2]:
# Load the dataset
data = pd.read_csv("cleaned_billionaires.csv")

In [8]:
# Filter the dataset to include only relevant columns
filtered_data = data[['wealth.how.inherited', 'wealth.how.was founder']]
filtered_data.head()

Unnamed: 0,wealth.how.inherited,wealth.how.was founder
0,not inherited,True
1,not inherited,True
2,not inherited,True
3,not inherited,True
4,not inherited,True


In [4]:
# Create a new binary column for self-made vs. inherited
data['self_made'] = data.apply(lambda row: 'self-made' if row['wealth.how.inherited'] == 'not inherited' and row['wealth.how.was founder'] == 'true' else 'inherited', axis=1)

# Display the dataset with the new 'self_made' column
data[['wealth.how.inherited', 'wealth.how.was founder', 'self_made']].head()

Unnamed: 0,wealth.how.inherited,wealth.how.was founder,self_made
0,not inherited,True,inherited
1,not inherited,True,inherited
2,not inherited,True,inherited
3,not inherited,True,inherited
4,not inherited,True,inherited


In [10]:
# Calculate the total count of billionaires
total_count = len(data)

# Count the number of individuals with inherited wealth
inherited_wealth = len(data[data['wealth.how.inherited'] != 'not inherited'])

# Calculate the proportion of self-made billionaires
self_made_proportion = (total_count - inherited_wealth) / total_count

# Calculate the proportion of inherited billionaires
inherited_proportion = inherited_wealth / total_count

# Print the proportions
print("Proportion of Self-Made Billionaires: {:.2%}".format(self_made_proportion))
print("Proportion of Inherited Billionaires: {:.2%}".format(inherited_proportion))

Proportion of Self-Made Billionaires: 64.04%
Proportion of Inherited Billionaires: 35.96%


In [11]:
# Gender Analysis
gender_proportions = data.groupby('demographics.gender')['self_made'].value_counts(normalize=True).unstack()
print("Proportions of Self-Made vs. Inherited Billionaires by Gender:")
print(gender_proportions)

Proportions of Self-Made vs. Inherited Billionaires by Gender:
self_made            inherited
demographics.gender           
female                     1.0
male                       1.0
married couple             1.0


In [12]:
# Location Analysis
location_proportions = data.groupby('location.citizenship')['self_made'].value_counts(normalize=True).unstack()
print("\nProportions of Self-Made vs. Inherited Billionaires by Location:")
print(location_proportions)


Proportions of Self-Made vs. Inherited Billionaires by Location:
self_made             inherited
location.citizenship           
Algeria                     1.0
Angola                      1.0
Argentina                   1.0
Australia                   1.0
Austria                     1.0
...                         ...
United Arab Emirates        1.0
United Kingdom              1.0
United States               1.0
Venezuela                   1.0
Vietnam                     1.0

[73 rows x 1 columns]


In [14]:
# Group the data by gender
gender_grouped = data.groupby('demographics.gender')

# Calculate the proportions of self-made and inherited billionaires for each gender
gender_proportions = gender_grouped['self_made'].value_counts(normalize=True).unstack()

# Display the proportions
print("Proportions of Self-Made vs. Inherited Billionaires by Gender:")
print(gender_proportions)

Proportions of Self-Made vs. Inherited Billionaires by Gender:
self_made            inherited
demographics.gender           
female                     1.0
male                       1.0
married couple             1.0


In [18]:
# Count the number of male and female billionaires
gender_counts = data['demographics.gender'].value_counts()

# Display the counts
print("Number of Male Billionaires:", gender_counts['male'])
print("Number of Female Billionaires:", gender_counts['female'])

Number of Male Billionaires: 2288
Number of Female Billionaires: 248
