## PROJECT TWO: Forbes Billionaires

In [3]:
# Import relevant dependencies

from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine, inspect

from config import sql

In [5]:
# Load the raw Forbes billionaires CSV from the Resources folder
source_csv = 'Resources/forbes_billionaires_geo.csv'
forbes_data = pd.read_csv(source_csv)

# Show the first five rows as a quick sanity check
forbes_data.head(n=5)

Unnamed: 0,Name,NetWorth,Country,Source,Rank,Age,Residence,Citizenship,Status,Children,Education,Self_made,geometry
0,Jeff Bezos,177.0,United States,Amazon,1,57.0,"Seattle, Washington",United States,In Relationship,4.0,"Bachelor of Arts/Science, Princeton University",True,POINT (-122.3300624 47.6038321)
1,Elon Musk,151.0,United States,"Tesla, SpaceX",2,49.0,"Austin, Texas",United States,In Relationship,7.0,"Bachelor of Arts/Science, University of Pennsy...",True,POINT (-97.74369950000001 30.2711286)
2,Bernard Arnault & family,150.0,France,LVMH,3,72.0,"Paris, France",France,Married,5.0,"Bachelor of Arts/Science, Ecole Polytechnique ...",False,POINT (2.3514616 48.8566969)
3,Bill Gates,124.0,United States,Microsoft,4,65.0,"Medina, Washington",United States,Divorced,3.0,"Drop Out, Harvard University",True,POINT (-122.2264453 47.620548)
4,Mark Zuckerberg,97.0,United States,Facebook,5,36.0,"Palo Alto, California",United States,Married,2.0,"Drop Out, Harvard University",True,POINT (-122.1598465 37.4443293)


In [6]:
# Split the Education column into degree and university on the first comma only
# (a university name may itself contain commas).

# `n` must be passed by keyword: positional use was deprecated in pandas 1.x
# and removed in pandas 2.0.
education_parts = forbes_data["Education"].str.split(',', n=1, expand=True)

# Strip surrounding whitespace; otherwise the university keeps the space that
# follows the comma (e.g. " Princeton University").
forbes_data[["Education - Degree", "Education - University"]] = education_parts.apply(
    lambda part: part.str.strip()
)


In [7]:
# Remove the Residence column and the original (unsplit) Education column
forbes_data = forbes_data.drop(["Residence", "Education"], axis="columns")


In [8]:
# Count the missing values in every column
check_na = forbes_data.isna().sum()
print(check_na)

Name                         0
NetWorth                     0
Country                      0
Source                       0
Rank                         0
Age                        125
Citizenship                 16
Status                     665
Children                  1203
Self_made                   18
geometry                     0
Education - Degree        1346
Education - University    1365
dtype: int64


In [9]:
# Remove every row that has at least one missing value
forbes_data.dropna(axis=0, how='any', inplace=True)

# Number of rows remaining after the drop
forbes_data.shape[0]

983

In [10]:
# Show the first five rows after the rows with missing values were removed

forbes_data.head(n=5)

Unnamed: 0,Name,NetWorth,Country,Source,Rank,Age,Citizenship,Status,Children,Self_made,geometry,Education - Degree,Education - University
0,Jeff Bezos,177.0,United States,Amazon,1,57.0,United States,In Relationship,4.0,True,POINT (-122.3300624 47.6038321),Bachelor of Arts/Science,Princeton University
1,Elon Musk,151.0,United States,"Tesla, SpaceX",2,49.0,United States,In Relationship,7.0,True,POINT (-97.74369950000001 30.2711286),Bachelor of Arts/Science,University of Pennsylvania
2,Bernard Arnault & family,150.0,France,LVMH,3,72.0,France,Married,5.0,False,POINT (2.3514616 48.8566969),Bachelor of Arts/Science,Ecole Polytechnique de Paris
3,Bill Gates,124.0,United States,Microsoft,4,65.0,United States,Divorced,3.0,True,POINT (-122.2264453 47.620548),Drop Out,Harvard University
4,Mark Zuckerberg,97.0,United States,Facebook,5,36.0,United States,Married,2.0,True,POINT (-122.1598465 37.4443293),Drop Out,Harvard University


In [11]:
# Parse the geometry text "POINT (<longitude> <latitude>)" into separate
# Longitude and Latitude columns, then drop the original geometry column.

# A single regex extraction replaces the previous chain of positional
# str.split(' ', 1, ...) calls, which no longer run on pandas 2.0+ (`n` is
# keyword-only there) and needed two helper columns plus manual bracket slicing.
# The named groups match the target column names.
point_parts = forbes_data["geometry"].str.extract(
    r'POINT\s*\(\s*(?P<Longitude>\S+)\s+(?P<Latitude>[^\s)]+)\s*\)'
)

# Values are kept as strings, exactly as before; rows that do not match the
# pattern become NaN instead of silently producing mangled text.
forbes_data[["Longitude", "Latitude"]] = point_parts[["Longitude", "Latitude"]]
forbes_data = forbes_data.drop(columns=["geometry"])



In [12]:
# Show the first five rows with the new Longitude / Latitude columns
forbes_data.head(n=5)

Unnamed: 0,Name,NetWorth,Country,Source,Rank,Age,Citizenship,Status,Children,Self_made,Education - Degree,Education - University,Longitude,Latitude
0,Jeff Bezos,177.0,United States,Amazon,1,57.0,United States,In Relationship,4.0,True,Bachelor of Arts/Science,Princeton University,-122.3300624,47.6038321
1,Elon Musk,151.0,United States,"Tesla, SpaceX",2,49.0,United States,In Relationship,7.0,True,Bachelor of Arts/Science,University of Pennsylvania,-97.7436995,30.2711286
2,Bernard Arnault & family,150.0,France,LVMH,3,72.0,France,Married,5.0,False,Bachelor of Arts/Science,Ecole Polytechnique de Paris,2.3514616,48.8566969
3,Bill Gates,124.0,United States,Microsoft,4,65.0,United States,Divorced,3.0,True,Drop Out,Harvard University,-122.2264453,47.620548
4,Mark Zuckerberg,97.0,United States,Facebook,5,36.0,United States,Married,2.0,True,Drop Out,Harvard University,-122.1598465,37.4443293


In [13]:
# Give every column a lower-case name so the frame can be imported into PostgreSQL

postgres_column_names = {
    # person and ranking
    "Name": "name",
    "NetWorth": "networth",
    "Country": "country",
    "Source": "source",
    "Rank": "rank",
    # personal details
    "Age": "age",
    "Citizenship": "citizenship",
    "Status": "status",
    "Children": "children",
    "Self_made": "self_made",
    # education (the split columns get short names)
    "Education - Degree": "degree",
    "Education - University": "university",
    # coordinates
    "Longitude": "longitude",
    "Latitude": "latitude",
}
forbes_data = forbes_data.rename(columns=postgres_column_names)


In [14]:
# Name the index 'id' so it lines up with the id column of the PostgreSQL table

forbes_data.index.name = 'id'


In [15]:
# Show the first five rows with the renamed columns and index

forbes_data.head(n=5)


Unnamed: 0_level_0,name,networth,country,source,rank,age,citizenship,status,children,self_made,degree,university,longitude,latitude
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,Jeff Bezos,177.0,United States,Amazon,1,57.0,United States,In Relationship,4.0,True,Bachelor of Arts/Science,Princeton University,-122.3300624,47.6038321
1,Elon Musk,151.0,United States,"Tesla, SpaceX",2,49.0,United States,In Relationship,7.0,True,Bachelor of Arts/Science,University of Pennsylvania,-97.7436995,30.2711286
2,Bernard Arnault & family,150.0,France,LVMH,3,72.0,France,Married,5.0,False,Bachelor of Arts/Science,Ecole Polytechnique de Paris,2.3514616,48.8566969
3,Bill Gates,124.0,United States,Microsoft,4,65.0,United States,Divorced,3.0,True,Drop Out,Harvard University,-122.2264453,47.620548
4,Mark Zuckerberg,97.0,United States,Facebook,5,36.0,United States,Married,2.0,True,Drop Out,Harvard University,-122.1598465,37.4443293


In [16]:
# Write the cleaned frame (including the 'id' index) to a CSV file

clean_csv = 'forbes_cleandata.csv'
forbes_data.to_csv(clean_csv, index=True)

In [17]:
# Show the dtype of every column, to help choose matching column types
# when creating the table in PostgreSQL
forbes_data.dtypes

name            object
networth       float64
country         object
source          object
rank             int64
age            float64
citizenship     object
status          object
children       float64
self_made       object
degree          object
university      object
longitude       object
latitude        object
dtype: object

### Create database connection

In [15]:
# Create the SQLAlchemy engine for the local PostgreSQL database "Project_two".
# The password is read from the local config module (imported as `sql`) and is
# percent-encoded before being placed in the URL, so characters such as
# "@", ":", "/" or "%" in it cannot be misread as URL delimiters.
engine = create_engine(
    f'postgresql://postgres:{quote(str(sql), safe="")}@localhost:5432/Project_two'
)


In [16]:
# Confirm which tables exist in the connected database.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector returns the same list of table names.
inspect(engine).get_table_names()


['forbes_billionaires']

### Loading into PostgreSQL

In [18]:
# Append the cleaned rows to the existing forbes_billionaires table in PostgreSQL.
# The index is written as well. Because the mode is 'append', running this cell
# again inserts the same rows a second time.

forbes_data.to_sql('forbes_billionaires', engine, if_exists='append', index=True)


In [19]:
# Read the loaded table back from PostgreSQL and show its first five rows

preview_query = 'select * from forbes_billionaires'
loaded_rows = pd.read_sql_query(preview_query, con=engine)
loaded_rows.head(n=5)


Unnamed: 0,id,name,networth,country,source,rank,age,citizenship,status,children,self_made,degree,university,longitude,latitude
0,0,Jeff Bezos,177.0,United States,Amazon,1,57.0,United States,In Relationship,4.0,True,Bachelor of Arts/Science,Princeton University,-122.3300624,47.6038321
1,1,Elon Musk,151.0,United States,"Tesla, SpaceX",2,49.0,United States,In Relationship,7.0,True,Bachelor of Arts/Science,University of Pennsylvania,-97.7436995,30.2711286
2,2,Bernard Arnault & family,150.0,France,LVMH,3,72.0,France,Married,5.0,False,Bachelor of Arts/Science,Ecole Polytechnique de Paris,2.3514616,48.8566969
3,3,Bill Gates,124.0,United States,Microsoft,4,65.0,United States,Divorced,3.0,True,Drop Out,Harvard University,-122.2264453,47.620548
4,4,Mark Zuckerberg,97.0,United States,Facebook,5,36.0,United States,Married,2.0,True,Drop Out,Harvard University,-122.1598465,37.4443293
