In [49]:
# Install MySql packages
# conda update -n base -c conda-forge conda --yes
# !pip install ipython-sql
# !pip install mysql-connector-python
# !conda install -c conda-forge python-dotenv --yes

In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels as sm
import mysql.connector
from mysql.connector import Error
from os import environ, path
from dotenv import load_dotenv

In [34]:
# Read enviromental variables to connect the database
load_dotenv()
password = environ.get('PASSWORD')
host = environ.get('HOST')
database = environ.get('DATABASE')

## Connect to the database and create dataframes for each table

In [35]:
try:
    connection = mysql.connector.connect(host=host ,
                                        database=database,
                                        user='root',
                                        password=password)
    if connection.is_connected():
        print("Succes Connected")
    df_musician = pd.read_sql_query("SELECT * FROM music_and_death.musician;", connection)
    df_birth = pd.read_sql_query("SELECT * FROM music_and_death.birth;", connection)
    df_death = pd.read_sql_query("SELECT * FROM music_and_death.death;", connection)
    df_genre = pd.read_sql_query("SELECT * FROM music_and_death.music_genre;", connection)
    genre = pd.read_sql_query("SELECT * FROM music_and_death.genre;", connection)
    cause_death = pd.read_sql_query("SELECT * FROM music_and_death.cause_death;", connection)
    
except Error as e:
    print("Error closing connection")

# Concatenate tables
else:
    # Concatenate musician and death dataframes
    df = pd.concat([df_musician, df_birth], axis=1)
    df = pd.concat([df, df_death], axis=1)
    df = pd.merge(df, cause_death, on=["death_id", "death_id"])
    df = df.loc[:, ~df.columns.duplicated()].copy()
    df = pd.merge(df, df_genre, on=["musician_id"])
    df = df.loc[:, ~df.columns.duplicated()].copy()
    df = pd.merge(df, genre, on=["genre_id"])
    # rearrange columns
    df = df.iloc[:, [0, 1, 2, 3, 5, 7, 8, 9, 11, 6, 4, 10]]
    
finally:
    connection.close()

Succes Connected


In [36]:
df

Unnamed: 0,musician_id,stage_name,middle_name,stage_lastname,birth,death_date,death_age,cause,genre,birthplace_id,death_id,genre_id
0,1,John,Winston,Lennon,1940-10-09,1980-12-08,40,homicide,rock,50337,1,2
1,2,Kurt,Donald,Cobain,1967-02-20,1994-04-05,27,suicide,rock,110979,2,2
2,17,Chris,John,Cornell,1964-07-20,2017-05-18,52,suicide,rock,126104,2,2
3,18,Chester,Charles,Bennington,1976-03-20,2017-07-20,41,suicide,rock,124148,2,2
4,32,Ian,Kevin,Curtis,1956-07-15,1980-05-18,23,suicide,rock,51499,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...
119,113,Al,,Jolson,1886-06-09,1950-10-23,64,heart disease,jazz,66683,7,9
120,121,Hot,Lips,Page,1908-01-27,1954-11-05,46,heart disease,jazz,114990,7,9
121,21,Amy,Jade,Winehouse,1983-09-14,2011-07-23,27,alcohol overdose,jazz,50388,8,9
122,122,Dink,,Johnson,1892-10-28,1954-11-29,62,not reported,jazz,122756,16,9


## Add birth places

Unnamed: 0,musician_id,stage_name,middle_name,stage_lastname,birth,death_date,death_age,cause,genre,birthplace_id
0,1,John,Winston,Lennon,1940-10-09,1980-12-08,40,homicide,rock,50337
1,2,Kurt,Donald,Cobain,1967-02-20,1994-04-05,27,suicide,rock,110979
2,17,Chris,John,Cornell,1964-07-20,2017-05-18,52,suicide,rock,126104
3,18,Chester,Charles,Bennington,1976-03-20,2017-07-20,41,suicide,rock,124148
4,32,Ian,Kevin,Curtis,1956-07-15,1980-05-18,23,suicide,rock,51499
...,...,...,...,...,...,...,...,...,...,...
119,113,Al,,Jolson,1886-06-09,1950-10-23,64,heart disease,jazz,66683
120,121,Hot,Lips,Page,1908-01-27,1954-11-05,46,heart disease,jazz,114990
121,21,Amy,Jade,Winehouse,1983-09-14,2011-07-23,27,alcohol overdose,jazz,50388
122,122,Dink,,Johnson,1892-10-28,1954-11-29,62,not reported,jazz,122756


In [23]:
pd.concat([df, df_birth], axis=1)

Unnamed: 0,musician_id,stage_name,middle_name,stage_lastname,death_id,death_date,death_age,cause,genre_id,genre,musician_id.1,birth,birthplace_id
0,1,John,Winston,Lennon,1,1980-12-08,40,homicide,2,rock,1,1940-10-09,50337
1,2,Kurt,Donald,Cobain,2,1994-04-05,27,suicide,2,rock,2,1967-02-20,110979
2,17,Chris,John,Cornell,2,2017-05-18,52,suicide,2,rock,3,1946-11-16,109304
3,18,Chester,Charles,Bennington,2,2017-07-20,41,suicide,2,rock,4,1966-08-20,116141
4,32,Ian,Kevin,Curtis,2,1980-05-18,23,suicide,2,rock,5,1956-12-06,125919
...,...,...,...,...,...,...,...,...,...,...,...,...,...
119,113,Al,,Jolson,7,1950-10-23,64,heart disease,9,jazz,120,1920-06-20,123482
120,121,Hot,Lips,Page,7,1954-11-05,46,heart disease,9,jazz,121,1908-01-27,114990
121,21,Amy,Jade,Winehouse,8,2011-07-23,27,alcohol overdose,9,jazz,122,1892-10-28,122756
122,122,Dink,,Johnson,16,1954-11-29,62,not reported,9,jazz,123,1954-12-15,122560


In [12]:
df_2 = df.drop(['middle_name', 'death_id', "death_date", "genre_id"], axis=1)

In [13]:
df_2

Unnamed: 0,musician_id,stage_name,stage_lastname,death_age,cause,genre
0,1,John,Lennon,40,homicide,rock
1,2,Kurt,Cobain,27,suicide,rock
2,17,Chris,Cornell,52,suicide,rock
3,18,Chester,Bennington,41,suicide,rock
4,32,Ian,Curtis,23,suicide,rock
...,...,...,...,...,...,...
119,113,Al,Jolson,64,heart disease,jazz
120,121,Hot,Page,46,heart disease,jazz
121,21,Amy,Winehouse,27,alcohol overdose,jazz
122,122,Dink,Johnson,62,not reported,jazz
