# Entertainer Data Analytics

## Analyze the Entertainer Basic Info Dataset:

Load the packages

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
df_Basic = pd.read_excel( "/content/Entertainer - Basic Info.xlsx")

Read the Data

In [None]:
# View the basic info data of Entertainers
df_Basic.head(10)

Unnamed: 0,Entertainer,Gender (traditional),Birth Year
0,Adele,F,1988
1,Angelina Jolie,F,1975
2,Aretha Franklin,F,1942
3,Bette Davis,F,1908
4,Betty White,F,1922
5,Bing Crosby,M,1903
6,Bob Hope,M,1903
7,Carol Burnett,F,1933
8,Carole Lombard,F,1908
9,Carrie Fisher,F,1956



Data Cleaning

In [None]:
df_Basic.isnull().sum()  # there are no null values.

Entertainer             0
Gender (traditional)    0
Birth Year              0
dtype: int64

In [None]:
df_Basic.shape

(70, 3)

In [None]:
# Vectorised pandas reduction instead of iterating the Series with the builtin.
df_Basic['Birth Year'].min()

1889

In [None]:

# Vectorised pandas reduction instead of iterating the Series with the builtin.
df_Basic['Birth Year'].max()

1988

In [None]:
#Converting Numerical Data into Range

def BirthYear_rng(r):
    """Return the label of the birth-year bucket that contains ``r``.

    The first three buckets include their lower edge and exclude their
    upper edge; the last bucket (1959-1988) includes both edges.

    Returns None when ``r`` falls outside 1889-1988. A NaN also gives
    None, because every comparison against it is False.
    """
    half_open_bins = (
        (1889, 1909, '1889-1909'),
        (1909, 1939, '1909-1939'),
        (1939, 1959, '1939-1959'),
    )
    for lower, upper, label in half_open_bins:
        if lower <= r < upper:
            return label
    # The final bucket is closed on both sides so the latest year is kept.
    if 1959 <= r <= 1988:
        return '1959-1988'
    return None


In [None]:
#Applying converted data into our dataset with new column - BirthYear_rng

df_Basic['BirthYear_rng']=df_Basic['Birth Year'].apply(BirthYear_rng)
df_Basic.head()

Unnamed: 0,Entertainer,Gender (traditional),Birth Year,BirthYear_rng
0,Adele,F,1988,1959-1988
1,Angelina Jolie,F,1975,1959-1988
2,Aretha Franklin,F,1942,1939-1959
3,Bette Davis,F,1908,1889-1909
4,Betty White,F,1922,1909-1939


In [None]:
df_Basic.describe().T # summary statistics, transposed so each described column is one row.

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Birth Year,70.0,1935.585714,24.135783,1889.0,1916.0,1935.5,1954.0,1988.0


In [None]:
# Now we will export the preprocessed dataset to a csv file with no row indexes

df_Basic.to_csv('preprocessed_Entertainment_Basic_Info.csv',index = False)

## Analyze the Entertainer Breakthrough Info Dataset

In [None]:

# Import Entertainer_Breakthrough Info
df_Breakthrough = pd.read_excel("/content/Entertainer - Breakthrough Info.xlsx")

In [None]:

df_Breakthrough.head(10)

Unnamed: 0,Entertainer,Year of Breakthrough/#1 Hit/Award Nomination,Breakthrough Name,Year of First Oscar/Grammy/Emmy
0,Adele,2008,19,2009.0
1,Angelina Jolie,1999,"Girl, Interrupted",1999.0
2,Aretha Franklin,1967,I Never Loved a Man (The Way I Love You),1968.0
3,Bette Davis,1934,Of Human Bondage,1935.0
4,Betty White,1952,Life with Elilzabeth,1976.0
5,Bing Crosby,1931,Several Songs,1962.0
6,Bob Hope,1938,The Big Broadcast of 1938,1940.0
7,Carol Burnett,1959,The Garry Moore Show,1962.0
8,Carole Lombard,1934,Twentieth Century,
9,Carrie Fisher,1977,Star Wars,


In [None]:
df_Breakthrough.isnull().sum()  # only 'Year of First Oscar/Grammy/Emmy' has a few null values; those rows are kept as they are.

Entertainer                                     0
Year of Breakthrough/#1 Hit/Award Nomination    0
Breakthrough Name                               0
Year of First Oscar/Grammy/Emmy                 6
dtype: int64

In [None]:
# Vectorised pandas reduction instead of iterating the Series with the builtin.
df_Breakthrough['Year of Breakthrough/#1 Hit/Award Nomination'].min()

1915

In [None]:
# Vectorised pandas reduction instead of iterating the Series with the builtin.
df_Breakthrough['Year of Breakthrough/#1 Hit/Award Nomination'].max()

2008

In [None]:
#Converting Numerical Data into Range

def Award_Nomination_rng(r):
    """Return the label of the breakthrough-year bucket that contains ``r``.

    The first three buckets include their lower edge and exclude their
    upper edge; the last bucket (1975-2008) includes both edges.

    Returns None when ``r`` falls outside 1915-2008. A NaN also gives
    None, because every comparison against it is False.
    """
    half_open_bins = (
        (1915, 1935, '1915-1935'),
        (1935, 1955, '1935-1955'),
        (1955, 1975, '1955-1975'),
    )
    for lower, upper, label in half_open_bins:
        if lower <= r < upper:
            return label
    # The final bucket is closed on both sides so the latest year is kept.
    if 1975 <= r <= 2008:
        return '1975-2008'
    return None

In [None]:
#Applying converted data into our dataset with new column - Award_Nomination_rng

df_Breakthrough['Award_Nomination_rng']=df_Breakthrough['Year of Breakthrough/#1 Hit/Award Nomination'].apply(Award_Nomination_rng)
df_Breakthrough.head()

Unnamed: 0,Entertainer,Year of Breakthrough/#1 Hit/Award Nomination,Breakthrough Name,Year of First Oscar/Grammy/Emmy,Award_Nomination_rng
0,Adele,2008,19,2009.0,1975-2008
1,Angelina Jolie,1999,"Girl, Interrupted",1999.0,1975-2008
2,Aretha Franklin,1967,I Never Loved a Man (The Way I Love You),1968.0,1955-1975
3,Bette Davis,1934,Of Human Bondage,1935.0,1915-1935
4,Betty White,1952,Life with Elilzabeth,1976.0,1935-1955


In [None]:
# This column contains missing values. Series.min() skips NaN, whereas the
# builtin min() compares element by element, so its result depends on where
# the NaNs happen to sit in the column.
df_Breakthrough['Year of First Oscar/Grammy/Emmy'].min()

1929.0

In [None]:
# This column contains missing values. Series.max() skips NaN, whereas the
# builtin max() compares element by element, so its result depends on where
# the NaNs happen to sit in the column.
df_Breakthrough['Year of First Oscar/Grammy/Emmy'].max()

2017.0

In [None]:
#Converting Numerical Data into Range

def Grammy_rng(r):
    """Return the label of the first-award-year bucket that contains ``r``.

    The first three buckets include their lower edge and exclude their
    upper edge; the last bucket (1989-2017) includes both edges.

    Returns None when ``r`` falls outside 1929-2017. A missing year (NaN)
    also gives None, because every comparison against it is False.
    """
    half_open_bins = (
        (1929, 1949, '1929-1949'),
        # Label corrected: it previously read '1949-19196955'.
        (1949, 1969, '1949-1969'),
        (1969, 1989, '1969-1989'),
    )
    for lower, upper, label in half_open_bins:
        if lower <= r < upper:
            return label
    # The final bucket is closed on both sides so the latest year is kept.
    if 1989 <= r <= 2017:
        return '1989-2017'
    return None

In [None]:
#Applying converted data into our dataset with new column - Grammy_rng

df_Breakthrough['Grammy_rng']=df_Breakthrough['Year of First Oscar/Grammy/Emmy'].apply(Grammy_rng)
df_Breakthrough.head()

Unnamed: 0,Entertainer,Year of Breakthrough/#1 Hit/Award Nomination,Breakthrough Name,Year of First Oscar/Grammy/Emmy,Award_Nomination_rng,Grammy_rng
0,Adele,2008,19,2009.0,1975-2008,1989-2017
1,Angelina Jolie,1999,"Girl, Interrupted",1999.0,1975-2008,1989-2017
2,Aretha Franklin,1967,I Never Loved a Man (The Way I Love You),1968.0,1955-1975,1949-19196955
3,Bette Davis,1934,Of Human Bondage,1935.0,1915-1935,1929-1949
4,Betty White,1952,Life with Elilzabeth,1976.0,1935-1955,1969-1989


In [None]:
df_Breakthrough.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Year of Breakthrough/#1 Hit/Award Nomination,70.0,1964.228571,22.411935,1915.0,1949.5,1963.5,1983.5,2008.0
Year of First Oscar/Grammy/Emmy,64.0,1976.234375,22.170152,1929.0,1962.0,1978.0,1993.0,2017.0


In [None]:
# Now we will export the preprocessed dataset to a csv file with no row indexes

df_Breakthrough.to_csv('preprocessed_Breakthrough.csv',index = False)

## Analyze the Entertainer Last work Info Dataset

In [None]:

# Import Entertainer_Last work info
df_Last_work_info = pd.read_excel("/content/Entertainer - Last work Info.xlsx")

In [None]:
df_Last_work_info.head(10)

Unnamed: 0,Entertainer,Year of Last Major Work (arguable),Year of Death
0,Adele,2016,
1,Angelina Jolie,2016,
2,Aretha Franklin,2014,
3,Bette Davis,1989,1989.0
4,Betty White,2016,
5,Bing Crosby,1974,1977.0
6,Bob Hope,1972,2003.0
7,Carol Burnett,2016,
8,Carole Lombard,1942,1942.0
9,Carrie Fisher,2016,2016.0


In [None]:
df_Last_work_info.isnull().sum()  # 'Year of Death' is null for many rows (presumably entertainers who are still alive); those rows are dropped in the next cell.

Entertainer                            0
Year of Last Major Work (arguable)     0
Year of Death                         40
dtype: int64

In [None]:
# Keep only the rows without missing values ('Year of Death' is the only column
# with nulls). .copy() makes the result an independent frame, so the columns
# added to it later cannot trigger pandas' SettingWithCopyWarning.
df_Last_work_info=df_Last_work_info.dropna().copy()

In [None]:
df_Last_work_info.isnull().sum()

Entertainer                           0
Year of Last Major Work (arguable)    0
Year of Death                         0
dtype: int64

In [None]:
# Vectorised pandas reduction instead of iterating the Series with the builtin.
df_Last_work_info['Year of Last Major Work (arguable)'].min()

1933

In [None]:
# Vectorised pandas reduction instead of iterating the Series with the builtin.
df_Last_work_info['Year of Last Major Work (arguable)'].max()

2016

In [None]:
#Converting Numerical Data into Range

def Last_Work_rng(r):
    """Return the label of the last-major-work-year bucket that contains ``r``.

    The first three buckets include their lower edge and exclude their
    upper edge; the last bucket (1993-2016) includes both edges.

    Returns None when ``r`` falls outside 1933-2016. A NaN also gives
    None, because every comparison against it is False.
    """
    half_open_bins = (
        (1933, 1953, '1933-1953'),
        (1953, 1973, '1953-1973'),
        (1973, 1993, '1973-1993'),
    )
    for lower, upper, label in half_open_bins:
        if lower <= r < upper:
            return label
    # The final bucket is closed on both sides so the latest year is kept.
    if 1993 <= r <= 2016:
        return '1993-2016'
    return None

In [None]:
#Applying converted data into our dataset with new column - Last_Work_rng

df_Last_work_info['Last_Work_rng']=df_Last_work_info['Year of Last Major Work (arguable)'].apply(Last_Work_rng)
df_Last_work_info.head()

Unnamed: 0,Entertainer,Year of Last Major Work (arguable),Year of Death,Last_Work_rng
3,Bette Davis,1989,1989.0,1973-1993
5,Bing Crosby,1974,1977.0,1973-1993
6,Bob Hope,1972,2003.0,1953-1973
8,Carole Lombard,1942,1942.0,1933-1953
9,Carrie Fisher,2016,2016.0,1993-2016


In [None]:
# Series.min() is the vectorised reduction and skips NaN, unlike the builtin.
df_Last_work_info['Year of Death'].min()

1942.0

In [None]:
# Series.max() is the vectorised reduction and skips NaN, unlike the builtin.
df_Last_work_info['Year of Death'].max()

2016.0

In [None]:
#Converting Numerical Data into Range

def Death_Yr_rng(r):
    """Return the label of the year-of-death bucket that contains ``r``.

    The first three buckets include their lower edge and exclude their
    upper edge; the last bucket (2002-2016) includes both edges.

    Returns None when ``r`` falls outside 1942-2016. A NaN also gives
    None, because every comparison against it is False.
    """
    half_open_bins = (
        (1942, 1962, '1942-1962'),
        (1962, 1982, '1962-1982'),
        (1982, 2002, '1982-2002'),
    )
    for lower, upper, label in half_open_bins:
        if lower <= r < upper:
            return label
    # The final bucket is closed on both sides so the latest year is kept.
    if 2002 <= r <= 2016:
        return '2002-2016'
    return None

In [None]:
#Applying converted data into our dataset with new column - Death_Yr_rng

df_Last_work_info['Death_Yr_rng']=df_Last_work_info['Year of Death'].apply(Death_Yr_rng)
df_Last_work_info.head()

Unnamed: 0,Entertainer,Year of Last Major Work (arguable),Year of Death,Last_Work_rng,Death_Yr_rng
3,Bette Davis,1989,1989.0,1973-1993,1982-2002
5,Bing Crosby,1974,1977.0,1973-1993,1962-1982
6,Bob Hope,1972,2003.0,1953-1973,2002-2016
8,Carole Lombard,1942,1942.0,1933-1953,1942-1962
9,Carrie Fisher,2016,2016.0,1993-2016,2002-2016


In [None]:
# Now we will export the preprocessed dataset to a csv file with no row indexes

df_Last_work_info.to_csv('preprocessed_Last_work.csv',index = False)