In [19]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [20]:
URL = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMSkillsNetwork-AI0272EN-SkillsNetwork/labs/dataset/2016.csv"

In [21]:

def load_data(file_path):
    try:
        # Read the CSV file into a pandas DataFrame
        df = pd.read_csv(file_path)
        
        # Print the first 5 rows of the DataFrame
        print(df.head(5))
        
        # Check the data types of the columns
        print(df.dtypes)
        
        return df
        
    except FileNotFoundError:
        print(f"File not found: {file_path}")
    except pd.errors.EmptyDataError:
        print(f"File is empty: {file_path}")
    except pd.errors.ParserError as e:
        print(f"Error parsing file: {e}")

# Call the function
df=load_data(URL)

       Country          Region  Happiness Rank  Happiness Score  \
0      Denmark  Western Europe               1            7.526   
1  Switzerland  Western Europe               2            7.509   
2      Iceland  Western Europe               3            7.501   
3       Norway  Western Europe               4            7.498   
4      Finland  Western Europe               5            7.413   

   Lower Confidence Interval Upper Confidence Interval  \
0                      7.460                     7.592   
1                      7.428                      7.59   
2                      7.333                     7.669   
3                      7.421                     7.575   
4                      7.351                     7.475   

  Economy (GDP per Capita)   Family Health (Life Expectancy)  Freedom  \
0                  1.44178  1.16374                  0.79504  0.57941   
1                  1.52733  1.14524                  0.86303  0.58557   
2                  1.42666  1

In [22]:
df.head()

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Lower Confidence Interval,Upper Confidence Interval,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,Denmark,Western Europe,1,7.526,7.46,7.592,1.44178,1.16374,0.79504,0.57941,0.44453,0.36171,2.73939
1,Switzerland,Western Europe,2,7.509,7.428,7.59,1.52733,1.14524,0.86303,0.58557,0.41203,0.28083,2.69463
2,Iceland,Western Europe,3,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137
3,Norway,Western Europe,4,7.498,7.421,7.575,1.57744,1.1269,0.79579,0.59609,0.35776,0.37895,2.66465
4,Finland,Western Europe,5,7.413,7.351,7.475,1.40598,1.13464,0.81091,0.57104,0.41004,0.25492,2.82596


In [23]:
        
def clean_data(df):
    
    try:
        
        # Remove leading and trailing whitespaces from the values in a column
        df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
        
        # Replace empty strings with NaN values
        df = df.replace('', np.nan)
            
        # Change the data type of the columns to appropriate type as per the latest version of pandas
        
        df.loc[:, ~df.columns.isin(['Country', 'Region'])] = df.loc[:, ~df.columns.isin(['Country', 'Region'])].astype('float')

        
        return df
    except Exception as exp:
        print(f'Error occured {exp}')

preprocessed_df=clean_data(df)
preprocessed_df



DataFrame.applymap has been deprecated. Use DataFrame.map instead.



Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Lower Confidence Interval,Upper Confidence Interval,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,Denmark,Western Europe,1,7.526,7.460,7.592,1.44178,1.16374,0.79504,0.57941,0.44453,0.36171,2.73939
1,Switzerland,Western Europe,2,7.509,7.428,7.59,1.52733,1.14524,0.86303,0.58557,0.41203,0.28083,2.69463
2,Iceland,Western Europe,3,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137
3,Norway,Western Europe,4,7.498,7.421,7.575,1.57744,1.12690,0.79579,0.59609,0.35776,0.37895,2.66465
4,Finland,Western Europe,5,7.413,7.351,7.475,1.40598,1.13464,0.81091,0.57104,0.41004,0.25492,2.82596
...,...,...,...,...,...,...,...,...,...,...,...,...,...
152,Benin,Sub-Saharan Africa,153,3.484,3.404,3.564,0.39499,0.10419,0.21028,0.39747,0.06681,0.20180,2.10812
153,Afghanistan,Southern Asia,154,3.360,3.288,3.432,0.38227,0.11037,0.17344,0.1643,0.07112,0.31268,2.14558
154,Togo,Sub-Saharan Africa,155,3.303,3.192,3.414,0.28123,0.00000,0.24811,0.34678,0.11587,0.17517,2.13540
155,Syria,Middle East and Northern Africa,156,3.069,2.936,3.202,0.74719,0.14866,0.62994,0.06912,0.17233,0.48397,0.81789


In [24]:
preprocessed_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 157 entries, 0 to 156
Data columns (total 13 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Country                        157 non-null    object 
 1   Region                         157 non-null    object 
 2   Happiness Rank                 157 non-null    int64  
 3   Happiness Score                157 non-null    float64
 4   Lower Confidence Interval      153 non-null    float64
 5   Upper Confidence Interval      154 non-null    object 
 6   Economy (GDP per Capita)       155 non-null    object 
 7   Family                         157 non-null    float64
 8   Health (Life Expectancy)       154 non-null    object 
 9   Freedom                        156 non-null    object 
 10  Trust (Government Corruption)  157 non-null    float64
 11  Generosity                     157 non-null    float64
 12  Dystopia Residual              157 non-null    flo

##  Data Insights and Visualization
 Identify the GDP per capita and Healthy Life Expectancy of the top 10 countries.

In [25]:
preprocessed_df['Economy (GDP per Capita)']=preprocessed_df['Economy (GDP per Capita)'].astype('float')
preprocessed_df['Health (Life Expectancy)']=preprocessed_df['Health (Life Expectancy)'].astype('float')
preprocessed_df['Freedom']=preprocessed_df['Freedom'].astype('float')


In [26]:
# Identify the top 10 countries by GDP per capita and Healthy Life Expectancy
top_10 = preprocessed_df.nlargest(10,'Economy (GDP per Capita)')

# Create a bar chart to show the GDP per capita and Healthy Life Expectancy of the top 10 countries

fig1 = px.bar(top_10, x='Country', 
              y='Economy (GDP per Capita)', 
              title='GDP per Capita and Healthy Life Expectancy of Top 10 Countries')

# Display the figure
fig1.show()

Find the correlation between the Economy (GDP per Capita), Family, Health (Life Expectancy), Freedom, Trust (Government Corruption), Generosity and Happiness score.

In [27]:
# Create a sub-dataset including Economy (GDP per Capita), Family, Health (Life Expectancy), Freedom, Trust (Government Corruption), Generosity, and Happiness Score attributes

sub_df = preprocessed_df[['Economy (GDP per Capita)', 'Family', 
             'Health (Life Expectancy)', 'Freedom', 
             'Trust (Government Corruption)', 
             'Generosity', 'Happiness Score']]

# Create a heatmap to show the correlation between attributes in the subdataset
fig2 = px.imshow(sub_df.corr(), 
                 title='Correlation between attributes in the subdataset')
fig2.update_layout(height=600, width=800)
fig2.show()

Create a scatter plot to identify the effect of GDP per Capita on Happiness Score in various Regions. Use plotly for creating the plot.

In [28]:

import plotly.graph_objects as go


# Filter the data to include only the required columns
scatter_df = preprocessed_df[['Happiness Score', 'Economy (GDP per Capita)']]

# Create a scatter plot
fig3 = px.scatter(scatter_df, x='Happiness Score', y='Economy (GDP per Capita)', 
                color='Economy (GDP per Capita)', 
                color_discrete_sequence=[0, 1], 
                title='Relationship between Happiness Score and GDP per Capita')

# Show the plot
fig3.show()

Create a pie chart to present Happiness Score by Regions

In [29]:
# Filter the data to include only the required columns
scatter_df2 = preprocessed_df[['Happiness Score', 'Region', 'Economy (GDP per Capita)']]

# Create a pie chart
fig4 = px.pie(scatter_df2, values='Happiness Score', names='Region', 
             title='Distribution of Happiness Score by Region', 
             hole=0.5, 
             color_discrete_sequence=['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15C'])

# Show the plot
fig4.show()

Create a map to display GDP per capita of countries and include Healthy life expectancy to be shown as a tooltip

In [30]:

# Filter the data to include only the required columns
scatter_df3 = preprocessed_df[['Country', 'Economy (GDP per Capita)', 'Health (Life Expectancy)']]

# Create a choropleth map
fig5 = px.choropleth(scatter_df3, 
                    locations='Country',  # Country column for location mapping
                    color='Economy (GDP per Capita)',  # Color based on GDP per Capita
                    hover_name='Country',  # Country name on hover
                    hover_data=['Economy (GDP per Capita)', 'Health (Life Expectancy)'],  # Show additional data on hover
                    color_continuous_scale='Blues',  # Color scale for continuous data
                    title='GDP per Capita and Healthy Life Expectancy by Country')

# Customize layout
fig5.update_layout(
    geo_scope='world',  # Scope to 'world' for global map
    font_size=12,
    font_family='Arial',
    font_color='black',  # Changed font color for better visibility
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    showlegend=False,
    height=800,
    width=800
)

# Show the plot
fig5.show()


Write prompts that generate codes to write at least four of the graph plots generated in the previous steps into a HTML page.

In [35]:
from plotly.subplots import make_subplots

# Create a subplot layout
fig = make_subplots(
    rows=2, cols=2,  # 3 rows and 2 columns
    subplot_titles=('GDP per Capita by Country', 'Distribution of Happiness Score by Region',
                    'Happiness Score vs GDP per Capita', 'Happiness Score by GDP per Capita',
                    'GDP per Capita Choropleth Map'),
    vertical_spacing=0.15  # Adjust the spacing between the rows
)

# Add each figure to the subplot layout
fig.add_trace(fig1.data[0], row=1, col=1)  # Add choropleth map (fig1) to the first row
fig.add_trace(fig2.data[0], row=1, col=2)  # Add pie chart (fig2) to the first row
fig.add_trace(fig3.data[0], row=2, col=1)  # Add scatter plot (fig3) to the second row
#fig.add_trace(fig4.data[0], row=2, col=2)  # Add bar chart (fig4) to the second row
# fig.add_trace(fig5.data[0], row=2, col=2)  # Add choropleth map (fig5) to the third row

# Update layout and set a title for the entire dashboard
fig.update_layout(
    title_text="Happiness Score and Economy Dashboard",
    showlegend=False,  # Hide legend for the entire layout
    height=1000,  # Adjust height of the entire layout
    width=1000,  # Adjust width of the entire layout
    margin={"r": 0, "t": 50, "l": 0, "b": 50}  # Set margins
)

# Save the subplot figures to a single HTML file
fig.write_html('dashboard.html')