<a href="https://colab.research.google.com/github/Rodrigofch7/Exploratory-Data-Analysis/blob/main/UNEMPLOYMENT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import plotly.express as px


# Location of the input CSV. The default is the path used in the Colab
# runtime; change it here when running the notebook elsewhere.
DATA_PATH = '/content/world-data-2023.csv'

# Load the raw country-level table that every later cell works on.
df = pd.read_csv(DATA_PATH)

In [2]:
# Discard every row that has a missing value in any column. The frame is
# modified in place, so `df` itself only holds complete rows afterwards.
df.dropna(axis=0, how='any', inplace=True)

# Cell output: the remaining rows.
df

Unnamed: 0,Country,Density\n(P/Km2),Abbreviation,Agricultural Land( %),Land Area(Km2),Armed Forces size,Birth Rate,Calling Code,Capital/Major City,Co2-Emissions,...,Out of pocket health expenditure,Physicians per thousand,Population,Population: Labor force participation (%),Tax revenue (%),Total tax rate,Unemployment rate,Urban_population,Latitude,Longitude
0,Afghanistan,60,AF,58.10%,652230,323000,32.49,93.0,Kabul,8672,...,78.40%,0.28,38041754,48.90%,9.30%,71.40%,11.12%,9797273,33.939110,67.709953
1,Albania,105,AL,43.10%,28748,9000,11.78,355.0,Tirana,4536,...,56.90%,1.20,2854191,55.70%,18.60%,36.60%,12.33%,1747593,41.153332,20.168331
2,Algeria,18,DZ,17.40%,2381741,317000,24.28,213.0,Algiers,150006,...,28.10%,1.72,43053054,41.20%,37.20%,66.10%,11.70%,31510100,28.033886,1.659626
4,Angola,26,AO,47.50%,1246700,117000,40.73,244.0,Luanda,34693,...,33.40%,0.21,31825295,77.50%,9.20%,49.10%,6.89%,21061025,-11.202692,17.873887
6,Argentina,17,AR,54.30%,2780400,105000,17.02,54.0,Buenos Aires,201348,...,17.60%,3.96,44938712,61.30%,10.10%,106.30%,9.79%,41339571,-38.416097,-63.616672
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185,United Kingdom,281,GB,71.70%,243610,148000,11.00,44.0,London,379025,...,14.80%,2.81,66834405,62.80%,25.50%,30.60%,3.85%,55908316,55.378051,-3.435973
186,United States,36,US,44.40%,9833517,1359000,11.60,1.0,"Washington, D.C.",5006302,...,11.10%,2.61,328239523,62.00%,9.60%,36.60%,14.70%,270663028,37.090240,-95.712891
187,Uruguay,20,UY,82.60%,176215,22000,13.86,598.0,Montevideo,6766,...,16.20%,5.05,3461734,64.00%,20.10%,41.80%,8.73%,3303394,-32.522779,-55.765835
191,Vietnam,314,VN,39.30%,331210,522000,16.75,84.0,Hanoi,192668,...,43.50%,0.82,96462106,77.40%,19.10%,37.60%,2.01%,35332140,14.058324,108.277199


In [4]:
# Inspect the dtype of each column; the bare last expression is shown as the
# cell output, so no print() wrapper is needed.
df.dtypes

Country                                       object
Density\n(P/Km2)                              object
Abbreviation                                  object
Agricultural Land( %)                         object
Land Area(Km2)                                object
Armed Forces size                             object
Birth Rate                                   float64
Calling Code                                 float64
Capital/Major City                            object
Co2-Emissions                                 object
CPI                                           object
CPI Change (%)                                object
Currency-Code                                 object
Fertility Rate                               float64
Forested Area (%)                             object
Gasoline Price                                object
GDP                                           object
Gross primary education enrollment (%)        object
Gross tertiary education enrollment (%)       

In [5]:
def _clean_numeric(column, drop_pattern, divisor=None):
    """Convert a formatted text column (e.g. '1,234', '$1.50', '58.10%') to numbers.

    Parameters
    ----------
    column : pandas.Series
        The column to convert.
    drop_pattern : str
        Regular expression matching the characters to delete before parsing,
        for example r'[%,]'.
    divisor : float, optional
        When given, the parsed values are divided by this number.

    Returns
    -------
    pandas.Series
        The parsed numeric values. A column that is already numeric is
        returned unchanged, so this cell can be re-run without raising on
        the `.str` accessor or dividing a second time.

    Raises
    ------
    ValueError
        Propagated from `pd.to_numeric` when a cleaned value is not a number.
    """
    if pd.api.types.is_numeric_dtype(column):
        return column
    text = column.str.replace(drop_pattern, '', regex=True).str.strip()
    values = pd.to_numeric(text)
    return values if divisor is None else values / divisor


# Population: drop the thousands separators.
df['Population'] = _clean_numeric(df['Population'], r'[,]')

# Percentage columns kept in percent units ('58.10%' -> 58.1). Only '%' and ','
# are removed; the decimal point is parsed as-is instead of being deleted and
# compensated for with a division by 100, which was only correct for values
# written with exactly two decimals.
df['Agricultural Land( %)'] = _clean_numeric(df['Agricultural Land( %)'], r'[%,]')
df['CPI Change (%)'] = _clean_numeric(df['CPI Change (%)'], r'[%,]')
df['Tax revenue (%)'] = _clean_numeric(df['Tax revenue (%)'], r'[%,]')

# Minimum wage: drop the '$' sign and thousands separators (raw string, so '$'
# needs no backslash and no invalid escape sequence is produced).
df['Minimum wage'] = _clean_numeric(df['Minimum wage'], r'[$,]')

# Unemployment rate is stored as a fraction ('11.12%' -> 0.1112).
df['Unemployment rate'] = _clean_numeric(df['Unemployment rate'], r'[%,]', divisor=100)

In [6]:
# Rank the countries by population and chart the ten largest as bars.
top_10_populated = df.nlargest(n=10, columns='Population')

# Axis captions for the chart (each column keeps its own name).
bar_axis_labels = {'Country': 'Country', 'Population': 'Population'}

fig2 = px.bar(
    data_frame=top_10_populated,
    x='Country',
    y='Population',
    labels=bar_axis_labels,
    title='Top 10 Most Populated Countries',
)
fig2.show()


In [7]:
# Scatter map with one marker per row of df: marker colour encodes life
# expectancy and marker size encodes physicians per thousand people.
# Columns are referenced by name (not passed as Series) so Plotly Express
# resolves them against df, consistent with the agricultural-land map cell.
fig = px.scatter_mapbox(
    df,
    lon='Longitude',
    lat='Latitude',
    color='Life expectancy',
    size='Physicians per thousand',
    hover_name='Country',
    zoom=0.1,
    width=1200,
    height=900,
    title="Life Expectancy and Physicians per thousand Scatter Map",
)

# Use the 'open-street-map' base map and set the figure margins
# (50 px at the top, 10 px at the bottom, none at the sides).
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 50, "l": 0, "b": 10},
)

# Show the scatter map
fig.show()

In [9]:
# Scatter map with one marker per row of df: marker colour encodes the
# minimum wage and marker size encodes the unemployment rate.
# Columns are referenced by name (not passed as Series) so Plotly Express
# resolves them against df, consistent with the agricultural-land map cell.
fig = px.scatter_mapbox(
    df,
    lon='Longitude',
    lat='Latitude',
    color='Minimum wage',
    size='Unemployment rate',
    hover_name='Country',
    zoom=0.1,
    width=1200,
    height=900,
    title="Minimum wage and Unemployment rate Scatter Map",
)

# Use the 'open-street-map' base map and set the figure margins
# (50 px at the top, 10 px at the bottom, none at the sides).
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 50, "l": 0, "b": 10},
)

# Show the scatter map
fig.show()

import plotly.express as px

# Scatter map with one marker per row of df: marker colour encodes the
# agricultural land share and marker size encodes the unemployment rate.
fig = px.scatter_mapbox(
    data_frame=df,
    lat='Latitude',
    lon='Longitude',
    color='Agricultural Land( %)',
    size='Unemployment rate',
    hover_name='Country',
    hover_data={'Agricultural Land( %)': True, 'Unemployment rate': True},
    color_continuous_scale='Viridis',
    size_max=15,
    zoom=1.5,
    width=1000,
    height=600,
    title="Agricultural Land(%) and Unemployment rate Scatter Map",
)

# Hover box: bold country name, then both metrics with two decimals.
# customdata[0] / customdata[1] are expected to follow the hover_data order
# above; <extra></extra> hides the secondary trace box.
hover_template = "<br>".join([
    "<b>%{hovertext}</b>",
    "Agricultural Land (%): %{customdata[0]:.2f}",
    "Unemployment rate: %{customdata[1]:.2f}<extra></extra>",
])

# Marker size is interpreted as a diameter and scaled linearly, with the
# reference size set to (largest unemployment rate) / 200.
# NOTE(review): this is applied after size_max=15 above and presumably
# replaces the sizing Plotly Express derived from it - confirm the intended
# marker size.
marker_style = {
    'sizemode': 'diameter',
    'sizeref': df['Unemployment rate'].max() / 200,
}
fig.update_traces(marker=marker_style, hovertemplate=hover_template)

# Base map style, colour-bar caption and figure margins in a single call.
fig.update_layout(
    mapbox_style="open-street-map",
    coloraxis_colorbar_title="Agricultural Land (%)",
    margin={"r": 10, "t": 50, "l": 10, "b": 10},
)

# Show the scatter map
fig.show()
