# In [1]:
from google.colab import drive
# Mount Google Drive inside the Colab runtime; the CSV paths used later in
# this notebook are located under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# Output: Mounted at /content/drive


# In [3]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from scipy.stats import pearsonr
from plotly.subplots import make_subplots


# Load the world-population dataset once. The original code parsed the same
# CSV twice (into `df` and `data`); reading it a single time avoids the
# redundant Drive I/O and parsing work.
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Final project/world-population.csv')

# `df` stays available as an independent copy, so code that mutates one
# frame still cannot affect the other.
df = data.copy()



# Global population trend: one point per year, taken as the largest
# 'World Population' value recorded for that year.
yearly_world_max = data.groupby('Year')['World Population'].max()
world_population = yearly_world_max.reset_index()

fig = px.line(
    world_population,
    x='Year',
    y='World Population',
    title='World Population Over Time',
    labels={'World Population': 'World Population'},
    template='plotly_dark',
)

# Render the trend as a green line, 2 px wide.
fig.update_traces(line={'color': 'green', 'width': 2})
fig.show()

# Population distribution across countries, restricted to the rows of the
# most recent year present in the dataset.
latest_year = data['Year'].max()
is_latest_year = data['Year'] == latest_year
latest_data = data[is_latest_year]

# World map coloured by population; countries are matched by their names.
fig2 = px.choropleth(
    latest_data,
    locations='country',
    locationmode='country names',
    color='Population',
    hover_name='country',
    color_continuous_scale=px.colors.sequential.Plasma,
    title='Population by Country',
)

fig2.show()

# Median age per country for the latest year, highest median age first.
median_age_by_country = latest_data.sort_values(by='Median Age', ascending=False)

fig3 = px.bar(
    median_age_by_country,
    x='country',
    y='Median Age',
    title='Median Age by Country',
    template='plotly_dark',
)

# Use a single orange colour for every bar.
fig3.update_traces(marker={'color': 'orange'})
fig3.show()

# Animated scatter plot (one frame per year) exploring the relationship
# between fertility rate and population density; marker size follows the
# population and colour identifies the country.
fig4 = px.scatter(data, x='Fertility Rate', y='Density (P/Km²)', size='Population', color='country',
                  hover_name='country', animation_frame='Year',
                  title='Relationship between Fertility Rate and Population Density',
                  template='plotly_dark')

# Scale markers by diameter rather than by area.
diameter_marker = dict(sizemode='diameter')
fig4.update_traces(marker=diameter_marker)

# update_traces only modifies the traces of the initial view. Each animation
# frame carries its own copy of the traces, so the same override has to be
# applied there as well; otherwise marker scaling switches back as soon as
# the animation plays or the slider is moved.
for frame in fig4.frames:
    for frame_trace in frame.data:
        frame_trace.update(marker=diameter_marker)

fig4.show()

# Yearly population change over time: average the 'Yearly  Change' column
# for every (year, country) pair and draw one line per country.
change_by_year_and_country = data.groupby(['Year', 'country'])['Yearly  Change']
yearly_change = change_by_year_and_country.mean().reset_index()

fig5 = px.line(
    yearly_change,
    x='Year',
    y='Yearly  Change',
    color='country',
    title='Yearly Population Change by Country Over Time',
    template='plotly_dark',
)

fig5.show()

# Remove rows with missing or invalid values in 'Density (P/Km²)' and
# 'Fertility Rate'. Coercing to numeric turns malformed entries into NaN and
# infinities are mapped to NaN too, so that dropna() really discards every
# row that pearsonr cannot handle (dropna alone only removes missing values).
corr_columns = ['Density (P/Km²)', 'Fertility Rate']
cleaned_data = data.copy()
cleaned_data[corr_columns] = (
    cleaned_data[corr_columns]
    .apply(pd.to_numeric, errors='coerce')
    .replace([np.inf, -np.inf], np.nan)
)
cleaned_data = cleaned_data.dropna(subset=corr_columns)

# Pearson correlation between density and fertility rate. pearsonr raises
# when given fewer than two observations, so report NaN in that case
# instead of aborting the whole notebook.
if len(cleaned_data) >= 2:
    correlation, _ = pearsonr(cleaned_data['Density (P/Km²)'], cleaned_data['Fertility Rate'])
else:
    correlation = float('nan')

# Marker size encodes population in millions. Used directly as a pixel
# diameter, the largest countries would get markers more than a thousand
# pixels wide, so `sizeref` rescales the values to a bounded maximum
# diameter. Missing or invalid populations are drawn at the minimum size.
max_marker_diameter = 60
population_millions = pd.to_numeric(cleaned_data['Population'], errors='coerce').fillna(0) / 1e6
largest_population = population_millions.max()
# Fall back to a neutral scale factor when there is nothing to scale
# (empty data, or no positive population value).
size_reference = largest_population / max_marker_diameter if largest_population > 0 else 1

# Creating scatter plot for density vs fertility rate
fig6 = make_subplots(rows=1, cols=1)
trace = go.Scatter(
    x=cleaned_data['Density (P/Km²)'],
    y=cleaned_data['Fertility Rate'],
    mode='markers',
    marker=dict(
        size=population_millions,
        sizemode='diameter',
        sizeref=size_reference,
        sizemin=2,
        color=cleaned_data['Fertility Rate'],
        colorscale='Viridis',
    ),
    text=cleaned_data['country'],
    name='Density vs Fertility Rate',
)

fig6.add_trace(trace)
fig6.update_layout(title=f'Density vs Fertility Rate (Correlation = {correlation:.2f})',
                   xaxis=dict(title='Density (P/Km²)'), yaxis=dict(title='Fertility Rate'),
                   template='plotly_dark')

fig6.show()

# Urban population percentage per country for the latest year, most
# urbanised countries first.
urban_pop_by_country = latest_data.sort_values(by='Urban  Pop %', ascending=False)

fig7 = px.bar(
    urban_pop_by_country,
    x='country',
    y='Urban  Pop %',
    title='Urban Population Percentage by Country',
    template='plotly_dark',
)

# Use a single blue colour for every bar.
fig7.update_traces(marker={'color': 'blue'})
fig7.show()

# Median age versus fertility rate for the latest year: one marker per
# country, sized by population and coloured by country.
fig8 = px.scatter(
    latest_data,
    x='Median Age',
    y='Fertility Rate',
    size='Population',
    color='country',
    hover_name='country',
    title='Relationship between Median Age and Fertility Rate by Country',
    template='plotly_dark',
)

# Scale markers by diameter rather than by area.
fig8.update_traces(marker={'sizemode': 'diameter'})
fig8.show()

# Population over time for the five countries that have the largest
# population in the latest year.
five_most_populous = latest_data.nlargest(5, 'Population')
top_countries = five_most_populous['country'].tolist()

# Keep the full history (all years) of just those countries.
is_top_country = data['country'].isin(top_countries)
top_countries_data = data[is_top_country]

fig9 = px.line(
    top_countries_data,
    x='Year',
    y='Population',
    color='country',
    title='Population Change Over Time for Top 5 Most Populous Countries',
    template='plotly_dark',
)

fig9.show()




#The graphs collectively provide a comprehensive understanding of global population trends and their characteristics. Here's a summary of the findings:

#**Increasing Global Population**: The world's population is steadily increasing, which can be attributed to factors such as advancements in healthcare and technology.

#**Population Distribution**: Population distribution varies significantly across countries, with China and India having notably high populations. Understanding this distribution is essential for global resource allocation and policy-making.

#**Aging Populations in Certain Countries**: Some countries have a high median age indicating an aging population, which can have implications on social security and healthcare systems.

#**Correlation Between Fertility Rate and Population Density**: There is a negative correlation between fertility rate and population density: as population density increases, fertility rates tend to decrease. This suggests that people in densely populated areas may prefer smaller families, although a correlation on its own does not establish a causal link.

#**Urbanization as an Indicator of Economic Development**: Countries with higher urban population percentages tend to be more industrialized. Urbanization patterns can be indicative of economic development and shifts in economic structures.

#**Variations in Population Growth Rates**: Different countries experience varying population growth rates due to economic, political, and healthcare factors. Understanding these variations can help predict future demographic changes and their potential impacts.

#**Correlation Between Median Age and Fertility Rate**: A negative correlation between median age and fertility rate was observed. Countries with a younger population tend to have higher fertility rates and vice versa.

#By analyzing these graphs, we can understand not only the global population trends but also the factors that might be influencing these trends. For policymakers, understanding these trends is crucial in making informed decisions regarding resource allocation, economic development, and social welfare. For businesses, this information might be essential for market analysis, expansion, or investment decisions. For researchers and the general public, it contributes to a broader understanding of global demographics and societal trends.



