In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go
import folium
import pandas as pd
import requests
import json
import squarify
import seaborn as sns
import os
# Walk the Kaggle input directory and print the full path of every file found
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(root, fname))

/kaggle/input/immigration-to-canada/canadian_immegration_data.csv


An overview of the immigration data in the canadian_immegration_data.csv file

In [2]:
# Load the immigration dataset into the notebook-wide DataFrame
df = pd.read_csv('/kaggle/input/immigration-to-canada/canadian_immegration_data.csv')

# Labelled overview sections, printed in order: sample rows, shape,
# column dtypes, summary statistics and per-column missing-value counts
overview_sections = [
    ('First 5 Rows of Immigration Data:', df.head()),
    ('Data Shape:', df.shape),
    ('Data Types:', df.dtypes),
    ('Summary Statistics:', df.describe()),
    ('Missing Values:', df.isnull().sum()),
]
for section_title, section_body in overview_sections:
    print(section_title)
    print(section_body)

First 5 Rows of Immigration Data:
          Country Continent           Region             DevName  1980  1981  \
0     Afghanistan      Asia    Southern Asia  Developing regions    16    39   
1         Albania    Europe  Southern Europe   Developed regions     1     0   
2         Algeria    Africa  Northern Africa  Developing regions    80    67   
3  American Samoa   Oceania        Polynesia  Developing regions     0     1   
4         Andorra    Europe  Southern Europe   Developed regions     0     0   

   1982  1983  1984  1985  ...  2005  2006  2007  2008  2009  2010  2011  \
0    39    47    71   340  ...  3436  3009  2652  2111  1746  1758  2203   
1     0     0     0     0  ...  1223   856   702   560   716   561   539   
2    71    69    63    44  ...  3626  4807  3623  4005  5393  4752  4325   
3     0     0     0     0  ...     0     1     0     0     0     0     0   
4     0     0     0     0  ...     0     1     1     0     0     0     0   

   2012  2013  Total  
0  26

In [3]:
# Get total immigration by year
# Sum every country's count per year column to get the yearly totals
total_by_year = df.set_index('Country').loc[:, '1980':'2013'].sum(axis=0)

# Fit a straight line (degree-1 polynomial) through the yearly totals
years = total_by_year.index.astype(int)  # year labels are strings; the fit needs numbers
slope, intercept = np.polyfit(years, total_by_year.values, 1)
trend_line = intercept + slope * years

# Build the three traces up front: bars, a star-marked line and the dotted trend line
bar_trace = go.Bar(
    name="Total Immigration",
    x=total_by_year.index,
    y=total_by_year.values,
    xperiodalignment="middle",
    marker_color='rgb(4, 194, 166)',
)
line_trace = go.Scatter(
    name="Total Immigration",
    mode="lines+markers",
    x=total_by_year.index,
    y=total_by_year.values,
    marker=dict(symbol="star"),
)
trend_trace = go.Scatter(
    name="Trend Line",
    mode="lines",
    x=total_by_year.index,
    y=trend_line,
    line=dict(color="blue", dash="dot"),
)
fig = go.Figure(data=[bar_trace, line_trace, trend_trace])

# Title and axis settings applied in a single layout update
fig.update_layout(
    title="Total Immigration to Canada from 1980 to 2013",
    xaxis=dict(
        title='Year',
        showgrid=True,
        ticklabelmode="period",
        tickangle=50,  # tilt the year labels by 50 degrees
    ),
    yaxis=dict(title="Total Number of Immigrants"),
)

fig.show()

This chart shows the total immigration to Canada from 1980 to 2013. The chart is interactive: click either of the two 'Total Immigration' legend entries in the top right corner to toggle the bar view or the line view on and off.
Overall, total immigration to Canada increased from 99,137 in 1980 to 257,537 in 2013, with fluctuations in between affecting the overall shape and a peak of 276,956 in 2010. It's interesting to note that the overall trend is upward, with total immigration more than doubling from 1980 to 2013.

In [4]:
# Distribution of the per-country 'Total' values, split into 50 bins
fig = px.histogram(
    df,
    x='Total',
    nbins=50,
    title='Histogram of Total Immigrants from 1980 to 2013',
)
# Each bar counts countries, so label the axes accordingly
fig.update_xaxes(title_text='Number of Immigrants')
fig.update_yaxes(title_text='Number of Countries')
fig.show()

This histogram shows how countries are distributed by their total number of immigrants to Canada from 1980 to 2013. The largest group, with more than 140 countries, sent fewer than 100k immigrants each. Only 4 countries sent more than 500k immigrants.

In [5]:
# Pick the five rows with the largest 'Total'. DataFrame.nlargest returns
# exactly five rows, ordered from largest to smallest, whereas matching on
# the top-5 *values* with isin() would also pull in any row tied with them.
top_5_countries = df.nlargest(5, 'Total')

# One row per country, one column per year ('1980' .. '2013')
top_5_yearly = top_5_countries.set_index('Country').loc[:, '1980':'2013']

# Create Plotly figure with one line per country
fig = go.Figure()
for country, yearly_counts in top_5_yearly.iterrows():
    fig.add_trace(go.Scatter(x=yearly_counts.index, y=yearly_counts.values, name=country))

# Set the title and axis labels for the figure
fig.update_layout(
    title='Immigration from Top 5 Countries (1980-2013)',
    yaxis_title='Number of Immigrants',
    xaxis=dict(
        title='Year',
        tickangle=50,  # Set the angle of the tick labels to 50 degrees
    ),
)
# Show the figure
fig.show()

This chart displays the migration trends of the top 5 countries from 1980 to 2013. The migration trend of the United Kingdom shows an initial upward trend, reaching a peak in 1994, followed by a significant decline, dropping below 10k in the years that followed.

On the other hand, the migration trends of India, China, the Philippines, and Pakistan show more gradual growth until around 1993, with each country having its unique pattern. China's migration trend reached a peak of 42,584 in 2005, after which it declined. In contrast, the Philippines had its peak migration in 2010, reaching 38,617.

Overall, we observe an increasing trend in migration for these top 5 countries, but there is also a significant fluctuation in the migration trends over time, reflecting the various economic and political factors that drive migration patterns.

In [6]:
# Select the top 10 countries based on total immigration
# Select the ten rows with the largest 'Total'. DataFrame.nlargest returns
# exactly ten rows, whereas matching on the top-10 *values* with isin()
# would also include any row tied with one of them. The result is re-sorted
# ascending so the bars grow from left to right.
top_10_countries = df.nlargest(10, 'Total').sort_values('Total', ascending=True)

# Create a bar trace for the total immigration of each country
trace = go.Bar(x=top_10_countries['Country'], y=top_10_countries['Total'])

# Create a figure and add the bar trace to it
fig = go.Figure(data=[trace])

# Set the title and axis labels for the figure
fig.update_layout(title='Total Immigration from Top 10 Countries',
                  xaxis_title='Country', yaxis_title='Number of Immigrants')

# Show the figure
fig.show()

This chart shows the total immigration figures for the top 10 countries, with the top 4 countries having over 500k immigrants each. The Philippines had a total of 511,391 immigrants, while the United Kingdom had about 551.5k immigrants. China had 659,962 immigrants, and India had the highest number of immigrants among the top 10 countries, with a total of 691,904.

In [7]:
# Get the top 10 countries by Total column
# Ten rows with the largest 'Total', in descending order
top10_countries = df.sort_values(by='Total', ascending=False).iloc[:10]

# Treemap with one tile per country, sized by 'Total' and coloured per country
fig = px.treemap(
    top10_countries,
    path=['Country'],
    values='Total',
    color='Country',
    color_discrete_sequence=px.colors.qualitative.Pastel,
)

# Add the chart title, then enlarge the tile labels
fig.update_layout(title='Top 10 Countries by Total Immigrants to Canada (1980-2013)')
fig.update_traces(textfont_size=18)

fig.show()


This treemap shows the top 10 countries by total immigrants to Canada (1980-2013).

In [8]:
# Download countries geojson file
# Location of the GeoJSON file describing world country boundaries
URL = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DV0101EN-SkillsNetwork/Data%20Files/world_countries.json'

# Dictionary mapping the dataset's country names to the names used for the
# choropleth join (key_on='feature.properties.name' below)
country_map = {'Bolivia (Plurinational State of)': 'Bolivia',
               "Democratic People's Republic of Korea": 'North Korea',
               'Guinea-Bissau': 'Guinea Bissau',
               'Iran (Islamic Republic of)': 'Iran',
               'Congo': 'Republic of the Congo',
               'Venezuela (Bolivarian Republic of)': 'Venezuela',
               "Côte d'Ivoire": 'Ivory Coast',
               'United Kingdom of Great Britain and Northern Ireland': 'United Kingdom',
               'Viet Nam': 'Vietnam',
               'Serbia': 'Republic of Serbia',
               'The former Yugoslav Republic of Macedonia': 'Macedonia',
               'Brunei Darussalam': 'Brunei',
               'Syrian Arab Republic': 'Syria',
               'Bahamas': 'The Bahamas',
               'Republic of Korea': 'South Korea',
               "Lao People's Democratic Republic": 'Laos',
               'Republic of Moldova': 'Moldova'
              }

# Rename the countries before touching the network, so `df` ends up with the
# shortened names even when the download below fails. Names without an entry
# in the mapping are left unchanged.
df['Country'] = df['Country'].replace(country_map)

# Download the countries GeoJSON. The timeout stops the cell from hanging
# forever, and raise_for_status() turns an HTTP error into an exception
# instead of letting a non-JSON error body reach .json().
response = requests.get(URL, timeout=30)
response.raise_for_status()
world_geo = response.json()

# Create a world map centred on latitude 0, longitude 0. The variable is named
# `world_map` so that the builtin `map` is not shadowed.
world_map = folium.Map(location=[0, 0], zoom_start=2)

# Add the choropleth layer, shading each country by its 'Total'
folium.Choropleth(
    geo_data=world_geo,
    name='choropleth',
    data=df,
    columns=['Country', 'Total'],
    key_on='feature.properties.name',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Total Immigrants',
).add_to(world_map)

# Add a layer control
folium.LayerControl().add_to(world_map)

# Display the map
world_map


ConnectionError: HTTPSConnectionPool(host='cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud', port=443): Max retries exceeded with url: /IBMDeveloperSkillsNetwork-DV0101EN-SkillsNetwork/Data%20Files/world_countries.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f8a2caf5150>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution'))

A choropleth map has been created using the Folium library in Python to display the total immigration figures for all countries in the world. The map uses color-coding to indicate the immigration figures, with the redder shade indicating higher immigration numbers.

This type of visualization provides a geospatial perspective of where immigrants originate from, making it easier to identify regions or countries with the highest immigration rates.

In [None]:
# Total immigrants per region, ordered from the largest region to the smallest
region_totals = (
    df.groupby(['Region'])['Total']
    .sum()
    .reset_index()
    .sort_values('Total', ascending=False)
)

# Horizontal bar chart with one bar (and one colour) per region
fig = px.bar(
    region_totals,
    x='Total',
    y='Region',
    color='Region',
    orientation='h',
    title='Immigration to Canada by Region from 1980 to 2013',
    color_continuous_scale='matter',
)
fig.show()

From the chart, we see a breakdown of immigrants to Canada by region from 1980 to 2013. The data shows that more than half a million immigrants came from four regions: South Asia, East Asia, Southeast Asia, and Northern Europe. In contrast, the number of immigrants from areas such as Australia and New Zealand, Melanesia, Central Asia, Polynesia, and Micronesia was lower, with under 40,000 immigrants from these regions during the same period.

This pattern of immigration also reflects population density in different regions. For example, regions with higher population densities, such as Southern Asia, have contributed more immigrants to Canada than regions with lower population densities, such as Australia and New Zealand, Melanesia, Central Asia, Polynesia, and Micronesia.

In [None]:
# Group the data by development status and year, and sum the total immigrants
# Sum the numeric columns per development status, then flip the table so that
# each row is one column label (stored in 'Year') and each status is a column
status_totals = df.groupby(['DevName']).sum(numeric_only=True)
dev_df = status_totals.T.reset_index().rename(columns={'index': 'Year'})
# Discard the final row so it is not plotted as if it were a year
dev_df = dev_df.iloc[:-1]

# One line per development status
fig = go.Figure()
for status in ('Developing regions', 'Developed regions'):
    fig.add_trace(go.Scatter(
        x=dev_df['Year'],
        y=dev_df[status],
        mode='lines',
        name=status,
    ))

fig.update_layout(
    title='Total Immigrants by Development Status from 1980 to 2013',
    yaxis_title='Number of Immigrants',
    xaxis=dict(
        title='Year',
        tickangle=50,  # tilt the tick labels by 50 degrees
    ),
)
fig.show()

This line graph shows the difference in the number of people immigrating to Canada from 1980 to 2013 between developed and developing countries. While developed countries hovered around 50k during this period, developing countries quadrupled the number of migrants, reaching more than 200k in migration in 2013.

In [None]:
# Ring hierarchy from the centre outwards: development status, continent, region
hierarchy = ['DevName', 'Continent', 'Region']
fig = px.sunburst(df, path=hierarchy, values='Total')
# Fixed square canvas
fig.update_layout(height=800, width=800)
fig.show()

This sunburst chart is an extension of the previous line chart, giving us a more detailed look at how immigrants from developed and developing regions contribute to total immigration to Canada. The ring next to the center shows the continents, and the outer ring shows the regions within these continents. Developing regions account for nearly three-quarters of total immigration, far more than developed regions. Island nations, Central Asia and East Asia account for a very small number compared to other regions.

In [None]:
# Total immigrants per continent, largest first
continent_sums = df.groupby(['Continent'])['Total'].sum()
df_continent = continent_sums.reset_index().sort_values('Total', ascending=False)

# Bar chart of the continent totals
fig = px.bar(
    df_continent,
    x='Continent',
    y='Total',
    color='Continent',
    title='Total Immigrants by Continent from 1980 to 2013',
)
fig.update_layout(xaxis_title='Continent', yaxis_title='Number of Immigrants')
fig.show()

# Pie chart of the same totals, to show each continent's share
fig = px.pie(values=df_continent['Total'], names=df_continent['Continent'])
fig.show()

The bar graph and pie chart illustrate the total immigration to Canada from different continents between 1980 and 2013. The data shows that Asia accounted for the largest number of immigrants, with about 3.3 million individuals, representing over 51.8% of all immigrants to Canada during this period. Europe was the second largest contributor with around 1.4 million immigrants, which corresponds to roughly 22% of all immigrants.

In contrast, the smallest numbers of immigrants came from Northern America and Oceania, with 241,142 and 55,174 respectively, accounting for only 3.76% and 0.861% of all immigrants to Canada. The pie chart highlights the differences in the distribution of immigrants across the continents, with Asia comprising more than half of all immigrants.

In [None]:
# Group the data by continent and sum the total immigrants
# Year column labels, '1980' .. '2013'; used both as x-axis values and to
# select exactly the yearly columns from the grouped table
x_labels = [str(year) for year in range(1980, 2014)]

# Sum the numeric columns per continent and order continents by their 'Total'.
# numeric_only=True keeps the text columns (Country, Region, DevName) out of
# the result; without it, recent pandas versions concatenate those strings and
# place them ahead of the yearly values, shifting every point on the chart.
continent_totals = (
    df.groupby('Continent')
    .sum(numeric_only=True)
    .sort_values('Total', ascending=False)
)

# One line per continent. Selecting the x_labels columns keeps y the same
# length as x and leaves the 'Total' column out of the plotted series.
data = [
    go.Scatter(
        x=x_labels,
        y=continent_totals.loc[continent, x_labels],
        mode='lines',
        name=continent,
    )
    for continent in continent_totals.index
]

# Create the layout for the chart
layout = go.Layout(
    title='Total Immigration by Continent from 1980 to 2013',
    xaxis=dict(
        title='Year',
        tickangle=50  # Set the angle of the tick labels to 50 degrees
    ),
    yaxis=dict(title='Number of Immigrants')
)

# Create the figure and plot the chart
fig = go.Figure(data=data, layout=layout)
fig.show()

The line graph depicts the number of immigrants who migrated to Canada from different continents between 1980 and 2013. The data shows that in 1980, Europe had the highest number of immigrants with nearly 40k, followed by Asia with approximately 31k. From 1980 to 1985, the number of immigrants from all continents decreased slightly before starting to increase again.

Since 1985, the number of immigrants from Asia, Europe, and Latin America and the Caribbean has increased, with the most significant increases observed in Asia and Europe. Asia had a noticeable increase, reaching its highest peak in 2010 with nearly 164k immigrants. In contrast, Europe and Latin America and the Caribbean experienced a decreasing trend from 1992 onwards.

Interestingly, the number of immigrants from Africa has continuously increased from 1980 to 2013. In contrast, Northern America and Oceania have maintained steady levels below 11k and 3k, respectively, throughout this period.

In [None]:
import plotly.express as px

def plot_continent_scatter(continent,color_continuous_scale=None):
    """Show a scatter chart of one continent's yearly immigration totals.

    Filters the notebook-level ``df`` to the rows whose 'Continent' equals
    ``continent``, sums the columns at positions 4 up to (not including) the
    last one, and plots those sums with marker size and colour tied to the
    value, plus an OLS trendline.

    Parameters
    ----------
    continent : value compared against ``df['Continent']``; also used in the title.
    color_continuous_scale : passed straight through to ``px.scatter``.
    """
    # Positions 4:-1 skip the four leading descriptive columns and the last column
    yearly = df[df['Continent'] == continent].iloc[:, 4:-1]
    x_vals = yearly.columns
    y_vals = yearly.sum(axis=0)

    fig = px.scatter(
        x=x_vals,
        y=y_vals,
        color=y_vals,
        size=y_vals,
        title=f'Total Immigrants from {continent} from 1980 to 2013',
        labels={'x': 'Year', 'y': 'Number of Immigrants'},
        color_continuous_scale=color_continuous_scale,
        trendline='ols',
    )
    fig.update_xaxes(tickangle=50)
    fig.show()

# Draw one scatter chart per continent, each with its own colour scale
continent_scales = (
    ('Asia', 'matter'),
    ('Africa', 'magenta'),
    ('Latin America and the Caribbean', 'sunset'),
    ('Europe', 'darkmint'),
)
for continent_name, scale_name in continent_scales:
    plot_continent_scatter(continent_name, color_continuous_scale=scale_name)


These four scatter charts represent immigration to Canada from four different continents: Asia, Africa, Europe, and Latin America and the Caribbean. The charts share a common feature where each circle dot represents the number of immigrants for a given year, and the size and color of the circle dot correspond to the number of immigrants. Specifically, larger and darker circle dots indicate a higher number of immigrants.

Moreover, each scatter chart has a trendline that shows the overall trend from 1980 to 2013. The trendline allows us to see whether the number of immigrants is increasing or decreasing over time. In general, we observe that Asia, Africa, and Latin America and the Caribbean have an upward trend, while Europe has a tendency to decrease despite some periods of rapid growth, such as from 1985 to 1990.

Overall, these scatter charts provide a more detailed and nuanced view of immigration to Canada than the previous chart that displayed the total immigration numbers by continent. The scatter charts allow us to see the year-to-year fluctuations and the trends over time for each continent.

In [None]:
# Filter the dataset to include only the records from the Asia continent
# Rows for the Asia continent only
asia_df = df[df['Continent'] == 'Asia']

# Sum 'Total' per country and keep the five largest
asia_top5 = asia_df.groupby('Country')['Total'].sum().nlargest(5)

# Rows belonging to those five countries
top_5_df = asia_df[asia_df['Country'].isin(asia_top5.index)]

# One line per country; positions 4:-1 skip the four leading descriptive
# columns and the last column
year_cols = top_5_df.columns[4:-1]
fig = go.Figure()
for country, country_rows in top_5_df.groupby('Country', sort=False):
    fig.add_trace(go.Scatter(
        x=year_cols,
        y=country_rows[year_cols].sum(axis=0),
        mode='lines',
        name=country,
    ))

fig.update_layout(
    title='Top 5 Immigration Countries from Asia from 1980 to 2013',
    yaxis_title='Number of Immigrants',
    xaxis=dict(
        title='Year',
        tickangle=50,  # tilt the tick labels by 50 degrees
    ),
)
fig.show()

The chart provides insights into the immigration patterns of the top 5 Asian countries to Canada from 1980 to 2013. It is clear that China, India, and the Philippines are the top three countries with the most immigrants during this period. Despite some fluctuations, these three countries generally exhibit a consistent upward trend in the number of immigrants, with the Philippines experiencing a significant decline from 1993 to 1998. Iran and Pakistan also show an increasing trend, albeit with some fluctuations, with both countries having around 12,000 immigrants in 2013 compared to around 1,000 in 1980. These trends suggest that Canada has become an increasingly attractive destination for immigrants from Asia, particularly from China, India, and the Philippines.

In [None]:
# Filter the dataset to include only the records from the Africa continent
# Rows for the Africa continent only
africa_df = df[df['Continent'] == 'Africa']

# Sum 'Total' per country and keep the five largest
africa_top5 = africa_df.groupby('Country')['Total'].sum().nlargest(5)

# Rows belonging to those five countries
top_5_df = africa_df[africa_df['Country'].isin(africa_top5.index)]

# One line per country; positions 4:-1 skip the four leading descriptive
# columns and the last column
year_cols = top_5_df.columns[4:-1]
fig = go.Figure()
for country, country_rows in top_5_df.groupby('Country', sort=False):
    fig.add_trace(go.Scatter(
        x=year_cols,
        y=country_rows[year_cols].sum(axis=0),
        mode='lines',
        name=country,
    ))

fig.update_layout(
    title='Top 5 Immigration Countries from Africa from 1980 to 2013',
    yaxis_title='Number of Immigrants',
    xaxis=dict(
        title='Year',
        tickangle=50,  # tilt the tick labels by 50 degrees
    ),
)

# Rotated text label placed at data coordinates (10.8, 4000), without an arrow
fig.add_annotation(
    x=10.8, y=4000,
    xref='x', yref='y',
    text='Somalia Civil War and Famine',
    font=dict(family="sans serif", size=16, color='purple'),
    textangle=-80,
    showarrow=False,
)
fig.show()

The data in this chart presents the historical progression of emigration from the top 5 African countries from 1980 to 2013. The results indicate an overall rising trend of emigration from these countries during this period. However, the most significant increases in emigration were recorded in Algeria, Egypt, and Morocco.

Somalia stands out as an exceptional case, with a substantial surge in emigration from 401 individuals in 1989 to 5,794 individuals in 1992. This period coincides with political turmoil and conflict that may have pushed individuals to seek refuge in Canada. Likewise, in the cases of Egypt and Morocco, the data suggests that economic and political challenges may have motivated some individuals to move to Canada, with a remarkable surge in emigration in 2010, with around 6,000 individuals compared to only a few hundred in 1980.

On the other hand, South Africa experienced a moderate increase in emigration, with notable fluctuations from year to year. Overall, the findings indicate that the most significant increases in emigration from Africa occurred in Algeria, Egypt, and Morocco, which could be attributed to various factors such as political instability, economic hardships, or a desire for better opportunities.

In [None]:
# Filter the dataset to include only the records from the Latin America and the Caribbean continent
# Rows for the Latin America and the Caribbean continent only. The variables
# are named latam_* so they describe the data they hold (they were previously
# called europe_*).
latam_df = df[df['Continent'] == 'Latin America and the Caribbean']

# Sum the numeric columns per country and keep the five countries with the
# largest 'Total', in descending order
latam_top5 = (
    latam_df.groupby('Country')
    .sum(numeric_only=True)
    .sort_values('Total', ascending=False)
    .head(5)
)

# Yearly columns only: drop the last column so it is not plotted as a year
latam_yearly = latam_top5.iloc[:, :-1]

# Create a line plot for each of the top 5 countries to show the trend of immigration over the years
fig = go.Figure()
for country, yearly_counts in latam_yearly.iterrows():
    fig.add_trace(go.Scatter(x=latam_yearly.columns, y=yearly_counts, mode='lines', name=country))
fig.update_layout(title='Top 5 Immigration Countries from Latin America and the Caribbean from 1980 to 2013',
                  yaxis_title='Number of Immigrants',
                  xaxis=dict(
                      title='Year',
                      tickangle=50  # Set the angle of the tick labels to 50 degrees
                  )
                 )
# Add text annotations
fig.add_annotation(x=29.5, y=5200, xref='x', yref='y',
                   text='2010 Haiti Earthquake',
                   font=dict(
                       family="sans serif",
                       size=16,
                       color='red'),
                   textangle=-76,
                   showarrow=False
                  )
# Add text annotations
fig.add_annotation(x=22.5, y=4700, xref='x', yref='y',
                   text='Colombia Civil Wars 2000-2006',
                   font=dict(
                       family="sans serif",
                       size=16,
                       color='purple'),
                   textangle=-63,
                   showarrow=False
                  )
fig.show()

This line chart portrays the immigration trends from five countries from Latin America and the Caribbean to Canada, highlighting that the most notable and pronounced increase in immigration occurred in Haiti, Colombia, and Mexico.

Colombia experienced a period of internal conflict, drug trafficking, and violence from 1999 to 2006, which forced many Colombians to seek refuge in Canada. The conflict intensified during this period, leading to widespread violence, human rights abuses, and displacement of civilians.

Haiti also saw a surge in immigration to Canada from 2009 to 2011, mainly due to a series of significant events, including a devastating earthquake in January 2010 that killed an estimated 200,000 people and displaced over a million others, a cholera outbreak that started in October 2010 and spread quickly throughout the country, and a presidential election that led to violence and dependency.

In contrast, Jamaica and Guyana showed a relatively high number of immigrants to Canada in 1980, with around 3,000 people. However, the numbers fluctuated significantly, increasing sharply until 1995 and then fluctuating slightly and tending to decrease after 2000. Jamaica faced high inflation and debt, while Guyana was dealing with a declining economy and political unrest in 1987.

Overall, the line chart indicates that the most significant increases in immigration to Canada came from Haiti, Colombia, and Mexico, which could be attributed to various factors such as political instability, economic hardship, and natural disasters.

In [None]:
# Filter the dataset to include only the records from the Europe continent
# Rows for the Europe continent only
europe_df = df[df['Continent'] == 'Europe']

# Numeric columns summed per country
europe_grouped = europe_df.groupby('Country').sum(numeric_only=True)

# Five countries with the largest 'Total', in descending order
europe_top5 = europe_grouped.sort_values('Total', ascending=False).iloc[:5]

# One line per country; the last column is left out of both axes
x_vals = europe_top5.columns[:-1]
fig = go.Figure()
for country, country_row in europe_top5.iterrows():
    fig.add_trace(go.Scatter(x=x_vals, y=country_row.iloc[:-1], mode='lines', name=country))

fig.update_layout(
    title='Top 5 Immigration Countries from Europe from 1980 to 2013',
    yaxis_title='Number of Immigrants',
    xaxis=dict(
        title='Year',
        tickangle=50,  # tilt the tick labels by 50 degrees
    ),
)
fig.show()

The line chart represents the immigration trends of the top 5 European countries to Canada from 1980 to 2013. The data shows that these countries experienced the most substantial volatility and increase in immigration during this period. Notably, the UK had the highest number of migrants compared to the other European countries, with fluctuations and increases peaking in 1994 at around 39,231 migrants.

The increase in immigration can be attributed to several factors, including economic challenges faced by the UK during the 1980s and early 1990s, changes in Canada's immigration policies that made it easier for skilled workers and professionals to immigrate, and the strong cultural and historical connection between Canada and the UK, which may have made Canada a more appealing destination for UK immigrants who were looking to maintain cultural and familial ties.

On the other hand, France and Romania experienced slow growth and remained at around 5,000 immigrants from 1980 to 2013. This could be due to several factors, including differences in culture and language, limited job opportunities, and strict immigration policies in both countries. Overall, the data suggests that the United Kingdom, Poland, France, Romania, and Portugal were the top European countries for immigration to Canada, with the UK being the most significant contributor to the trend.

In [None]:
# Country name and cumulative total only, then the fifteen largest totals
country_totals = df[['Country', 'Total']]
top_15_countries = country_totals.sort_values('Total', ascending=False).head(15)

# Horizontal bars, with the bar colour, outline and opacity set on the trace itself
bar_trace = go.Bar(
    x=top_15_countries['Total'],
    y=top_15_countries['Country'],
    orientation='h',
    marker_color='rgb(4, 131, 194)',
    marker_line_color='rgb(8,48,107)',
    marker_line_width=1.5,
    opacity=0.6,
)
fig = go.Figure(data=[bar_trace])

# Centred title plus axis labels and font sizes
fig.update_layout(
    title={
        'text': 'Top 15 Countries with Total Immigrants to Canada from 1980 to 2013',
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    title_font_size=16,
    xaxis_title='Number of Immigrants',
    xaxis_tickfont_size=12,
    yaxis_title='Country',
)
fig.show()

The horizontal bar chart shows the top 15 countries with the largest number of immigrants to Canada from 1980 to 2013. The Philippines, the United Kingdom, China, and India are the top four countries with the highest number of immigrants to Canada from 1980 to 2013, with more than 500,000 immigrants each. China and India had the highest number of immigrants among the top 15 countries, with 659,962 and 691,904 immigrants, respectively. On the other hand, Romania and Vietnam had fewer than 100,000 immigrants each during this period.

In [None]:
# Step 1: Get the data
# Rows for Iceland; positions 4:-1 skip the four leading descriptive columns
# and the last column
iceland_df = df[df['Country'] == 'Iceland']
iceland_yearly = iceland_df.iloc[:, 4:-1]
x_vals = iceland_yearly.columns
y_vals = iceland_yearly.sum(axis=0)

# Bar chart with the bar colour, outline and opacity set on the trace itself
fig = go.Figure(
    data=[go.Bar(
        x=x_vals,
        y=y_vals,
        marker_color='rgb(194, 93, 4)',
        marker_line_color='rgb(8,48,107)',
        marker_line_width=1.5,
        opacity=0.4,
    )],
    layout=go.Layout(
        title='Icelandic Immigrants to Canada from 1980 to 2013',
        xaxis=dict(
            title='Year',
            tickangle=50,  # tilt the tick labels by 50 degrees
        ),
        yaxis=dict(title='Number of Immigrants'),
    ),
)

# Red arrow with rotated text; x/y and ax/ay are all given relative to the
# chart's x and y axes (xref/yref/axref/ayref)
fig.add_annotation(
    x=32, y=70, xref='x', yref='y',
    ax=28, ay=20, axref='x', ayref='y',
    text='2008 - 2011 Financial Crisis',
    font=dict(family="sans serif", size=16, color='red'),
    textangle=-30,
    showarrow=True,
    arrowhead=5,
    arrowsize=1,
    arrowwidth=3,
    arrowcolor='red',
)

fig.show()

This bar chart shows Icelandic immigrants to Canada from 1980 to 2013. The number of immigrants fluctuated below 30 per year until 2008, after which there was a jump, reaching 72 people in 2013. A likely reason is the 2008-2011 financial crisis, which had a significant impact on Iceland's economy and its citizens. This could explain the increase in the number of Icelandic immigrants to Canada in 2013.