# Load data


In [2]:
import numpy as np  
import pandas as pd 
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [3]:
df_chi = pd.read_excel('MigrantesChile (2005-2016).xlsx',
                      sheet_name='Migrantes',
                      skipfooter=3)

print ('Data read into a pandas dataframe!')

Data read into a pandas dataframe!


In [4]:
df_chi.drop(['ID Continent','ID Country'], axis=1, inplace=True)
df_chi.head(4)

Unnamed: 0,Continent,Country,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,Asia,Afghanistan,8,4,5,1,2,1,3,6,15,4,12,15
1,Europe,Albania,2,1,1,2,2,5,3,2,1,2,7,11
2,Europe,Germany,495,481,562,614,587,644,685,747,687,845,778,582
3,Europe,Andorra,2,1,1,1,0,0,2,0,1,2,2,1


# Pregunta 1


In [5]:
df_chi['Total'] = df_chi.select_dtypes(include=['number']).sum(axis=1)

In [6]:
df_sorted = df_chi.sort_values(by=['Continent', 'Total'], ascending = False).groupby('Continent').head(1)
df_first_five = df_sorted.head(28)
df_sorted.head(28)

Unnamed: 0,Continent,Country,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,Total
33,Otros,Chile,392,173,50,26,20,30,0,0,0,0,0,0,691
11,Oceania,Australia,123,160,145,200,186,186,262,291,293,244,235,154,2479
52,Europe,Spain,825,869,944,936,1092,1170,1508,2989,5737,6262,6313,4458,33103
34,Asia,China,1131,1210,1517,1659,2054,2041,2295,2609,2669,3010,3583,3569,27347
127,America,Peru,25966,32746,63869,47336,56586,38048,40628,49643,48517,49900,59767,53624,566630
154,Africa,South Africa,43,52,58,89,94,69,100,96,94,93,89,65,942


In [7]:
df = df_first_five
years = list(map(str, range(2005, 2017)))
#fig = px.line(df, x="Country", y="Total")
#fig.show()

In [8]:
print(df_first_five)

    Continent       Country   2005   2006   2007   2008   2009   2010   2011  \
33      Otros         Chile    392    173     50     26     20     30      0   
11    Oceania     Australia    123    160    145    200    186    186    262   
52     Europe         Spain    825    869    944    936   1092   1170   1508   
34       Asia         China   1131   1210   1517   1659   2054   2041   2295   
127   America          Peru  25966  32746  63869  47336  56586  38048  40628   
154    Africa  South Africa     43     52     58     89     94     69    100   

      2012   2013   2014   2015   2016   Total  
33       0      0      0      0      0     691  
11     291    293    244    235    154    2479  
52    2989   5737   6262   6313   4458   33103  
34    2609   2669   3010   3583   3569   27347  
127  49643  48517  49900  59767  53624  566630  
154     96     94     93     89     65     942  


# Pregunta 2

In [9]:
df = df_first_five
df_melted = df.melt(id_vars=["Continent", "Country"], var_name="Year", value_name="Value")
fig = px.line(df_melted, x='Year', y='Value', color='Country', title='Yearly Data by Country')
fig.show()

In [10]:
fig = go.Figure()
for country in df_melted['Country'].unique():
    # Filter data for the current country in the loop
    country_data = df_melted[df_melted['Country'] == country]
    # Add a filled area trace for each country
    fig.add_trace(go.Scatter(
        x=country_data['Year'],
        y=country_data['Value'],
        fill='tozeroy',  # This fills the area under the line down to y=0
        mode='lines',  # Ensure that lines are drawn, and 'fill' is visible
        name=country  # Label for the legend with the name of the country
    ))

fig.update_layout(
    title='Yearly Data by Country (Non-Stacked Areas)',
    xaxis_title='Year',
    yaxis_title='Value',
    legend_title='Country'
)

fig.show()

In [11]:
fig = px.area(df_melted, x="Year", y="Value", color="Country", line_group="Country",  pattern_shape_sequence=[".", "x", "+"])
fig.show()

# Pregunta 3

In [12]:
continents = df_chi['Continent'].unique()

fig = make_subplots(rows=len(continents), cols=1, subplot_titles=continents)

for i, continent in enumerate(continents, start=1):
    continent_data = df_chi[df_chi['Continent'] == continent]
    fig.add_trace(
        go.Bar(
            x=continent_data['Country'], 
            y=continent_data['Total'], 
            name=f'Migrants in {continent}'
        ),
        row=i,
        col=1
    )

# Ajustes al layout para mejorar la visualización
fig.update_layout(
    title_text='Total Migrants by Country per Continent',
    height=300 * len(continents),  
    showlegend=False
)
fig.update_xaxes(title_text='Country')
fig.update_yaxes(title_text='Number of Migrants')

fig.show()


# Pregunta 4


In [15]:
# Selecciona los dos continentes que deseas comparar, por ejemplo, 'Europe' y 'Asia'
continent1 = 'Europe'
continent2 = 'Asia'

# Filtrar el DataFrame para incluir solo los continentes seleccionados
df_filtered = df_chi[df_chi['Continent'].isin([continent1, continent2])]

# Supongamos que los años están en columnas como '2005', '2006', ..., '2016'
years = [str(year) for year in range(2005, 2017)]
df_filtered.head(10)


Unnamed: 0,Continent,Country,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,Total
0,Asia,Afghanistan,8,4,5,1,2,1,3,6,15,4,12,15,76
1,Europe,Albania,2,1,1,2,2,5,3,2,1,2,7,11,39
2,Europe,Germany,495,481,562,614,587,644,685,747,687,845,778,582,7707
3,Europe,Andorra,2,1,1,1,0,0,2,0,1,2,2,1,13
7,Asia,Saudi Arabia,1,2,0,0,1,0,0,2,2,0,0,0,8
10,Asia,Armenia,0,2,0,2,3,1,4,2,0,2,2,4,22
12,Europe,Austria,53,54,41,32,55,40,77,63,105,137,139,74,870
13,Asia,Azerbaijan,0,0,0,0,3,0,0,0,1,2,1,0,7
15,Asia,Bangladesh,0,0,0,4,7,4,7,6,2,7,11,21,69
16,Europe,Belarus,7,2,8,11,14,20,26,27,18,12,35,17,197


In [20]:
# Agrupar por año y continente, sumando las cantidades de migrantes
df_grouped = df_filtered.groupby(['Continent'])
#d = df_filtered.groupby(['Continent'])[years].sum().transpose()
df_grouped.head(10)
# Crear el gráfico de barras
fig = go.Figure()

# Agregar las barras para cada continente
for continent in [continent1, continent2]:
    fig.add_trace(go.Bar(
        x=df_grouped.index,
        y=df_grouped[continent],
        name=continent
    ))

fig.update_layout(
    title='Comparative Migration by Year',
    xaxis_title='Year',
    yaxis_title='Number of Migrants',
    barmode='group'  
)

fig.show()

AttributeError: 'DataFrameGroupBy' object has no attribute 'index'