## Import

In [99]:
import pandas as pd

from matplotlib import pyplot as plt
import seaborn as sns

import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
from plotly.subplots import make_subplots
import plotly.express as px

import cufflinks
# Put cufflinks into offline mode so that DataFrame/Series .iplot() calls render
# inside the notebook.
cufflinks.go_offline(connected=True)
# Initialise plotly's notebook integration.
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead
# of embedding it in the notebook - confirm against the plotly docs.
init_notebook_mode(connected=True)

# Do not truncate columns when a DataFrame is displayed (the yearly tables are wide).
pd.set_option('display.max_columns', None)

## Purpose of this project

I made this project to practise with Plotly and to understand what has happened to the population of Armenia in recent years. I used data from the website of the Statistical Committee of the Republic of Armenia.

Questions I would like to answer:
* How does the population of Armenia change through time?
* What populational waves are there? What generations?
* How does the sex ratio change through time? What are the possible reasons?

## Data load

In [15]:
# The first two lines of the file form a two-level column header (year, then
# Male/Female/Total); the first column holds the age labels and becomes the index.
population = pd.read_csv('99531228.csv', header=[0, 1], index_col=0)

In [16]:
# Inspect the end of the table: after the '80+' and 'Total' rows the file
# carries blank rows and a footnote that are not data.
population.tail()

Age,2011,2011,2011,2012,2012,2012,2013,2013,2013,2014,2014,2014,2015,2015,2015,2016,2016,2016,2017,2017,2017,2018,2018,2018,2019,2019,2019,2020,2020,2020,2021,2021,2021,2022,2022,2022
Unnamed: 0_level_1,Male,Female,Total,Male,Female,Total,Male,Female,Total,Male,Female,Total,Male,Female,Total,Male,Female,Total,Male,Female,Total,Male,Female,Total,Male,Female,Total,Male,Female,Total,Male,Female,Total,Male,Female,Total
80+,24636,44075.0,68711.0,24712.0,44020.0,68732.0,26009.0,45672.0,71681.0,26728.0,46021.0,72749.0,26670.0,45359.0,72029.0,28340.0,47610.0,75950.0,29332.0,49868.0,79200.0,30667.0,52623.0,83290.0,32122.0,55605.0,87727.0,33237.0,57864.0,91101.0,33380.0,59058.0,92438.0,32533.0,58522.0,91055.0
Total,1448052,1570802.0,3018854.0,1449473.0,1571903.0,3021376.0,1451646.0,1575233.0,3026879.0,1443497.0,1573582.0,3017079.0,1439148.0,1571450.0,3010598.0,1429042.0,1569535.0,2998577.0,1418771.0,1567380.0,2986151.0,1408199.0,1564533.0,2972732.0,1401731.0,1563538.0,2965269.0,1397005.0,1562689.0,2959694.0,1398107.0,1565144.0,2963251.0,1397169.0,1564198.0,2961367.0
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,*) Based on RA 2011 Population Census.,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [18]:
# Make Male/Female/Total the outer column level (year becomes the inner one)
# and sort the columns, so that e.g. population['Total'] returns all years.
population = population.swaplevel(0, 1, axis=1).sort_index(axis=1, level=0)

In [19]:
# Turn the age labels into an ordinary column, drop the three trailing
# non-data rows (positions 82-84: blank rows and the footnote) and call the
# label column 'Age'.
population = (
    population
    .reset_index()
    .drop(index=[82, 83, 84])
    .rename(columns={'index': 'Age'})
)

In [20]:
# Single-year-of-age totals (both sexes), one column per year.
total_pop2 = population['Total']

In [21]:
# Earlier series: one row per age group, one column per year 2002-2011.
# The year labels are stored as integers.
year_labels = list(range(2002, 2012))
total_pop1 = pd.read_csv('Total2002.csv')
total_pop1.columns = ['Age'] + year_labels

In [23]:
# Empty frame with the same year columns as the single-year table; it will hold
# those years aggregated into the age groups of total_pop1.
tp_2 = pd.DataFrame(columns=total_pop2.columns)

In [24]:
# Reuse the age-group labels of the earlier series; this also creates one
# (still empty) row per age group.
tp_2['Age'] = total_pop1['Age']

In [25]:
# Fill the rows that are not regular five-year groups. Columns :12 are the
# year columns; the last column is 'Age'.
tp_2.iloc[0, :12] = total_pop2.iloc[0]  # age 0
# Second group: ages 1-4 (the five-year groups filled afterwards start at
# age 5). The slice end is exclusive, so it has to be 5 - the former 1:4
# summed ages 1-3 only and age 4 was not counted in any group.
# NOTE(review): assumes the second label in Total2002.csv is the 1-4 group - confirm.
tp_2.iloc[1, :12] = total_pop2.iloc[1:5].sum()
tp_2.iloc[17, :12] = total_pop2.iloc[80]  # '80+' row
tp_2.iloc[18, :12] = total_pop2.iloc[81]  # 'Total' row

In [26]:
# Rows 2..16 are the five-year groups: each one is the sum of five consecutive
# single-year rows starting at ages 5, 10, ..., 75.
for group_row, first_age in enumerate(range(5, 80, 5), start=2):
    five_year_block = total_pop2.iloc[first_age:first_age + 5]
    tp_2.iloc[group_row, :12] = five_year_block.sum()

In [27]:
# 2011 is already covered by the 2002-2011 table, so keep only one copy of it.
tp_2 = tp_2.drop(columns='2011')

In [28]:
# Join the two series on the age-group label and index the result by it.
total_pop = total_pop1.merge(tp_2, on='Age').set_index('Age')

In [34]:
# Single-year-of-age tables per sex, one column per year.
male_pop2, female_pop2 = population['Male'], population['Female']

In [35]:
# Earlier (2002-2011) series per sex, already grouped by age group.
male_pop1, female_pop1 = (
    pd.read_csv(csv_name) for csv_name in ('Male2002.csv', 'Female2002.csv')
)

In [36]:
# Give both per-sex tables the same column labels as the total table
# ('Age' followed by the integer years 2002-2011) ...
male_pop1.columns = ['Age', *range(2002, 2012)]
female_pop1.columns = ['Age', *range(2002, 2012)]
# ... and use one common label for the summary row in both frames.
male_pop1 = male_pop1.replace({'Total Male Population': 'Total Population'})
female_pop1 = female_pop1.replace({'Total Female Population': 'Total Population'})

In [37]:
# Empty per-sex frames with the year columns of the single-year tables.
tp_2_m, tp_2_f = (
    pd.DataFrame(columns=single_year.columns)
    for single_year in (male_pop2, female_pop2)
)

In [38]:
# Both frames get the age-group labels of the earlier series (one row per group).
age_groups = total_pop1['Age']
tp_2_m['Age'] = age_groups
tp_2_f['Age'] = age_groups

In [40]:
# Aggregate the single-year male and female tables into age groups, filling
# tp_2_m / tp_2_f in place. Columns :12 are the year columns; the last
# column is 'Age'.
for grouped, single_year in zip([tp_2_m, tp_2_f], [male_pop2, female_pop2]):
    grouped.iloc[0, :12] = single_year.iloc[0]  # age 0
    # Ages 1-4: the slice end is exclusive, so it has to be 5. The former 1:4
    # summed ages 1-3 only, leaving age 4 out of every group (the five-year
    # groups below start at age 5).
    # NOTE(review): assumes the second age-group label is the 1-4 group - confirm.
    grouped.iloc[1, :12] = single_year.iloc[1:5].sum()
    grouped.iloc[17, :12] = single_year.iloc[80]  # '80+' row
    grouped.iloc[18, :12] = single_year.iloc[81]  # 'Total' row
    # Rows 2..16: five consecutive single-year rows starting at 5, 10, ..., 75.
    for group_row, first_age in zip(range(2, 17), range(5, 80, 5)):
        grouped.iloc[group_row, :12] = single_year.iloc[first_age:first_age + 5].sum()
    # 2011 is dropped here, so a later merge on 'Age' with a table that
    # already has 2011 does not duplicate that year. Must stay in-place:
    # rebinding the loop variable would not change tp_2_m / tp_2_f.
    grouped.drop('2011', axis=1, inplace=True)

In [43]:
# Join the 2002-2011 and the aggregated later series per sex on the age-group label.
male_pop = male_pop1.merge(tp_2_m, on='Age')
female_pop = female_pop1.merge(tp_2_f, on='Age')

In [44]:
# Index by age-group label so rows can be selected with .loc[...].
male_pop = male_pop.set_index('Age')

In [45]:
# Index by age-group label so rows can be selected with .loc[...].
female_pop = female_pop.set_index('Age')

In [87]:
# One row per table (male, female, total), each holding that table's
# 'Total Population' row; the columns are the years.
m_f_total = pd.DataFrame(
    [table.loc['Total Population'] for table in (male_pop, female_pop, total_pop)]
)

In [88]:
# Flip to one row per year, move the year out of the index and label the columns.
m_f_total = m_f_total.transpose().reset_index()
m_f_total.columns = ['Year', 'Male', 'Female', 'Total']

## Answer the questions

### How does the population of Armenia change through time?

In [31]:
# Line chart of the summary row (position 18, labelled 'Total Population') over the years.
total_by_year = total_pop.loc['Total Population']
total_by_year.iplot(
    xTitle='Year',
    yTitle='Population',
    title='Total Population of Armenia 2002-2022',
)

We can see that the population of Armenia has been decreasing almost continuously since 2002. Let's calculate the rate of this decrease.

In [105]:
# Year-over-year growth rate of the total population, in percent.
speed_of_growth = pd.DataFrame(total_pop.iloc[18])

# Population of the previous year (NaN for the first year of the series).
speed_of_growth['Shift'] = speed_of_growth['Total Population'].shift(1)
speed_of_growth.reset_index(inplace=True)
# A growth rate is the change relative to the PREVIOUS year's population;
# dividing by the current year's value (as before) understates growth and
# overstates decline.
speed_of_growth['Speed'] = (speed_of_growth['Total Population'] -
                            speed_of_growth['Shift']) / speed_of_growth['Shift'] * 100
speed_of_growth.rename(columns={'index':'Year'}, inplace=True)

In [109]:
# Bar chart of the yearly growth rate stored in the 'Speed' column.
fig = px.bar(
    speed_of_growth,
    x='Year',
    y='Speed',
    labels={'Speed': 'Speed, %'},
    title='Speed of population growth in Armenia 2002-2022',
)
fig.show()

Global human population growth amounts to around 1.1% per year. In Armenia, population growth was recorded in 2013 and 2021, but even then it was lower than the global rate. The sharpest decrease was recorded in 2010, but since 2012 the situation has been more positive.

### What populational waves are there? What generations?

In [32]:
# Age distribution for three snapshot years. Rows :18 are the age groups (the
# 'Total Population' row is excluded); columns 0, 10 and 20 are the first,
# eleventh and last year column (2002, 2012 and 2022). The frame's index - the
# age group - is plotted on the x axis, so the axis is labelled accordingly
# (it used to say 'Year', and the title described a time series).
total_pop.iloc[:18, [0, 10, 20]].iplot(yTitle='Population', xTitle='Age group',
                         title='Age structure of the population of Armenia: 2002, 2012 and 2022')

We can see three peaks on this plot. The first corresponds to the generation born in 1988-1992, the second to the generation born in 1958-1962, and the third to the generation born in 1938-1942.
Between them we see demographic holes: 1998-2002, 1968-1972 and 1943-1947. Partly this is due to population waves (a generation with fewer people gives birth to fewer children). But in 1998-2002 fewer children might also have been born because of the economic crisis.

### How does the sex ratio change through time?

In [90]:
# Male and female totals over time, one line per sex.
sex_by_year = m_f_total[['Year', 'Male', 'Female']]
sex_by_year.iplot(
    x='Year',
    xTitle='Year',
    yTitle='Population',
    title='Male and Female Population of Armenia 2002-2022',
)

In [91]:
# Sex ratio expressed as males per female (values below 1 mean more women than men).
m_f_total['Ratio'] = m_f_total['Male'] / m_f_total['Female']

In [95]:
# Plot the male-to-female ratio over time. The y axis shows a ratio, not a
# head count, so it is labelled as such (it used to say 'Population').
m_f_total[['Year', 'Ratio']].iplot(x='Year', yTitle='Males per female', xTitle='Year',
                         title='Male to Female Ratio in Armenia 2002-2022')

We can see that the female population has been almost stable since 2012, while the male population continues to decrease.

## Conclusions

During the graphical data analysis we saw that the population of Armenia has been decreasing for the last 20 years. The decrease was not homogeneous: there were years of stronger decrease (before 2012) and years when there was a slight increase (2013 and 2021). There were also some demographic holes; some of them were caused by war (World War II), others had economic causes. The sex ratio of the Armenian population has also changed: it was about 92 men to 100 women until 2013, but since then it has been decreasing, and now, in 2022, there are 89 men to 100 women.

I think that further study of modern Armenian history will give me an understanding of the reasons behind these facts.