In [1]:
# importing libraries
import pandas as pd
import numpy as np
import chart_studio.plotly as py
import seaborn as sns
import plotly.express as px
import cufflinks as cf
import matplotlib.pyplot as plt
import plotly.graph_objects as go

%matplotlib inline

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [2]:
# Initialise Plotly's notebook mode so figures render inline in this notebook.
# NOTE(review): connected=True presumably loads plotly.js from the CDN rather
# than embedding it in the notebook — confirm against the Plotly docs.
init_notebook_mode(connected=True)
# Put cufflinks into offline mode (no Chart Studio account/network needed).
cf.go_offline()

# Loading data

In [3]:
# Read the raw population table from "population.csv" (path is relative to
# the notebook's working directory).
population = pd.read_csv("population.csv")
# Bare last expression: show the loaded frame as the cell output.
population

Unnamed: 0,Hududlar bo`yicha shahar va qishloq aholisi soni,Unnamed: 1,Unnamed: 2,Unnamed: 3
0,(yil boshiga; ming kishi),,,
1,,,,
2,,Jami aholi,shu jumladan:,
3,,,shahar aholisi,qishloq aholisi
4,2021*,,,
...,...,...,...,...
373,Sirdaryo,642.2,206.1,436.1
374,Toshkent,2350.2,949.7,1400.5
375,Farg`ona,2664.4,776.1,1888.3
376,Xorazm,1323.9,314.7,1009.2


In [4]:
# function for extracting value according to year
def Data_year(st_p, df, n_rows=16):
    """Extract the block of rows that holds one year's figures.

    Parameters
    ----------
    st_p : int
        Position of the first row of the yearly block inside ``df``.
    df : pandas.DataFrame
        Raw population table. It is never modified; a new frame is returned.
    n_rows : int, default 16
        Number of consecutive rows that make up one yearly block.

    Returns
    -------
    pandas.DataFrame
        The selected rows with columns ``'Unnamed: 1'``, ``'Unnamed: 2'`` and
        ``'Unnamed: 3'`` renamed to ``'total'``, ``'city_pop'`` and
        ``'village_pop'``, the index renumbered from 0, and the row labelled 2
        removed (so the resulting index is 0, 1, 3, 4, ...).

    Raises
    ------
    KeyError
        If the selected block has fewer than three rows, because the row
        labelled 2 cannot be dropped.
    """
    # Build a fresh frame instead of mutating a slice of ``df`` in place:
    # in-place edits on a slice can raise SettingWithCopyWarning.
    year = (
        df.iloc[st_p:st_p + n_rows]
        .rename(columns={'Unnamed: 1': 'total',
                         'Unnamed: 2': 'city_pop',
                         'Unnamed: 3': 'village_pop'})
        .reset_index(drop=True)  # renumber rows 0..n-1, discard the old labels
    )
    # Row 2 is the "viloyatlar" separator row, which carries only NaN values.
    return year.drop(index=2)

In [5]:
import re

# Find every year header row in the first column of the raw table.
# A header is any cell whose text contains four consecutive digits
# (e.g. "2021*"). For each one we keep the label itself and the position of
# the row that follows it, which is where that year's data block starts.
pattern = re.compile(r'\d{4}')
years = []
indeces = []
# Iterate the first column positionally: indexing a label-indexed row with
# an integer (population.loc[i][0]) is deprecated in recent pandas and builds
# a whole row Series for every lookup.
for pos, cell in enumerate(population.iloc[:, 0]):
    if pattern.search(str(cell)) is not None:
        years.append(cell)
        indeces.append(pos + 1)

In [6]:
# Lookup table: year label (as text) -> position where that year's rows begin.
values = {str(label): start for label, start in zip(years, indeces)}


# Population (Visualization)

In [7]:
# Total population per year, drawn as a bar chart.
a = []  # year labels (x axis)
b = []  # total population in persons (y axis)
for key, value in reversed(values.items()):
    entry = Data_year(value, population)
    a.append(key)
    # Row 0 of a yearly block holds that year's total; the table is in
    # thousands of people, so scale to persons.
    b.append(float(entry.loc[0, 'total']) * 1000)

fig = px.bar(x=a, y=b, labels={'x': 'Year', 'y': '#Population'},
             title="The number of Population in Uzbekistan (2000-2021)", text=b)
fig.update_traces(texttemplate='%{text:.3s}', textposition='outside',
                  marker_color='rgb(55, 83, 109)')
# `title_font_size` replaces the deprecated `titlefont_size` axis attribute,
# which recent Plotly releases no longer accept.
fig.update_layout(plot_bgcolor='white', xaxis_tickangle=-45,
                  xaxis=dict(tickfont_size=14, title_font_size=16),
                  yaxis=dict(tickfont_size=14, title_font_size=16))

 ## Population distribution (City&Village)

In [8]:
# Urban vs. rural population per year, drawn as grouped bars.
a = []  # year labels (x axis)
c = []  # urban population in persons
v = []  # rural population in persons
for key, value in reversed(values.items()):
    entry = Data_year(value, population)
    a.append(key)
    # Row 0 of a yearly block holds that year's totals; the table is in
    # thousands of people, so scale to persons.
    c.append(float(entry.loc[0, 'city_pop']) * 1000)
    v.append(float(entry.loc[0, 'village_pop']) * 1000)

fig = go.Figure()
fig.add_trace(go.Bar(x=a, y=c, name='city_pop', marker_color='rgb(102,205,170)'))
fig.add_trace(go.Bar(x=a, y=v, name='village_pop', marker_color='rgb(26, 118, 255)'))
# `title_font_size` replaces the deprecated `titlefont_size` axis attribute,
# which recent Plotly releases no longer accept.
fig.update_layout(plot_bgcolor='white',
                  title='Population distribution over City&Village (2000-2021)',
                  xaxis_tickangle=-45,
                  xaxis=dict(title='Year', tickfont_size=14, title_font_size=16),
                  yaxis=dict(title='#Population', tickfont_size=14, title_font_size=16))


 - Interestingly, the number of people living in rural areas was greater (almost double) than in urban areas until 2008.
 
 
 - From 2009, the number of people living in the cities exceeded the number of people living in the villages by roughly 6%.
 

# Population distributions according to districts

In [9]:
# user input for comparing years
years_inp = input('Please, insert the years to compare their populations according to regions ex. (2012-2021) >>> ')
# Tolerate stray spaces around the separator, e.g. "2012 - 2021".
comp_years = [part.strip() for part in years_inp.split('-')]
if len(comp_years) < 2:
    raise ValueError("Expected two years separated by '-', e.g. 2012-2021")
# The source table labels 2021 as "2021*"; translate every occurrence so that
# an input such as "2021-2021" also resolves.
comp_years = ['2021*' if year == '2021' else year for year in comp_years]
unknown = [year for year in comp_years if year not in values]
if unknown:
    raise ValueError(f"No data for year(s) {unknown}; available years: {sorted(values)}")


def comp_dist():
    """Return a dict mapping each requested year label to its start position in `values`."""
    return {year: values[year] for year in comp_years}


c_y = comp_dist()
# plotting
from plotly.subplots import make_subplots

entry1 = Data_year(c_y[comp_years[0]], population)
entry2 = Data_year(c_y[comp_years[1]], population)
# Take the region names from the frames being plotted, not from a variable
# left over from an earlier cell's loop. Row 0 is skipped, matching the
# values passed to the pies below.
name_col = 'Hududlar bo`yicha shahar va qishloq aholisi soni'
labels = entry1[name_col][1:]
labels2 = entry2[name_col][1:]
fig = make_subplots(1, 2, specs=[[{'type': 'domain'}, {'type': 'domain'}]],
                    subplot_titles=[comp_years[0], comp_years[1]])
fig.add_trace(go.Pie(labels=labels, values=entry1['total'][1:], scalegroup='one',
                     name="Aholi soni"), 1, 1)
fig.add_trace(go.Pie(labels=labels2, values=entry2['total'][1:], scalegroup='one',
                     name="Aholi soni"), 1, 2)
fig.update_traces(hole=.3, hoverinfo="label+percent+name+value")
fig.update_traces(textposition='inside', textinfo='percent')
fig.update_layout(title_text=f'Population distribution over regions (x1000) ({comp_years[0]} vs {comp_years[1]})')
fig


Please, insert the years to compare their populations according to regions ex. (2012-2021) >>> 2000-2021
