In [1]:
import pandas as pd
import numpy as np

In [2]:
import requests
import re
import seaborn as sns
import plotly.express as px


In [3]:
# Notebook-wide pandas display settings.
pd.options.display.max_columns = 20
# NOTE(review): 20 characters is far below the pandas default width of 80 and
# only affects plain-text reprs — confirm this value is intended.
pd.options.display.width = 20
# Render every float with two decimal places.
pd.options.display.float_format = '{:.2f}'.format

In [4]:
# Fixed colour per nationality group, passed as `color_discrete_map` to the
# Plotly figures built from the composition data.
color_dict = dict(
    Spaniards='#f6bd60',
    Mixed='#84a59d',
    Foreigners='#3c096c',
)
# Fixed colours for a Yes/No category.
yn_dict = dict(
    Yes='#e07a5f',
    No='#3d405b',
)

# Composition of the Houses

*01.Year* - Year of the data

*02.Code_Dist* - District code

*03.Name_Dist* - Name of the district

*04.Code_NBHD* - Neighborhood code

*05.Name_NBHD* - Name of the neighborhood

*06.Nationality* - Nationality of people

*07.Amount* - Number of people

*08.AVG_Age* - Average age of the population


In [5]:
# Read the household-composition history from CSV and display the frame.
composition = pd.read_csv(filepath_or_buffer="../datasets/Compo_House_Hist.csv")
composition

Unnamed: 0,Year,Code_Dist,Name_Dist,Code_NBHD,Name_NBHD,Nationality,Amount,AVG_Age
0,2019,1,Ciutat Vella,1,el Raval,Spaniards,8025,50.25
1,2019,1,Ciutat Vella,2,el Barri Gòtic,Spaniards,3105,51.08
2,2019,1,Ciutat Vella,3,la Barceloneta,Spaniards,3896,50.57
3,2019,1,Ciutat Vella,4,"Sant Pere, Santa Caterina i la Ribera",Spaniards,5305,49.73
4,2019,2,Eixample,5,el Fort Pienc,Spaniards,9533,49.02
...,...,...,...,...,...,...,...,...
1309,2014,10,Sant Martí,69,Diagonal Mar i el Front Marítim del Poblenou,Mixed,566,33.50
1310,2014,10,Sant Martí,70,el Besòs i el Maresme,Mixed,891,33.30
1311,2014,10,Sant Martí,71,Provençals del Poblenou,Mixed,659,32.20
1312,2014,10,Sant Martí,72,Sant Martí de Provençals,Mixed,741,33.50


Unnamed: 0,Year,Code_Dist,Name_Dist,Code_NBHD,Name_NBHD,Nationality,Amount,AVG_Age
0,2014,10,Sant Martí,73,la Verneda i la Pau,Mixed,782,35.50
1,2014,2,Eixample,5,el Fort Pienc,Foreigners,1368,32.50
2,2014,1,Ciutat Vella,4,"Sant Pere, Santa Caterina i la Ribera",Foreigners,2581,32.90
3,2014,1,Ciutat Vella,3,la Barceloneta,Foreigners,1470,32.80
4,2014,1,Ciutat Vella,2,el Barri Gòtic,Foreigners,1800,31.70
...,...,...,...,...,...,...,...,...
1309,2019,10,Sant Martí,69,Diagonal Mar i el Front Marítim del Poblenou,Foreigners,764,33.25
1310,2019,10,Sant Martí,68,el Poblenou,Foreigners,1751,32.67
1311,2019,10,Sant Martí,67,la Vila Olímpica del Poblenou,Foreigners,455,33.54
1312,2019,2,Eixample,5,el Fort Pienc,Mixed,1596,37.06


In [7]:
# Summary statistics of the numeric columns, transposed so that each column
# becomes a row.
composition.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Year,1314.0,2016.5,1.71,2014.0,2015.0,2016.5,2018.0,2019.0
Code_Dist,1314.0,6.25,2.79,1.0,4.0,7.0,8.0,10.0
Code_NBHD,1314.0,37.0,21.08,1.0,19.0,37.0,55.0,73.0
Amount,1314.0,3005.86,4165.81,10.0,487.5,1037.0,3430.25,21414.0
AVG_Age,1314.0,38.07,6.87,27.1,32.39,35.19,45.69,52.2


In [9]:
# Overlaid histograms of AVG_Age, one coloured trace per Nationality value.
fig = px.histogram(
    composition,
    x="AVG_Age",
    color="Nationality",
    color_discrete_map=color_dict,
    barmode='overlay',
    hover_data=['Name_NBHD', 'Name_Dist'],
    labels={'Name_NBHD': 'Neighborhood Name'},
    template="plotly_white",
    title="Average Age of the Composition of the Houses Histogram",
)
fig.show()

In [10]:
# Overlaid histograms of Amount, one coloured trace per Nationality value.
fig = px.histogram(
    composition,
    x="Amount",
    color="Nationality",
    color_discrete_map=color_dict,
    barmode='overlay',
    hover_data=['Name_NBHD', 'Name_Dist'],
    labels={'Name_NBHD': 'Neighborhood Name'},
    template="plotly_white",
    title="Amount of People",
)
fig.show()

In [11]:
# Animated scatter of AVG_Age against the neighbourhood code, coloured by
# Nationality, with one animation frame per Year.
# Plotly Express orders animation frames by first appearance in the data, so
# the frame is sorted by Year first to make the slider run chronologically
# (the AVG_Age-vs-Amount scatter in this notebook sorts the same way).
# A stable sort keeps the original row order within each year.
fig = px.scatter(
    composition.sort_values(by="Year", kind="stable").reset_index(drop=True),
    x='Code_NBHD',
    y='AVG_Age',
    animation_frame="Year",
    # The previous title, "District", did not describe what is plotted.
    title="Average Age by Neighborhood Code",
    color_discrete_map=color_dict,
    color="Nationality",
    template="plotly_white",
    hover_name='Name_NBHD',
    labels={'Name_NBHD': 'Neighborhood Name'},
)
fig.show()

In [12]:
# Box plot of Amount for each Year, with one coloured box per Nationality value.
# A title is added so the figure stands alone like the other composition plots.
fig = px.box(
    composition,
    x="Year",
    y="Amount",
    title="Amount of People per Year by Nationality",
    color_discrete_map=color_dict,
    color="Nationality",
    template="plotly_white",
    hover_data=['Name_NBHD', 'Name_Dist'],
    labels={'Name_NBHD': 'Neighborhood Name'},
)
fig.show()

In [16]:
# Animated scatter of Amount against AVG_Age, coloured by Nationality, with one
# animation frame per Year. The frame is sorted by Year so the animation
# frames appear in chronological order.
fig = px.scatter(
    composition.sort_values(by="Year").reset_index(drop=True),
    x='AVG_Age',
    y='Amount',
    animation_frame="Year",
    # The previous title, "District", did not describe what is plotted.
    title="Amount of People vs. Average Age",
    color_discrete_map=color_dict,
    color="Nationality",
    template="plotly_white",
    hover_name='Name_NBHD',
    labels={'Name_NBHD': 'Neighborhood Name'},
)
fig.show()

# Nationality

*01.Year* - Year of the data

*02.Code_Dist* - District code

*03.Name_Dist* - Name of the district

*04.Code_NBHD* - Neighborhood code

*05.Name_NBHD* - Name of the neighborhood

*06.Gender* - Gender of people (binary category)

*07.Nationality* - Nationality of people

*08.Amount* - Number of people


In [12]:
# Read the nationality history from CSV and display the frame.
nationality = pd.read_csv(filepath_or_buffer="../datasets/Nationality_Hist.csv")
nationality

Unnamed: 0,Year,Code_Dist,Name_Dist,Code_NBHD,Name_NBHD,Gender,Nationality,Amount
0,2019,1,Ciutat Vella,1,el Raval,Women,Espanya,11666.00
1,2019,1,Ciutat Vella,2,el Barri Gòtic,Women,Espanya,4214.00
2,2019,1,Ciutat Vella,3,la Barceloneta,Women,Espanya,4801.00
3,2019,1,Ciutat Vella,4,"Sant Pere, Santa Caterina i la Ribera",Women,Espanya,6570.00
4,2019,2,Eixample,5,el Fort Pienc,Women,Espanya,13074.00
...,...,...,...,...,...,...,...,...
153441,2014,10,Sant Martí,69,Diagonal Mar i el Front Marítim del Poblenou,Women,Apàtrides,0.00
153442,2014,10,Sant Martí,70,el Besòs i el Maresme,Women,Apàtrides,0.00
153443,2014,10,Sant Martí,71,Provençals del Poblenou,Women,Apàtrides,0.00
153444,2014,10,Sant Martí,72,Sant Martí de Provençals,Women,Apàtrides,0.00


In [13]:
# Column dtypes and non-null counts for the nationality frame.
# The recorded output reports 153445 non-null Amount values out of 153446 rows,
# i.e. a single missing value in that column.
nationality.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 153446 entries, 0 to 153445
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   Year         153446 non-null  int64  
 1   Code_Dist    153446 non-null  int64  
 2   Name_Dist    153446 non-null  object 
 3   Code_NBHD    153446 non-null  int64  
 4   Name_NBHD    153446 non-null  object 
 5   Gender       153446 non-null  object 
 6   Nationality  153446 non-null  object 
 7   Amount       153445 non-null  float64
dtypes: float64(1), int64(3), object(4)
memory usage: 9.4+ MB


In [14]:
# Summary statistics of the numeric columns, transposed so that each column
# becomes a row.
nationality.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Year,153446.0,2016.54,1.71,2014.0,2015.0,2017.0,2018.0,2019.0
Code_Dist,153446.0,6.25,2.79,1.0,4.0,7.0,8.0,10.0
Code_NBHD,153446.0,37.0,21.07,1.0,19.0,37.0,55.0,73.0
Amount,153445.0,63.17,826.75,0.0,0.0,0.0,4.0,28065.0


In [20]:
# Number of distinct values in the Nationality column.
nationality["Nationality"].nunique()

206

In [16]:
# Box plot of Amount for every nationality except "Espanya", split by Gender,
# with one animation frame per Year.
# Rows with Amount <= 4 are dropped (4 is the 75th percentile of Amount in the
# recorded describe() output); rows with a missing Amount fail the comparison
# and are dropped as well.
# Sorting by Year makes the animation frames run chronologically, because
# Plotly Express orders frames by first appearance in the data.
foreign_counts = (
    nationality
    .loc[(nationality.Amount > 4) & (nationality.Nationality != "Espanya")]
    .sort_values(by="Year", kind="stable")
)
fig = px.box(
    foreign_counts,
    x="Amount",
    # The previous title, "Average Age Histogram", described neither the chart
    # type nor the plotted variable.
    title="Amount of People by Gender (Non-Spanish Nationalities)",
    color="Gender",
    animation_frame="Year",
)
fig.show()

In [17]:
# Histogram of Amount for every nationality except "Espanya", split by Gender,
# with one animation frame per Year.
# Rows with Amount <= 4 are dropped (4 is the 75th percentile of Amount in the
# recorded describe() output); rows with a missing Amount fail the comparison
# and are dropped as well.
# Sorting by Year makes the animation frames run chronologically, because
# Plotly Express orders frames by first appearance in the data.
foreign_counts = (
    nationality
    .loc[(nationality.Amount > 4) & (nationality.Nationality != "Espanya")]
    .sort_values(by="Year", kind="stable")
)
fig = px.histogram(
    foreign_counts,
    x="Amount",
    # The previous title said "Average Age", but the x axis is Amount.
    title="Amount of People Histogram (Non-Spanish Nationalities)",
    color="Gender",
    animation_frame="Year",
)
fig.show()

In [None]:
# TODO(review): this cell was never executed and builds the same figure as the
# histogram cell directly before it — remove it or turn it into the plot that
# was actually intended.
# Until then it is made self-consistent: the title names the plotted variable
# (Amount, not average age), frames are sorted by Year so the animation runs
# chronologically, and the figure is actually shown.
foreign_counts = (
    nationality
    .loc[(nationality.Amount > 4) & (nationality.Nationality != "Espanya")]
    .sort_values(by="Year", kind="stable")
)
fig = px.histogram(
    foreign_counts,
    x="Amount",
    title="Amount of People Histogram (Non-Spanish Nationalities)",
    color="Gender",
    animation_frame="Year",
)
fig.show()


# Age

*01.Year* - Year of the data

*02.Code_Dist* - District code

*03.Name_Dist* - Name of the district

*04.Code_NBHD* - Neighborhood code

*05.Name_NBHD* - Name of the neighborhood

*06.Gender* - Gender of people (Binary category)

*07.Nationality* - Nationality of people

*08.Age* - Age group of people (5-year bins)

*09.Amount* - Number of people

In [18]:
# Read the population-by-age dataset from CSV and display the frame.
age = pd.read_csv(filepath_or_buffer="../datasets/People_Age.csv")
age

Unnamed: 0,Year,Code_Dist,Name_Dist,Code_NBHD,Name_NBHD,Gender,Nationality,Age,Amount
0,2019,1,Ciutat Vella,1,el Raval,Home,Espanyola,0-4 anys,438
1,2019,1,Ciutat Vella,2,el Barri Gòtic,Home,Espanyola,0-4 anys,126
2,2019,1,Ciutat Vella,3,la Barceloneta,Home,Espanyola,0-4 anys,123
3,2019,1,Ciutat Vella,4,"Sant Pere, Santa Caterina i la Ribera",Home,Espanyola,0-4 anys,228
4,2019,2,Eixample,5,el Fort Pienc,Home,Espanyola,0-4 anys,464
...,...,...,...,...,...,...,...,...,...
18391,2017,10,Sant Martí,69,Diagonal Mar i el Front Marítim del Poblenou,Dona,Estrangera,100 anys i més,0
18392,2017,10,Sant Martí,70,el Besòs i el Maresme,Dona,Estrangera,100 anys i més,0
18393,2017,10,Sant Martí,71,Provençals del Poblenou,Dona,Estrangera,100 anys i més,0
18394,2017,10,Sant Martí,72,Sant Martí de Provençals,Dona,Estrangera,100 anys i més,0


In [19]:
# Column dtypes and non-null counts for the age frame.
# The recorded output shows 18396 rows with no missing values in any of the
# 9 columns; Age is stored as an object (string) column.
age.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18396 entries, 0 to 18395
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Year         18396 non-null  int64 
 1   Code_Dist    18396 non-null  int64 
 2   Name_Dist    18396 non-null  object
 3   Code_NBHD    18396 non-null  int64 
 4   Name_NBHD    18396 non-null  object
 5   Gender       18396 non-null  object
 6   Nationality  18396 non-null  object
 7   Age          18396 non-null  object
 8   Amount       18396 non-null  int64 
dtypes: int64(4), object(5)
memory usage: 1.3+ MB


In [21]:
# Summary statistics of the numeric columns, transposed so that each column
# becomes a row.
age.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Year,18396.0,2018.0,0.82,2017.0,2017.0,2018.0,2019.0,2019.0
Code_Dist,18396.0,6.25,2.79,1.0,4.0,7.0,8.0,10.0
Code_NBHD,18396.0,37.0,21.07,1.0,19.0,37.0,55.0,73.0
Amount,18396.0,266.6,339.06,0.0,20.0,125.0,398.25,2381.0
