In [1]:
import pandas as pd
import numpy as np

In [2]:
import requests
import re
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
from pandas_profiling import ProfileReport


In [3]:
# Notebook-wide pandas display settings, applied in one call (set_option takes
# alternating option-name / value pairs): at most 20 columns, a 20-character
# display width, and floats rendered with two decimals.
pd.set_option(
    'display.max_columns', 20,
    'display.width', 20,
    'display.float_format', '{:.2f}'.format,
)

In [4]:
# Fixed colour per nationality group; passed as color_discrete_map to the figures below.
color_dict = dict(
    Spaniards='#f6bd60',
    Mixed='#84a59d',
    Foreigners='#3c096c',
)
# Fixed colours for a Yes / No category.
yn_dict = dict(Yes='#e07a5f', No='#3d405b')

# Composition of the Houses

*01.Year* - Year of the data

*02.Code_Dist* - District code

*03.Name_Dist* - Name of the district

*04.Code_NBHD* - Neighborhood code

*05.Name_NBHD* - Name of the neighborhood

*06.Nationality* - Nationality of people

*07.Amount* - Number of people

*08.AVG_Age* - Average age of the population


In [5]:
# Load the household-composition history; its columns are described in the section above.
composition = pd.read_csv("../datasets/Compo_House_Hist.csv")
# Bare last expression: the frame is rendered as the cell output.
composition

Unnamed: 0,Year,Code_Dist,Name_Dist,Code_NBHD,Name_NBHD,Nationality,Amount,AVG_Age
0,2019,1,Ciutat Vella,1,el Raval,Spaniards,8025,50.25
1,2019,1,Ciutat Vella,2,el Barri Gòtic,Spaniards,3105,51.08
2,2019,1,Ciutat Vella,3,la Barceloneta,Spaniards,3896,50.57
3,2019,1,Ciutat Vella,4,"Sant Pere, Santa Caterina i la Ribera",Spaniards,5305,49.73
4,2019,2,Eixample,5,el Fort Pienc,Spaniards,9533,49.02
...,...,...,...,...,...,...,...,...
1309,2014,10,Sant Martí,69,Diagonal Mar i el Front Marítim del Poblenou,Mixed,566,33.50
1310,2014,10,Sant Martí,70,el Besòs i el Maresme,Mixed,891,33.30
1311,2014,10,Sant Martí,71,Provençals del Poblenou,Mixed,659,32.20
1312,2014,10,Sant Martí,72,Sant Martí de Provençals,Mixed,741,33.50


profile = ProfileReport(composition, title="composition Report")
profile.to_notebook_iframe()
profile.to_file("composition_report.html")


In [6]:
# Summary statistics of the numeric columns, transposed so each column becomes a row.
composition.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Year,1314.0,2016.5,1.71,2014.0,2015.0,2016.5,2018.0,2019.0
Code_Dist,1314.0,6.25,2.79,1.0,4.0,7.0,8.0,10.0
Code_NBHD,1314.0,37.0,21.08,1.0,19.0,37.0,55.0,73.0
Amount,1314.0,3005.86,4165.81,10.0,487.5,1037.0,3430.25,21414.0
AVG_Age,1314.0,38.07,6.87,27.1,32.39,35.19,45.69,52.2


In [11]:
# Histogram-style aggregation: truncate AVG_Age to whole years, then count how
# many rows fall into each (age, nationality, year) bin.  After .count() every
# remaining column (Amount included) holds that row count, not its original values.
a = (
    composition
    # Vectorised cast instead of a per-element lambda; assign() returns a new
    # frame, so no explicit copy or in-place mutation is needed.  Like int(x),
    # astype truncates toward zero.
    .assign(AVG_Age=lambda frame: frame["AVG_Age"].astype("int64"))
    .groupby(["AVG_Age", "Nationality", "Year"])
    .count()
    .reset_index()
)
a

Unnamed: 0,AVG_Age,Nationality,Year,Code_Dist,Name_Dist,Code_NBHD,Name_NBHD,Amount
0,27,Foreigners,2014,1,1,1,1,1
1,27,Mixed,2015,1,1,1,1,1
2,27,Mixed,2016,1,1,1,1,1
3,28,Foreigners,2015,1,1,1,1,1
4,28,Foreigners,2018,1,1,1,1,1
...,...,...,...,...,...,...,...,...
180,51,Spaniards,2017,2,2,2,2,2
181,51,Spaniards,2018,1,1,1,1,1
182,51,Spaniards,2019,1,1,1,1,1
183,52,Spaniards,2014,1,1,1,1,1


In [14]:
# Bar chart of how many rows fall into each whole-year average-age bin,
# coloured by nationality group and animated over the years.
fig = px.bar(
    # `a` is ordered by AVG_Age first, and Plotly Express creates animation
    # frames in order of first appearance, so sort by Year to get a
    # chronological slider.  mergesort is stable: the order within a year is kept.
    a.sort_values(by="Year", kind="mergesort"),
    x="AVG_Age",
    y="Amount",
    title="Average Age of the Composition of the Houses Histogram",
    barmode='overlay',
    color="Nationality",
    color_discrete_map=color_dict,
    template="plotly_white",
    animation_frame="Year",
    # `a` comes from groupby(...).count(), so Amount is a row count and the
    # Name_NBHD / Name_Dist columns hold counts too; they are therefore no
    # longer shown in the hover as if they were names.
    labels={'AVG_Age': 'Average age (whole years)', 'Amount': 'Number of neighborhoods'},
)
fig.show()

In [15]:
# Scatter version of the age-bin counts: one marker per (age bin, nationality)
# and one animation frame per year.
fig = px.scatter(
    # Animation frames follow first appearance in the data and `a` is ordered
    # by AVG_Age, so sort by Year (stable) for a chronological slider.
    a.sort_values(by="Year", kind="mergesort"),
    x="AVG_Age",
    y="Amount",
    title="Average Age of the Composition of the Houses Histogram",
    color="Nationality",
    color_discrete_map=color_dict,
    template="plotly_white",
    animation_frame="Year",
    # In `a` (built with groupby(...).count()) Amount, Name_NBHD and Name_Dist
    # are all row counts, so the name columns are not used as hover data.
    labels={'AVG_Age': 'Average age (whole years)', 'Amount': 'Number of neighborhoods'},
)

fig.show()

In [17]:
# Distribution of Amount per nationality group, one animation frame per year.
fig = px.histogram(
    # Plotly Express orders animation frames by first appearance and the file
    # starts with 2019, so sort by Year (stable sort) for a chronological slider.
    composition.sort_values(by="Year", kind="mergesort"),
    x="Amount",
    barmode='overlay',
    title="Amount of People",
    animation_frame="Year",
    color="Nationality",
    color_discrete_map=color_dict,
    template="plotly_white",
    hover_data=['Name_NBHD', 'Name_Dist'],
    labels={'Name_NBHD': 'Neighborhood Name'},
)

fig.show()

In [18]:
# Mean of the numeric columns per nationality group and year.  numeric_only=True
# is spelled out because pandas >= 2.0 no longer drops the text columns
# (Name_Dist, Name_NBHD) silently and raises TypeError instead.
composition.groupby(["Nationality", "Year"]).mean(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,Code_Dist,Code_NBHD,Amount,AVG_Age
Nationality,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Foreigners,2014,6.25,37.0,768.0,31.34
Foreigners,2015,6.25,37.0,748.95,31.82
Foreigners,2016,6.25,37.0,776.1,31.7
Foreigners,2017,6.25,37.0,848.04,31.55
Foreigners,2018,6.25,37.0,915.99,31.56
Foreigners,2019,6.25,37.0,985.18,31.73
Mixed,2014,6.25,37.0,923.55,34.14
Mixed,2015,6.25,37.0,918.23,34.34
Mixed,2016,6.25,37.0,922.51,34.47
Mixed,2017,6.25,37.0,952.67,34.67


In [19]:
# Average age per neighbourhood code, one line per nationality group, animated by year.
fig = px.line(
    # Animation frames follow first appearance in the data, so sort by Year for
    # a chronological slider; the secondary Code_NBHD key keeps the points of
    # each line in ascending x order (lines are drawn in data order).
    composition.sort_values(by=["Year", "Code_NBHD"]),
    x='Code_NBHD',
    y='AVG_Age',
    animation_frame="Year",
    title="District",
    color="Nationality",
    color_discrete_map=color_dict,
    template="plotly_white",
    hover_name='Name_NBHD',
    labels={'Name_NBHD': 'Neighborhood Name'},
)
fig.show()

# TODO: consider a dot plot with vertical lines separating the districts.
# Neighbourhoods are not a continuum, so connecting them with a line is not meaningful.

In [20]:
# Box plots of Amount for each year, one box per nationality group.
fig = px.box(
    composition,
    x="Year",
    y="Amount",
    color="Nationality",
    color_discrete_map=color_dict,
    template="plotly_white",
    hover_data=['Name_NBHD', 'Name_Dist'],
    labels={'Name_NBHD': 'Neighborhood Name'},
)
fig.show()

In [21]:
# Average age against Amount, one marker per row, animated by year.  The rows
# are ordered by Year (with a fresh 0..n-1 index) before plotting.
composition_by_year = composition.sort_values(by="Year").reset_index(drop=True)
fig = px.scatter(
    composition_by_year,
    x='AVG_Age',
    y='Amount',
    animation_frame="Year",
    title="District",
    color="Nationality",
    color_discrete_map=color_dict,
    template="plotly_white",
    hover_name='Name_NBHD',
    labels={'Name_NBHD': 'Neighborhood Name'},
)
fig.show()

# Nationality

*01.Year* - Year of the data

*02.Code_Dist* - District code

*03.Name_Dist* - Name of the district

*04.Code_NBHD* - Neighborhood code

*05.Name_NBHD* - Name of the neighborhood

*06.Gender* - Gender of the persons (Binary Category)

*07.Nationality* - Nationality of people

*08.Amount* - Number of people


In [22]:
# Load the per-nationality population history; its columns are described in the section above.
nationality = pd.read_csv("../datasets/Nationality_Hist.csv")
# Bare last expression: the frame is rendered as the cell output.
nationality

Unnamed: 0,Year,Code_Dist,Name_Dist,Code_NBHD,Name_NBHD,Gender,Nationality,Amount
0,2019,1,Ciutat Vella,1,el Raval,Women,Espanya,11666.00
1,2019,1,Ciutat Vella,2,el Barri Gòtic,Women,Espanya,4214.00
2,2019,1,Ciutat Vella,3,la Barceloneta,Women,Espanya,4801.00
3,2019,1,Ciutat Vella,4,"Sant Pere, Santa Caterina i la Ribera",Women,Espanya,6570.00
4,2019,2,Eixample,5,el Fort Pienc,Women,Espanya,13074.00
...,...,...,...,...,...,...,...,...
153441,2014,10,Sant Martí,69,Diagonal Mar i el Front Marítim del Poblenou,Women,Apàtrides,0.00
153442,2014,10,Sant Martí,70,el Besòs i el Maresme,Women,Apàtrides,0.00
153443,2014,10,Sant Martí,71,Provençals del Poblenou,Women,Apàtrides,0.00
153444,2014,10,Sant Martí,72,Sant Martí de Provençals,Women,Apàtrides,0.00


profile = ProfileReport(nationality, title="nationality Report")
profile.to_notebook_iframe()
profile.to_file("nationality_report.html")

In [23]:
# Column dtypes and non-null counts (shows whether any column has missing values).
nationality.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 153446 entries, 0 to 153445
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   Year         153446 non-null  int64  
 1   Code_Dist    153446 non-null  int64  
 2   Name_Dist    153446 non-null  object 
 3   Code_NBHD    153446 non-null  int64  
 4   Name_NBHD    153446 non-null  object 
 5   Gender       153446 non-null  object 
 6   Nationality  153446 non-null  object 
 7   Amount       153445 non-null  float64
dtypes: float64(1), int64(3), object(4)
memory usage: 9.4+ MB


In [24]:
# Summary statistics of the numeric columns, transposed so each column becomes a row.
nationality.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Year,153446.0,2016.54,1.71,2014.0,2015.0,2017.0,2018.0,2019.0
Code_Dist,153446.0,6.25,2.79,1.0,4.0,7.0,8.0,10.0
Code_NBHD,153446.0,37.0,21.07,1.0,19.0,37.0,55.0,73.0
Amount,153445.0,63.17,826.75,0.0,0.0,0.0,4.0,28065.0


In [25]:
# Number of rows per raw nationality label.
nationality.Nationality.value_counts()

Mònaco                                  876
Islàndia                                876
Espanya                                 876
Gàmbia                                  876
Estònia                                 876
                                       ... 
República  Centreafricana               146
Altres països o territoris d'Oceania    146
Altres països o territoris d'Europa     146
Altres països Àsia                      146
Iraq                                    146
Name: Nationality, Length: 206, dtype: int64

In [26]:
# Distinct raw nationality labels; pandas returns them in order of first appearance.
nat_cat = nationality.Nationality.unique()
# English names for the raw nationality labels.  The list is POSITIONAL: entry i
# translates the i-th distinct raw label, so entries must never be reordered,
# added or removed without checking the data.  Several raw spellings map to the
# same English name on purpose (e.g. 'Morocco', 'Guinea-Bissau' appear twice).
# Names are spelled exactly as in the continent lists below (no stray commas,
# trailing spaces or alternative wordings), so a translated label can be looked
# up in them directly.
nat_eng = ['Spain', 'Italy', 'France', 'United Kingdom', 'Russia', 'Germany',
       'Romania', 'Portugal', 'Ukraine', 'Georgia', 'Netherlands',
       'Poland', 'Bulgaria', 'Sweden', 'Belgium', 'Greece', 'Ireland',
       'Switzerland', 'Hungary', 'Austria', 'Denmark', 'Finland',
       'Moldova', 'Lithuania', 'Belarus', 'Czech Republic', 'Serbia', 'Albania',
       'Croatia', 'Norway', 'Slovakia', 'Andorra', 'Latvia',
       'Estonia', 'Slovenia', 'Macedonia', 'Iceland',
       'Bosnia and Herzegovina', 'Luxembourg', 'Malta', 'Montenegro',
       'Liechtenstein', 'Monaco', 'San Marino', 'Other European countries or territories',
       'Morocco', 'Algeria', 'Senegal', 'Nigeria', 'Egypt', 'Ghana',
       'Guinea', 'Equatorial Guinea', 'Tunisia', 'Cameroon', 'Gambia',
       'South Africa', 'Mali', 'Libya', 'Guinea-Bissau', 'Ivory Coast',
       'Kenya', 'Mauritania', 'Angola', 'Congo', 'Ethiopia',
       'Burkina Faso', 'Tanzania', 'Sierra Leone', 'Mozambique',
       'Sudan', 'Benin', 'Madagascar', 'Somalia', 'Togo', 'Uganda',
       'Cape Verde', 'Niger', 'Mauritius', 'Namibia', 'Zimbabwe', 'Rwanda',
       'Liberia', 'São Tomé and Príncipe', 'Eritrea', 'Malawi',
       'Central African Republic', 'Botswana', 'Burundi', 'Gabon',
       'Zambia', 'Chad', 'Seychelles', 'Comoros', 'Djibouti',
       'Swaziland', 'Democratic Republic of the Congo',
       'Other Africa countries or territories', 'Colombia', 'Honduras', 'Peru',
       'Venezuela', 'Bolivia', 'Ecuador', 'Argentina', 'Brazil',
       'Dominican Republic', 'United States', 'Mexico', 'Paraguay',
       'Chile', 'Cuba', 'El Salvador', 'Uruguay', 'Nicaragua', 'Canada',
       'Costa Rica', 'Panama', 'Guatemala', 'Dominica', 'Haiti',
       'Trinidad and Tobago', 'Jamaica', 'Barbados', 'Bahamas',
       'Saint Vincent and the Grenadines', 'Guyana', 'Suriname',
       'Antigua and Barbuda', 'Belize', 'Grenada', 'Saint Kitts and Nevis',
       'China', 'Pakistan', 'Philippines', 'India', 'Bangladesh', 'Armenia',
       'Japan', 'Nepal', 'Turkey', 'Iran', 'South Korea', 'Syria',
       'Israel', 'Lebanon', 'Kazakhstan', 'Vietnam', 'Thailand',
       'Jordan', 'Indonesia', 'Azerbaijan', 'Cyprus', 'Iraq',
       'Uzbekistan', 'Afghanistan', 'Saudi Arabia', 'Palestine',
       'Malaysia', 'Mongolia', 'Singapore', 'Kyrgyzstan', 'Kuwait',
       'Sri Lanka', 'Yemen', 'Qatar', 'Turkmenistan',
       'United Arab Emirates','Myanmar','Cambodia','Tajikistan',
       'Bahrain', 'Maldives', 'Lao', 'Oman', 'Other Asia countries or territories',
       'Australia', 'New Zealand', 'Solomon Islands', 'Stateless Persons',
       "Other European countries or territories",
       'Guinea-Bissau', 'Other Africa countries or territories',
       'Trinidad and Tobago',
       "Other South America countries or territories", 'Korea',
       'Palestinian Territories', 'Timor-Leste',
       "Other Asia countries or territories",
       "Other European countries or territories", 'Lesotho',
       "Other America countries or territories",
       "Other Asia countries or territories", 'Central African Republic',
       'Saint Lucia', 'Other Oceania countries or territories',
       'Other European countries or territories', 'Other Africa countries or territories', 'Kyrgyzstan',
       'Other Asia countries or territories', 'Morocco', 'Bahamas', 'United Arab Emirates',
       'Yemen', 'Iraq', 'Laos']

# Continent groupings of the English names above.  Each name is listed once.
# NOTE(review): "Other America countries or territories" is deliberately left
# out of every list because it cannot be assigned to North or Latin America
# from the label alone - decide where it belongs.
europa = ['Spain', 'Italy', 'France', 'United Kingdom', 'Russia', 'Germany', 'Romania', 'Portugal', 'Ukraine', 'Georgia',
          'Netherlands', 'Poland', 'Bulgaria', 'Sweden', 'Belgium', 'Greece', 'Ireland', 'Switzerland', 'Hungary', 'Austria',
          'Denmark', 'Finland', 'Moldova', 'Lithuania', 'Belarus', 'Czech Republic', 'Serbia', 'Albania', 'Croatia', 'Norway',
          'Slovakia', 'Andorra', 'Latvia', 'Estonia', 'Slovenia', 'Macedonia', 'Iceland', 'Bosnia and Herzegovina',
          'Luxembourg', 'Malta', 'Montenegro', 'Liechtenstein', 'Monaco', 'San Marino', 'Other European countries or territories']
africa = ['Morocco', 'Algeria', 'Senegal', 'Nigeria', 'Egypt', 'Ghana', 'Guinea', 'Equatorial Guinea', 'Tunisia', 'Cameroon',
          'Gambia', 'South Africa', 'Mali', 'Libya', 'Guinea-Bissau', 'Ivory Coast', 'Kenya', 'Mauritania', 'Angola', 'Congo',
          'Ethiopia', 'Burkina Faso', 'Tanzania', 'Sierra Leone', 'Mozambique', 'Sudan', 'Benin', 'Madagascar', 'Somalia',
          'Togo', 'Uganda', 'Cape Verde', 'Niger', 'Mauritius', 'Namibia', 'Zimbabwe', 'Rwanda', 'Liberia',
          'São Tomé and Príncipe', 'Eritrea', 'Malawi', 'Central African Republic', 'Botswana', 'Burundi', 'Gabon', 'Zambia',
          'Chad', 'Seychelles', 'Comoros', 'Djibouti', 'Swaziland', 'Democratic Republic of the Congo', 'Lesotho',
          'Other Africa countries or territories']
latinamerica = ['Colombia', 'Honduras', 'Peru', 'Venezuela', 'Bolivia', 'Ecuador', 'Argentina', 'Brazil', 'Dominican Republic',
                'Paraguay', 'Chile', 'Cuba', 'El Salvador', 'Uruguay', 'Nicaragua', 'Costa Rica', 'Panama', 'Guatemala',
                'Dominica', 'Haiti', 'Trinidad and Tobago', 'Jamaica', 'Barbados', 'Bahamas', 'Saint Vincent and the Grenadines',
                'Guyana', 'Suriname', 'Antigua and Barbuda', 'Belize', 'Grenada', 'Saint Kitts and Nevis', 'Saint Lucia',
                "Other South America countries or territories"]
northamerica = ['United States', 'Mexico', 'Canada']
asia = ['China', 'Pakistan', 'Philippines', 'India', 'Bangladesh', 'Armenia', 'Japan', 'Nepal', 'Turkey', 'Iran', 'South Korea',
        'Syria', 'Israel', 'Lebanon', 'Kazakhstan', 'Vietnam', 'Thailand', 'Jordan', 'Indonesia', 'Azerbaijan', 'Cyprus', 'Iraq',
        'Uzbekistan', 'Afghanistan', 'Saudi Arabia', 'Palestine', 'Palestinian Territories', 'Malaysia', 'Mongolia', 'Singapore',
        'Kyrgyzstan', 'Kuwait', 'Sri Lanka', 'Yemen', 'Qatar', 'Turkmenistan', 'United Arab Emirates', 'Myanmar', 'Cambodia',
        'Tajikistan', 'Bahrain', 'Maldives', 'Lao', 'Laos', 'Oman', 'Timor-Leste', 'Korea',
        'Other Asia countries or territories']
# NOTE(review): 'Stateless Persons' is kept here as in the original grouping,
# but it is not an Oceania nationality - confirm whether it needs its own group.
oceania = ['Australia', 'New Zealand', 'Solomon Islands', 'Stateless Persons', 'Other Oceania countries or territories']

In [27]:
# Translate the raw nationality labels to English.  nat_cat and nat_eng are
# paired by position, so they must have the same length: zip() would otherwise
# silently drop the unmatched tail and leave (or mis-assign) labels.
if len(nat_cat) != len(nat_eng):
    raise ValueError(
        f"nat_cat has {len(nat_cat)} labels but nat_eng has {len(nat_eng)}; "
        "the positional translation table is out of sync with the data"
    )
dict_nat = dict(zip(nat_cat, nat_eng))
# Assign the result back to the column.  Calling replace(inplace=True) on the
# column accessor is a chained in-place update: recent pandas warns about it
# and, with Copy-on-Write, it no longer changes `nationality` at all.
nationality["Nationality"] = nationality["Nationality"].replace(dict_nat)


In [30]:
# Box plot of Amount by gender for the non-Spanish rows with more than 4 people,
# one animation frame per year.
foreign_rows = nationality.loc[(nationality.Amount > 4) & (nationality.Nationality != "Spain"), :]
fig = px.box(
    # Animation frames follow first appearance in the data and the file starts
    # with 2019, so sort by Year (stable sort) for a chronological slider.
    foreign_rows.sort_values(by="Year", kind="mergesort"),
    x="Amount",
    title="Nationality Boxplot",
    color="Gender",
    animation_frame="Year",
)
fig.show()

In [31]:
# Histogram of Amount by gender for the non-Spanish rows with more than 4 people,
# one animation frame per year.
# The labels were translated to English earlier in the notebook, so Spain must
# be excluded as "Spain"; the old "Espanya" filter matched nothing.
foreign_rows = nationality.loc[(nationality.Amount > 4) & (nationality.Nationality != "Spain"), :]
fig = px.histogram(
    # Stable sort by Year so the animation frames are created chronologically.
    foreign_rows.sort_values(by="Year", kind="mergesort"),
    x="Amount",
    # The x axis is Amount, not an age, so the title names what is plotted.
    title="Nationality Amount Histogram",
    color="Gender",
    animation_frame="Year",
)
fig.show()

In [33]:
# NOTE(review): this cell repeats the histogram of the previous cell - consider deleting one.
# Histogram of Amount by gender for the non-Spanish rows with more than 4 people,
# one animation frame per year.  Spain is excluded by its translated label
# "Spain"; the old "Espanya" filter matched nothing after the translation.
foreign_rows = nationality.loc[(nationality.Amount > 4) & (nationality.Nationality != "Spain"), :]
fig = px.histogram(
    # Stable sort by Year so the animation frames are created chronologically.
    foreign_rows.sort_values(by="Year", kind="mergesort"),
    x="Amount",
    # The x axis is Amount, not an age, so the title names what is plotted.
    title="Nationality Amount Histogram",
    color="Gender",
    animation_frame="Year",
)
fig.show()

# Age

*01.Year* - Year of the data

*02.Code_Dist* - District code

*03.Name_Dist* - Name of the district

*04.Code_NBHD* - Neighborhood code

*05.Name_NBHD* - Name of the neighborhood

*06.Gender* - Gender of people (Binary category)

*07.Nationality* - Nationality of people

*08.Age* - Age of people (every 5 years)

*09.Amount* - Number of people

In [34]:
# Load the population-by-age-bucket data; its columns are described in the section above.
age = pd.read_csv("../datasets/People_Age.csv")
# Bare last expression: the frame is rendered as the cell output.
age

Unnamed: 0,Year,Code_Dist,Name_Dist,Code_NBHD,Name_NBHD,Gender,Nationality,Age,Amount
0,2019,1,Ciutat Vella,1,el Raval,Home,Espanyola,0-4 anys,438
1,2019,1,Ciutat Vella,2,el Barri Gòtic,Home,Espanyola,0-4 anys,126
2,2019,1,Ciutat Vella,3,la Barceloneta,Home,Espanyola,0-4 anys,123
3,2019,1,Ciutat Vella,4,"Sant Pere, Santa Caterina i la Ribera",Home,Espanyola,0-4 anys,228
4,2019,2,Eixample,5,el Fort Pienc,Home,Espanyola,0-4 anys,464
...,...,...,...,...,...,...,...,...,...
18391,2017,10,Sant Martí,69,Diagonal Mar i el Front Marítim del Poblenou,Dona,Estrangera,100 anys i més,0
18392,2017,10,Sant Martí,70,el Besòs i el Maresme,Dona,Estrangera,100 anys i més,0
18393,2017,10,Sant Martí,71,Provençals del Poblenou,Dona,Estrangera,100 anys i més,0
18394,2017,10,Sant Martí,72,Sant Martí de Provençals,Dona,Estrangera,100 anys i més,0


profile = ProfileReport(age, title="age Report")
profile.to_notebook_iframe()
profile.to_file("age_report.html")

In [35]:
# Column dtypes and non-null counts (shows whether any column has missing values).
age.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18396 entries, 0 to 18395
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Year         18396 non-null  int64 
 1   Code_Dist    18396 non-null  int64 
 2   Name_Dist    18396 non-null  object
 3   Code_NBHD    18396 non-null  int64 
 4   Name_NBHD    18396 non-null  object
 5   Gender       18396 non-null  object
 6   Nationality  18396 non-null  object
 7   Age          18396 non-null  object
 8   Amount       18396 non-null  int64 
dtypes: int64(4), object(5)
memory usage: 1.3+ MB


In [36]:
# Summary statistics of the numeric columns, transposed so each column becomes a row.
age.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Year,18396.0,2018.0,0.82,2017.0,2017.0,2018.0,2019.0,2019.0
Code_Dist,18396.0,6.25,2.79,1.0,4.0,7.0,8.0,10.0
Code_NBHD,18396.0,37.0,21.07,1.0,19.0,37.0,55.0,73.0
Amount,18396.0,266.6,339.06,0.0,20.0,125.0,398.25,2381.0


In [38]:
# Amount of people per age bucket, coloured by nationality, one animation frame per year.
# color_dict is keyed by the composition labels (Spaniards / Mixed / Foreigners),
# but this frame's Nationality column holds the raw labels seen in its preview,
# so passing color_dict directly never matched and the palette was ignored.
# Assumes 'Espanyola' / 'Estrangera' are the only labels here - TODO confirm.
age_color_map = {
    'Espanyola': color_dict['Spaniards'],
    'Estrangera': color_dict['Foreigners'],
}
fig = px.bar(
    # Frames are created in order of first appearance and the file starts with
    # 2019, so sort by Year for a chronological slider.  The sort must be stable
    # (mergesort): the x-axis category order also follows data order, and an
    # unstable sort could shuffle the age buckets.
    age.sort_values(by="Year", kind="mergesort"),
    x="Age",
    y='Amount',
    # NOTE(review): with 'overlay' the bars of the individual rows in a bucket
    # are drawn on top of each other rather than summed - confirm this is intended.
    barmode='overlay',
    title="Amount of People",
    animation_frame="Year",
    color="Nationality",
    color_discrete_map=age_color_map,
    template="plotly_white",
    hover_data=['Name_NBHD', 'Name_Dist'],
    labels={'Name_NBHD': 'Neighborhood Name'},
)

fig.show()