In [1]:
import pandas as pd # For retrieving and manipulating data

import numpy as np


In [2]:
dfPoliticians = pd.read_csv("wp_politicians_by_country.csv", dtype={"revision_id":"Int64"})
dfPoliticians = dfPoliticians.loc[:, ~dfPoliticians.columns.str.contains('^Unnamed')]
print(dfPoliticians.head())

           article_title  revision_id article_quality      Country  \
0        Shahjahan Noori   1099689043              GA  Afghanistan   
1  Abdul Ghafar Lakanwal    943562276           Start  Afghanistan   
2         Majah Ha Adrif    852404094           Start  Afghanistan   
3      Haroon al-Afghani   1095102390               B  Afghanistan   
4            Tayyab Agha   1104998382           Start  Afghanistan   

       Region  Population  
0  SOUTH ASIA        41.1  
1  SOUTH ASIA        41.1  
2  SOUTH ASIA        41.1  
3  SOUTH ASIA        41.1  
4  SOUTH ASIA        41.1  


In [10]:
countryCounts = dfPoliticians['Country'].value_counts().reset_index(name='Count').rename(columns={'index': 'Country', 'Count': 'Count'})
print(countryCounts.head())
index = countryCounts.Country
countries = pd.DataFrame(countryCounts, columns=['Country', 'Article_Counts'])
countries["Article_Counts"] = countryCounts.Count

highQuality = dfPoliticians[(dfPoliticians.article_quality == "FA") | (dfPoliticians.article_quality == "GA")]
print(highQuality.head())

countries["Population"] = 0
countries["Region"] = ""

for i in range(len(index)):
    pop = dfPoliticians[dfPoliticians.Country == index[i]].iloc[0]
    countries.loc[i, "Population"] = pop.Population
    countries.loc[i, "Region"] = pop.Region
countries = countries[countries.Population > 0.0]
print(countries.head())

   Country  Count
0   France    251
1  Nigeria    222
2    India    179
3   Russia    174
4   Poland    168
         article_title  revision_id article_quality      Country      Region  \
0      Shahjahan Noori   1099689043              GA  Afghanistan  SOUTH ASIA   
55   Ahmed Wali Karzai   1090245979              GA  Afghanistan  SOUTH ASIA   
59      Masoud Khalili   1103105365              GA  Afghanistan  SOUTH ASIA   
93      Amrullah Saleh   1115022704              FA  Afghanistan  SOUTH ASIA   
107   Nur ul-Haq Ulumi   1107429109              GA  Afghanistan  SOUTH ASIA   

     Population  
0          41.1  
55         41.1  
59         41.1  
93         41.1  
107        41.1  
   Country  Article_Counts  Population          Region
0   France             251        65.8  WESTERN EUROPE
1  Nigeria             222       218.5  WESTERN AFRICA
2    India             179      1417.2      SOUTH ASIA
3   Russia             174       144.3  EASTERN EUROPE
4   Poland             168  

In [4]:
highQuality = dfPoliticians[(dfPoliticians.article_quality == "FA") | (dfPoliticians.article_quality == "GA")]
highQualityCounts = highQuality['Country'].value_counts().reset_index(name='Count').rename(columns={'index': 'Country', 'Count': 'Count'})

countries["High_Quality_Articles"] = 0

for i in range(len(highQualityCounts)):
    country = highQualityCounts.loc[i, "Country"]
    countries.loc[countries.Country == country, "High_Quality_Articles"] = highQualityCounts.loc[i, "Count"]

In [5]:
countries["Article_Per_Capita"] = countries.Article_Counts/countries.Population
countries["Quality_Articles_Per_Capita"] = countries.High_Quality_Articles/countries.Population
print(countries.head())

   Country  Article_Counts  Population  High_Quality_Articles  \
0   France             251        65.8                     11   
1  Nigeria             222       218.5                      4   
2    India             179      1417.2                      6   
3   Russia             174       144.3                     16   
4   Poland             168        38.0                      7   

   Article_Per_Capita  Quality_Articles_Per_Capita  
0            3.814590                     0.167173  
1            1.016018                     0.018307  
2            0.126305                     0.004234  
3            1.205821                     0.110880  
4            4.421053                     0.184211  


In [6]:
print(countries.nlargest(10, "Article_Per_Capita",keep='first'))

                            Country  Article_Counts  Population  \
118             Antigua and Barbuda              17         0.1   
133  Federated States of Micronesia              13         0.1   
149                         Andorra              10         0.1   
89                         Barbados              28         0.3   
150                Marshall Islands               9         0.1   
73                       Montenegro              36         0.6   
160                      Seychelles               6         0.1   
69                       Luxembourg              37         0.7   
63                           Bhutan              41         0.8   
166                         Grenada               5         0.1   

     High_Quality_Articles  Article_Per_Capita  Quality_Articles_Per_Capita  
118                      0          170.000000                          0.0  
133                      0          130.000000                          0.0  
149                      2  

In [7]:
print(countries.nsmallest(10, "Article_Per_Capita",keep='first'))

          Country  Article_Counts  Population  High_Quality_Articles  \
181         China               2      1436.6                      0   
183        Mexico               1       127.5                      0   
170  Saudi Arabia               3        36.7                      2   
180       Romania               2        19.0                      2   
2           India             179      1417.2                      6   
174     Sri Lanka               3        22.4                      0   
128         Egypt              14       103.5                      0   
98       Ethiopia              25       123.4                      3   
165        Taiwan               5        23.2                      0   
94        Vietnam              27        99.4                      2   

     Article_Per_Capita  Quality_Articles_Per_Capita  
181            0.001392                     0.000000  
183            0.007843                     0.000000  
170            0.081744                   

In [8]:
print(countries.nlargest(10, "Quality_Articles_Per_Capita",keep='first'))

                   Country  Article_Counts  Population  High_Quality_Articles  \
149                Andorra              10         0.1                      2   
73              Montenegro              36         0.6                      3   
25                 Albania              83         2.8                      6   
103               Suriname              23         0.6                      1   
50      Bosnia-Herzegovina              52         3.4                      5   
26               Lithuania              75         2.8                      3   
48                 Croatia              54         3.8                      4   
60                Slovenia              43         2.1                      2   
29   Palestinian Territory              71         5.4                      5   
162                  Gabon               6         2.4                      2   

     Article_Per_Capita  Quality_Articles_Per_Capita  
149          100.000000                    20.000000 

In [9]:
print(countries.nsmallest(10, "Quality_Articles_Per_Capita",keep='first'))

             Country  Article_Counts  Population  High_Quality_Articles  \
5              Italy             157        58.9                      0   
21            Brazil              89       214.8                      0   
23           Austria              86         9.0                      0   
37          Zimbabwe              63        16.3                      0   
39         Venezuela              62        28.3                      0   
45        Bangladesh              56       171.2                      0   
46            Turkey              56        85.2                      0   
47         Argentina              54        46.2                      0   
51  Congo, Dem. Rep.              51        99.0                      0   
54            Greece              50        10.6                      0   

    Article_Per_Capita  Quality_Articles_Per_Capita  
5             2.665535                          0.0  
21            0.414339                          0.0  
23          

In [26]:
populationPerRegion = countries.groupby(['Region']).agg({'Population': 'sum'}).reset_index()
articlesPerRegion = countries.groupby(['Region']).agg({'Article_Counts': 'sum'}).reset_index()

articlesPerRegion["Population"] = 0.0
articlesPerRegion["Articles_Per_Capita"] = 0.0

print(populationPerRegion.head())
print(articlesPerRegion.head())

for i in range(len(articlesPerRegion)):
    region = articlesPerRegion.loc[i, "Region"]
    regionPop = populationPerRegion.loc[populationPerRegion.Region == region,"Population"]
    numArticles = articlesPerRegion.loc[i, "Article_Counts"]
    articlesPerRegion[:i].Population = regionPop
    articlesPerRegion.loc[:i] = numArticles/regionPop
print(articlesPerRegion.head())

            Region  Population
0        CARIBBEAN        39.5
1  CENTRAL AMERICA       177.9
2     CENTRAL ASIA        78.0
3        EAST ASIA      1665.8
4   EASTERN AFRICA       470.3
            Region  Article_Counts  Population  Articles_Per_Capita
0        CARIBBEAN             201         0.0                  0.0
1  CENTRAL AMERICA             195         0.0                  0.0
2     CENTRAL ASIA             106         0.0                  0.0
3        EAST ASIA             246         0.0                  0.0
4   EASTERN AFRICA             650         0.0                  0.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  articlesPerRegion[:i].Population = regionPop


ValueError: Must have equal len keys and value when setting with an iterable