# Analysis 
Now that we have generated 1000 random Wikipedia pages and traced each one's path to the Philosophy article, it's time to analyze the results!

In [1]:
import pandas as pd
import os
import re
import feather

In [22]:
# Read the table of scraped link chains that was saved in Feather format.
df = pd.read_feather(path='links_to_philosophy.feather')

# make list of links in 'chain' column 
def linksToList(x): # path is comma separated string of wikilinks
    """Split a comma-separated string of Wikipedia URLs into a list of URLs.

    The string is split on the ``https://`` scheme rather than on commas,
    because an article title can itself contain a comma
    (e.g. ``Mansurlu,_Feke``).

    Parameters
    ----------
    x : str
        Chain of links joined by commas. Any non-string value (for example
        ``None``) is treated as an empty chain.

    Returns
    -------
    list of str
        The links in their original order, each without the separator that
        followed it in ``x``. Empty when ``x`` contains no ``https://`` link.
    """
    if not isinstance(x, str):
        return []
    # Whatever precedes the first scheme is not a link, hence the [1:].
    pieces = x.split('https://')[1:]
    # Every piece but the last still ends with the separator that preceded
    # the next link; strip it so each returned URL is usable as-is.
    return ['https://' + piece.rstrip(', \t\n') for piece in pieces]
# Replace each raw chain string with its list of links; the function is
# handed to map directly, so no wrapper lambda is needed.
df['chain'] = df['chain'].map(linksToList)

# make 'failed' column which shows whether web scraping failed 
def failed(x):
    """Tell whether a chain is empty, which marks a failed scrape.

    x is any sized collection (something ``len`` accepts).
    Returns True when it has no elements and False otherwise.
    """
    return len(x) == 0
# Flag every row according to whether its chain came back empty.
df['failed'] = df['chain'].map(failed)

# Summary figures, followed by a preview of the table.
row_count = len(df)
fail_count = int(df['failed'].sum())                  # rows flagged as failed
philo_count = int((df['philosophy'] == True).sum())   # rows whose philosophy flag is True

percFail = round(fail_count / row_count * 100, 1)
# This share is taken over the rows that did not fail, not over all rows.
percPhilo = round(philo_count / (row_count - fail_count) * 100, 1)

for template, value in (
    ('% links that reached philosophy: {}', percPhilo),
    ('% links where webscraper failed: {}', percFail),
):
    print(template.format(value))
df.head()

% links that reached philosophy: 78.0
% links where webscraper failed: 8.6


Unnamed: 0,rLinks,chain,philosophy,failed
0,"https://en.wikipedia.org/wiki/Mansurlu,_Feke","[https://en.wikipedia.org/wiki/Mansurlu,_Feke,...",True,False
1,https://en.wikipedia.org/wiki/Net_neutrality_i...,[https://en.wikipedia.org/wiki/Net_neutrality_...,True,False
2,https://en.wikipedia.org/wiki/Richard_Sissons,[https://en.wikipedia.org/wiki/Richard_Sissons...,True,False
3,https://en.wikipedia.org/wiki/Meriwether_Natio...,[https://en.wikipedia.org/wiki/Meriwether_Nati...,True,False
4,https://en.wikipedia.org/wiki/Netechma_bicerit...,[],False,True


In [26]:
# Keep only the rows whose scrape succeeded. The boolean column is negated
# directly: adding `== True` is redundant and, because `~` binds tighter
# than `==`, easy to misread as "not (failed == True)".
df = df[~df.failed]
# Show the filtered frame so re-running the cell reproduces its output.
df


Unnamed: 0,rLinks,chain,philosophy,failed
0,"https://en.wikipedia.org/wiki/Mansurlu,_Feke","[https://en.wikipedia.org/wiki/Mansurlu,_Feke,...",True,False
1,https://en.wikipedia.org/wiki/Net_neutrality_i...,[https://en.wikipedia.org/wiki/Net_neutrality_...,True,False
2,https://en.wikipedia.org/wiki/Richard_Sissons,[https://en.wikipedia.org/wiki/Richard_Sissons...,True,False
3,https://en.wikipedia.org/wiki/Meriwether_Natio...,[https://en.wikipedia.org/wiki/Meriwether_Nati...,True,False
5,https://en.wikipedia.org/wiki/Kristina_Fröjmark,[https://en.wikipedia.org/wiki/Kristina_Fröjma...,True,False
...,...,...,...,...
995,https://en.wikipedia.org/wiki/Josef_Effertz,"[https://en.wikipedia.org/wiki/Josef_Effertz,,...",True,False
996,https://en.wikipedia.org/wiki/Trapezoid_body,"[https://en.wikipedia.org/wiki/Trapezoid_body,...",True,False
997,https://en.wikipedia.org/wiki/Gilberto_Peña,"[https://en.wikipedia.org/wiki/Gilberto_Peña,,...",True,False
998,https://en.wikipedia.org/wiki/Stanley_Tomshinsky,[https://en.wikipedia.org/wiki/Stanley_Tomshin...,False,False
