# Scraping BBC website
Turn it in a searchable dataframe

In [72]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
pd.set_option('display.max_rows', 400)
pd.set_option('display.max_colwidth', 500)

In [73]:
response = requests.get('http://www.bbc.com/culture/story/20160819-the-21st-centurys-100-greatest-films-who-voted')
doc = BeautifulSoup(response.text, 'html.parser')
all_info = doc.find_all(class_= 'body-content')
all_p = doc.find_all('p')

## Put them in a dataframe

In [74]:
list_of_movie = []
for lines in all_p[3:]:
    if lines.strong is not None:
        critic_info = lines.find('strong').string
        regex_for_name = r"^\w+\s\w+ "
        regex_for_org =  r"\w\s[^-]\s(\w.*) [()]"
        regex_for_cn = r"[(](\w+)[)]"
        try:
            critics_name = re.findall(regex_for_name, critic_info)[0]
            critics_org = re.findall(regex_for_org, critic_info)[0]
            critics_cn = re.findall(regex_for_cn, critic_info)[0]
        except:
            pass
        movie_info = lines.find_next_sibling()      
        each_movie = movie_info.find_all(string=True)
        
        for movie_list in each_movie:
            movie_dict = {}
            movie = movie_list
            regex_for_rank =r"^\d+"
            regex_for_mname = r"\d. (\w.*)\s[(]"
            regex_for_dir = r"[(](\w.*),"
            regex_for_year = r", (\d+)[)]" 
            movie_rank = re.findall(regex_for_rank,movie)
            try:
                movie_dict['critics_name'] = critics_name
                movie_dict['critics_org'] = critics_org
                movie_dict['critics_cn'] = critics_cn
                movie_dict['movie_name'] = re.findall(regex_for_mname,movie)[0]
                movie_dict['movie_dir'] = re.findall(regex_for_dir,movie)[0]
                movie_dict['movie_year'] = re.findall(regex_for_year,movie)[0]
            except:
                pass

        
            list_of_movie.append(movie_dict)
list_of_movie

[{'critics_name': 'Simon Abrams ',
  'critics_org': 'Freelance film critic',
  'critics_cn': 'US',
  'movie_name': 'Mulholland Drive',
  'movie_dir': 'David Lynch',
  'movie_year': '2001'},
 {'critics_name': 'Simon Abrams ',
  'critics_org': 'Freelance film critic',
  'critics_cn': 'US',
  'movie_name': 'In the Mood for Love',
  'movie_dir': 'Wong Kar-wai',
  'movie_year': '2000'},
 {'critics_name': 'Simon Abrams ',
  'critics_org': 'Freelance film critic',
  'critics_cn': 'US',
  'movie_name': 'The Tree of Life',
  'movie_dir': 'Terrence Malick',
  'movie_year': '2011'},
 {'critics_name': 'Simon Abrams ',
  'critics_org': 'Freelance film critic',
  'critics_cn': 'US',
  'movie_name': 'Yi Yi: A One and a Two',
  'movie_dir': 'Edward Yang',
  'movie_year': '2000'},
 {'critics_name': 'Simon Abrams ',
  'critics_org': 'Freelance film critic',
  'critics_cn': 'US',
  'movie_name': 'Goodbye to Language',
  'movie_dir': 'Jean-Luc Godard',
  'movie_year': '2014'},
 {'critics_name': 'Simon Abr

In [75]:
df_movie = pd.read_csv("df_movie_all.csv", dtype='object')
df_movie

Unnamed: 0,critics_name,critics_org,critics_cn,movie_name,movie_dir,movie_year
0,Simon Abrams,Freelance film critic,US,Mulholland Drive,David Lynch,2001
1,Simon Abrams,Freelance film critic,US,In the Mood for Love,Wong Kar-wai,2000
2,Simon Abrams,Freelance film critic,US,The Tree of Life,Terrence Malick,2011
3,Simon Abrams,Freelance film critic,US,Yi Yi: A One and a Two,Edward Yang,2000
4,Simon Abrams,Freelance film critic,US,Goodbye to Language,Jean-Luc Godard,2014
5,Simon Abrams,Freelance film critic,US,The White Meadows,Mohammad Rasoulof,2009
6,Simon Abrams,Freelance film critic,US,Night Across the Street,Raoul Ruiz,2012
7,Simon Abrams,Freelance film critic,US,Certified Copy,Abbas Kiarostami,2010
8,Simon Abrams,Freelance film critic,US,Sparrow,Johnnie To,2008
9,Simon Abrams,Freelance film critic,US,Fados,Carlos Saura,2007


In [76]:
#whole movies selected
df_movie['movie_name'].nunique()

592

In [77]:
#whole directors selected
df_movie['movie_dir'].nunique()

408

# Scraping  IMDb directors' born country

In [8]:
from bs4 import BeautifulSoup
def get_country_info(row):
    try:
        direcor = row['movie_dir']
        director_name = direcor.replace(" ", "+")
        director_name
        result = requests.get('http://www.imdb.com/find?ref_=nv_sr_fn&q='+director_name+'&s=all').content
        soup_doc = BeautifulSoup(result, "html.parser")
        info = soup_doc.find_all(class_= 'result_text')[0].find('a')['href']
        page = 'https://www.imdb.com'+info

        info_page = requests.get(page).content
        info_page
        page_doc = BeautifulSoup(info_page, "html.parser")
        country = page_doc.find(id='name-born-info').find_all('a')[-1].text
        print(country)

        return pd.Series({
            'dir_country': country      
        })
    except:
        return pd.Series({})   

In [None]:
new_df = df_movie.apply(get_country_info, axis=1).join(df_movie)
new_df = new_df[['critics_name', 'critics_org', 'critics_cn', 'movie_name','movie_dir', 'dir_country', 'movie_year']]
new_df

Missoula, Montana, USA
Missoula, Montana, USA
Shanghai, China
Ottawa, Illinois, USA
Shanghai, China
Paris, France
Shiraz, Iran
Puerto Montt, Chile
Tehran, Iran
Huesca, Aragón, Spain
Shanghai, China
Versailles, Seine-et-Oise [now Yvelines], France
Bangkok, Thailand
Tokyo, Japan
Texas, USA
Houston, Texas, USA
Ottawa, Illinois, USA
Tehran, Iran
Louisville, Kentucky, USA
San Carlos, California, USA
Toronto, Ontario, Canada
Houston, Texas, USA
Toronto, Ontario, Canada
Hastings, New Zealand
Versailles, Seine-et-Oise [now Yvelines], France
Lisbon, Portugal
Bilbao, Vizcaya, País Vasco, Spain
Memphis, Tennessee, USA


In [78]:
new_df_movie_bs = pd.read_csv("new_df_movie_bs.csv", dtype='object')
new_df_movie_bs

Unnamed: 0,critics_name,critics_org,critics_cn,movie_name,movie_dir,dir_country,movie_year
0,Simon Abrams,Freelance film critic,US,Mulholland Drive,David Lynch,"Missoula, Montana, USA",2001
1,Simon Abrams,Freelance film critic,US,In the Mood for Love,Wong Kar-wai,"Shanghai, China",2000
2,Simon Abrams,Freelance film critic,US,The Tree of Life,Terrence Malick,"Ottawa, Illinois, USA",2011
3,Simon Abrams,Freelance film critic,US,Yi Yi: A One and a Two,Edward Yang,"Shanghai, China",2000
4,Simon Abrams,Freelance film critic,US,Goodbye to Language,Jean-Luc Godard,"Paris, France",2014
5,Simon Abrams,Freelance film critic,US,The White Meadows,Mohammad Rasoulof,"Shiraz, Iran",2009
6,Simon Abrams,Freelance film critic,US,Night Across the Street,Raoul Ruiz,"Puerto Montt, Chile",2012
7,Simon Abrams,Freelance film critic,US,Certified Copy,Abbas Kiarostami,"Tehran, Iran",2010
8,Simon Abrams,Freelance film critic,US,Sparrow,Johnnie To,Hong Kong,2008
9,Simon Abrams,Freelance film critic,US,Fados,Carlos Saura,"Huesca, Aragón, Spain",2007


## Dealing with NaNs and scrap again using selenuim

In [80]:
nans = lambda new_df_movie_bs: new_df_movie_bs[new_df_movie_bs.isnull().any(axis=1)]
new_df_movie_bs_nans = nans(new_df_movie_bs)[['movie_dir']]
new_df_movie_bs_nans.shape

(209, 1)

In [None]:
driver = webdriver.Chrome()
import time
def get_country_info(row):
    try:
        driver.get("https://www.imdb.com/")
        time.sleep(1)
        text_input = driver.find_element_by_id("navbar-query")
        time.sleep(1)
        text_input.send_keys(row['movie_dir'])
        time.sleep(1)
        info = driver.find_element_by_class_name("suggestionlabel")
        time.sleep(1)
        info.click()
        time.sleep(1)
        country = driver.find_element_by_id('name-born-info').text

        print("This director is from", country)

        return pd.Series({
            'dir_cn': country
        })
    except:
        return pd.Series({})

In [None]:
new_df_movie_bs_fix = new_df_movie_bs_nans.apply(get_country_info, axis=1).join(new_df_movie_bs_nans)
new_df_movie_bs_fix = new_df_movie_bs_fix[['movie_dir', 'dir_cn']]
new_df_movie_bs_fix

In [81]:
pd.read_csv("new_df_movie_bs_fix.csv", dtype='object')

Unnamed: 0,movie_dir,dir_cn
0,Jia Zhangke,
1,Joel and Ethan Coen,"Born: Calgary, Alberta, Canada"
2,Andrew Stanton and Lee Unkrich,"Born: December 3, 1965 in Boston, Massachusetts, USA"
3,Zhang Yimou,"Born: November 14, 1951 in Xi'an, Shaanxi, China"
4,Danièle Huillet and Jean-Marie Straub,"Born: May 1, 1936 in Paris, France"
5,Pippo Delbono,
6,Marco De Angelis and Antonio Di Trapani,
7,Yervant Gianikian and Angela Ricci Lucchi,"Born: 1942 in Merano, Trentino-Alto Adige, Italy"
8,Rick Alverson,
9,Joel and Ethan Coen,"Born: Calgary, Alberta, Canada"


## Scraping movies' score and genres

In [None]:
def get_movie_info(row):
    try:
        movie = row['movie_name']
        movie_name = movie.replace(" ", "+")
        movie_name
        result = requests.get('http://www.imdb.com/find?ref_=nv_sr_fn&q='+movie_name+'&s=all').content
        soup_doc = BeautifulSoup(result, "html.parser")
        info = soup_doc.find_all(class_= 'result_text')[0].find('a')['href']
        page = 'https://www.imdb.com'+info
        info_page = requests.get(page).content
        info_page
        page_doc = BeautifulSoup(info_page, "html.parser")
        
        score = page_doc.find(class_='ratingValue').text
        score1 = score.replace(' ','')
        score2 = score1.replace('\n','')
        print(score2)
        genres = page_doc.find(class_='subtext').find_all('a')[0:-1]
        genre_name = []
        for genre in genres:
            genre = genre.find('span')
            genre_name.append(genre.string)
        print(genre_name)

        return pd.Series({
            'imdb_score': score2,
            'movie_genre': genre_name
        })
    except:
        return pd.Series({})

In [None]:
new_df3 = new_df_movie_bs.apply(get_movie_info, axis=1).join(new_df_movie_bs)
new_df3 = new_df3[['critics_name', 'critics_org', 'critics_cn', 'movie_name','movie_dir', 'dir_country', 'imdb_score', 'movie_genre', 'movie_year']]

In [None]:
pd.read_csv("new_df_movie_all_bs.csv", dtype='object')

## Filling NaNs and cleaning

In [None]:
df1 = pd.read_csv("new_df_movie_all_bs.csv",dtype='object')
df2 = pd.read_csv("new_df_movie_bs_fix.csv")

In [None]:
df1.dir_country.fillna(df2.dir_cn, inplace=True)

Get countries out and get scores out

In [None]:
df1['dir_country_cleaned'] = df1['dir_country'].str.extract(r",?\s?(\w+\s?\w+?)$")

In [None]:
df1['movie_genre'] = df1['movie_genre'].str.replace("'",'')
df1['movie_genre'] = df1['movie_genre'].str.replace("[",'')
df1['movie_genre'] = df1['movie_genre'].str.replace("]",'')
df1['imdb_score_cleaned'] = df1['imdb_score'].str.extract(r"(^\d.*)/")

In [None]:
pd.read_csv("new_df_movie_ds_cleaned.csv", dtype='object')

## Some cleaning and checking by hand....
Some of directors information are missing on IMDb, so I use google to fill some by hand... that is silly don't do that like me, but I got less than 30 NaNs for dir_country after. 

In [None]:
df = pd.read_csv("new_df_movie_ds_cleaned_hand.csv")
df.head(10)

In [None]:
#whole countries directors  are from
df['dir_country_cleaned'].nunique()

## Getting all the Asian directors' movie

In [None]:
df_asian = df[df['dir_country_cleaned'].isin(['Taiwan', 'Hong Kong', 'China', 'Korea', 'South Korea', 'Japan', 'Thailand', 'Malaysia', 'India', 'Iran', 'Israel', 'Lebanon', 'Turkey'])]
df_asian.head(5)

In [None]:
pd.read_csv("df_asian_movie.csv")

## Getting those Asian movies score from douban, a relative reliable Chinese movie website.

In [7]:
df_asian_movie2 = pd.read_csv("df_asian_movie_douban.csv", encoding = "ISO-8859-1")
df_asian_movie2

Unnamed: 0,critics_name,critics_cn,movie_name,movie_dir,movie_genre,movie_year,dir_country_cleaned,imdb_score_cleaned,douban_score
0,Michael Atkinson,US,2046,Wong Kar-wai,"Drama, Romance, Sci-Fi",2004,China,7.5,7.5
1,Alyssa Rosenberg,US,2046,Wong Kar-wai,"Drama, Romance, Sci-Fi",2004,China,7.5,7.5
2,Mario Espinosa,Cuba,3-Iron,Kim Ki-duk,"Crime, Drama, Romance",2004,South Korea,8.1,8.0
3,E Nina,Italy,A Borrowed Identity,Eran Riklis,Drama,2014,Israel,7.2,
4,Tim Appelo,US,A Separation,Asghar Farhadi,"Drama, Mystery",2011,Iran,8.3,8.7
5,Nicholas Barber,UK,A Separation,Asghar Farhadi,"Drama, Mystery",2011,Iran,8.3,8.7
6,Lizelle Bisschoff,UK,A Separation,Asghar Farhadi,"Drama, Mystery",2011,Iran,8.3,8.7
7,Utpal Borpujari,India,A Separation,Asghar Farhadi,"Drama, Mystery",2011,Iran,8.3,8.7
8,Luke Buckmaster,Australia,A Separation,Asghar Farhadi,"Drama, Mystery",2011,Iran,8.3,8.7
9,Samuel Castro,Colombia,A Separation,Asghar Farhadi,"Drama, Mystery",2011,Iran,8.3,8.7


## A dataframe with each movie and their votes get from critics.

In [8]:
df_votes = df_asian_movie2.groupby('movie_name').movie_dir.value_counts().sort_values(ascending=False).reset_index(name='votes')
df_votes

Unnamed: 0,movie_name,movie_dir,votes
0,In the Mood for Love,Wong Kar-wai,49
1,Spirited Away,Hayao Miyazaki,34
2,A Separation,Asghar Farhadi,28
3,Yi Yi: A One and a Two,Edward Yang,22
4,Brokeback Mountain,Ang Lee,11
5,Oldboy,Park Chan-wook,11
6,Uncle Boonmee Who Can Recall His Past Lives,Apichatpong Weerasethakul,9
7,Certified Copy,Abbas Kiarostami,9
8,"Crouching Tiger, Hidden Dragon",Ang Lee,8
9,Once Upon a Time in Anatolia,Nuri Bilge Ceylan,7


The way I get unique rows for each movie

In [9]:
df3= df_votes.merge(df_asian_movie2, how='left', on='movie_name')
df3 = df3[df3['movie_name'] != df3['movie_name'].shift()]

In [10]:
df3 = df3[['movie_name', 'movie_dir_x', 'votes', 'movie_genre', 'dir_country_cleaned', 'imdb_score_cleaned', 'douban_score']]
df3 = df3.rename(index=str, columns={"movie_dir_x": "movie_dir"})
df3

Unnamed: 0,movie_name,movie_dir,votes,movie_genre,dir_country_cleaned,imdb_score_cleaned,douban_score
0,In the Mood for Love,Wong Kar-wai,49,"Drama, Romance",China,8.1,8.5
49,Spirited Away,Hayao Miyazaki,34,"Animation, Adventure, Family",Japan,8.6,9.3
83,A Separation,Asghar Farhadi,28,"Drama, Mystery",Iran,8.3,8.7
111,Yi Yi: A One and a Two,Edward Yang,22,"Drama, Romance",China,8.1,8.9
133,Brokeback Mountain,Ang Lee,11,"Drama, Romance",Taiwan,7.7,8.7
144,Oldboy,Park Chan-wook,11,"Action, Drama, Mystery",South Korea,8.4,8.2
155,Uncle Boonmee Who Can Recall His Past Lives,Apichatpong Weerasethakul,9,"Drama, Fantasy",Thailand,6.6,6.8
164,Certified Copy,Abbas Kiarostami,9,Drama,Iran,7.3,
173,"Crouching Tiger, Hidden Dragon",Ang Lee,8,"Action, Adventure, Fantasy",Taiwan,7.9,8.0
181,Once Upon a Time in Anatolia,Nuri Bilge Ceylan,7,"Crime, Drama",Turkey,7.8,7.6


# Get structured for shape join

In [82]:
import numpy as np
df3['string'] = df3['movie_dir'] + "<b>" + " (" + df3['movie_name'] + ")" + "</b>" + "<br>" + df3['votes'].map(str) + np.where(df3["votes"]>1, ' votes', ' vote')  + "</br>" + "Douban score is:" + " " + df3['douban_score'].map(str) + ". " + "IMDb score is:" + " " + df3['imdb_score_cleaned'].map(str) 
df3.sort_values(by = 'votes', ascending=False)

Unnamed: 0,movie_name,movie_dir,votes,movie_genre,dir_country_cleaned,imdb_score_cleaned,douban_score,string
0,In the Mood for Love,Wong Kar-wai,49,"Drama, Romance",China,8.1,8.5,Wong Kar-wai<b> (In the Mood for Love)</b><br>49 votes</br>Douban score is: 8.5. IMDb score is: 8.1
49,Spirited Away,Hayao Miyazaki,34,"Animation, Adventure, Family",Japan,8.6,9.3,Hayao Miyazaki<b> (Spirited Away)</b><br>34 votes</br>Douban score is: 9.3. IMDb score is: 8.6
83,A Separation,Asghar Farhadi,28,"Drama, Mystery",Iran,8.3,8.7,Asghar Farhadi<b> (A Separation)</b><br>28 votes</br>Douban score is: 8.7. IMDb score is: 8.3
111,Yi Yi: A One and a Two,Edward Yang,22,"Drama, Romance",China,8.1,8.9,Edward Yang<b> (Yi Yi: A One and a Two)</b><br>22 votes</br>Douban score is: 8.9. IMDb score is: 8.1
133,Brokeback Mountain,Ang Lee,11,"Drama, Romance",Taiwan,7.7,8.7,Ang Lee<b> (Brokeback Mountain)</b><br>11 votes</br>Douban score is: 8.7. IMDb score is: 7.7
144,Oldboy,Park Chan-wook,11,"Action, Drama, Mystery",South Korea,8.4,8.2,Park Chan-wook<b> (Oldboy)</b><br>11 votes</br>Douban score is: 8.2. IMDb score is: 8.4
155,Uncle Boonmee Who Can Recall His Past Lives,Apichatpong Weerasethakul,9,"Drama, Fantasy",Thailand,6.6,6.8,Apichatpong Weerasethakul<b> (Uncle Boonmee Who Can Recall His Past Lives)</b><br>9 votes</br>Douban score is: 6.8. IMDb score is: 6.6
164,Certified Copy,Abbas Kiarostami,9,Drama,Iran,7.3,,Abbas Kiarostami<b> (Certified Copy)</b><br>9 votes</br>Douban score is: nan. IMDb score is: 7.3
173,"Crouching Tiger, Hidden Dragon",Ang Lee,8,"Action, Adventure, Fantasy",Taiwan,7.9,8.0,"Ang Lee<b> (Crouching Tiger, Hidden Dragon)</b><br>8 votes</br>Douban score is: 8.0. IMDb score is: 7.9"
181,Once Upon a Time in Anatolia,Nuri Bilge Ceylan,7,"Crime, Drama",Turkey,7.8,7.6,Nuri Bilge Ceylan<b> (Once Upon a Time in Anatolia)</b><br>7 votes</br>Douban score is: 7.6. IMDb score is: 7.8


In [83]:
output = df3.groupby('dir_country_cleaned')['string'].apply(lambda x: "<p>%s</p>" % '</p><p> '.join(x)).reset_index(name='properties.article')

In [84]:
output.iloc[3]['properties.article']

'<p>Ari Folman<b> (Waltz with Bashir)</b><br>4 votes</br>Douban score is: 8.7. IMDb score is: 8.0</p><p> Hany Abu-Assad<b> (Paradise Now)</b><br>2 votes</br>Douban score is: 7.7. IMDb score is: 7.5</p><p> Ari Folman<b> (The Congress)</b><br>2 votes</br>Douban score is: 7.7. IMDb score is: 6.5</p><p> Elia Suleiman<b> (Divine Intervention)</b><br>1 vote</br>Douban score is: 7.3. IMDb score is: 6.6</p><p> Ronit Elkabetz and Shlomi Elkabetz<b> (Gett: The Trial of Viviane Amselem)</b><br>1 vote</br>Douban score is: 8.3. IMDb score is: 7.7</p><p> Scandar Copti and Yaron Shani<b> (Ajami)</b><br>1 vote</br>Douban score is: 7.0. IMDb score is: 7.4</p><p> Eran Riklis<b> (A Borrowed Identity)</b><br>1 vote</br>Douban score is: nan. IMDb score is: 7.2</p><p> Dror Moreh<b> (The Gatekeepers)</b><br>1 vote</br>Douban score is: 7.6. IMDb score is: 7.6</p><p> Bazi Gete<b> (Red Leaves)</b><br>1 vote</br>Douban score is: nan. IMDb score is: 6.6</p><p> Eran Kolirin<b> (The Band¨¹s Visit)</b><br>1 vote</br

In [85]:
directors = df_asian_movie2.groupby('dir_country_cleaned')['movie_dir'].nunique().reset_index(name='properties.headline')

In [86]:
movies = df_asian_movie2.groupby('dir_country_cleaned')['movie_name'].nunique().reset_index(name='properties.headline')

In [87]:
output = output.merge(directors, how='left', on= 'dir_country_cleaned')
output = output.merge(movies, how='left', on= 'dir_country_cleaned')
output.head(3)

Unnamed: 0,dir_country_cleaned,properties.article,properties.headline_x,properties.headline_y
0,China,<p>Wong Kar-wai<b> (In the Mood for Love)</b><br>49 votes</br>Douban score is: 8.5. IMDb score is: 8.1</p><p> Edward Yang<b> (Yi Yi: A One and a Two)</b><br>22 votes</br>Douban score is: 8.9. IMDb score is: 8.1</p><p> Hou Hsiao-hsien<b> (The Assassin)</b><br>5 votes</br>Douban score is: 7.2. IMDb score is: 6.3</p><p> Jia Zhangke<b> (A Touch of Sin)</b><br>4 votes</br>Douban score is: 7.9. IMDb score is: 7.1</p><p> Jia Zhangke<b> (Still Life)</b><br>3 votes</br>Douban score is: 8.0. IMDb scor...,10,19
1,India,<p>Mira Nair<b> (Monsoon Wedding)</b><br>2 votes</br>Douban score is: 7.5. IMDb score is: 7.4</p><p> Vishal Bhardwaj<b> (Maqbool)</b><br>2 votes</br>Douban score is: nan. IMDb score is: 8.2</p><p> Chaitanya Tamhane<b> (Court)</b><br>1 vote</br>Douban score is: 7.0. IMDb score is: 7.7</p><p> Raam Reddy<b> (Thithi)</b><br>1 vote</br>Douban score is: 7.6. IMDb score is: 8.4</p><p> Anurag Kashyap<b> (Dev D)</b><br>1 vote</br>Douban score is: nan. IMDb score is: 8.1</p><p> Anurag Kashyap<b> (Gang...,10,12
2,Iran,<p>Asghar Farhadi<b> (A Separation)</b><br>28 votes</br>Douban score is: 8.7. IMDb score is: 8.3</p><p> Abbas Kiarostami<b> (Certified Copy)</b><br>9 votes</br>Douban score is: nan. IMDb score is: 7.3</p><p> Abbas Kiarostami<b> (Ten)</b><br>4 votes</br>Douban score is: 7.6. IMDb score is: 5.7</p><p> Asghar Farhadi<b> (About Elly)</b><br>3 votes</br>Douban score is: 8.1. IMDb score is: 8.1</p><p> Abbas Kiarostami<b> (Like Someone In Love)</b><br>2 votes</br>Douban score is: 7.5. IMDb score is...,6,12


In [88]:
out_properties = pd.read_csv('BBC_asian_properties.csv')

In [89]:
import json
from pandas.io.json import json_normalize

In [90]:
with open('asiacustom.geo.json') as json_data:
    geometry_data = json.load(json_data)

for feat in geometry_data['features']:
    del feat["properties"]['brk_group']
    del feat["properties"]['formal_fr']
    del feat["properties"]['note_adm0']
    del feat["properties"]['note_brk']
    del feat["properties"]['name_alt']
    del feat["properties"]['fips_10']
    del feat["properties"]['formal_en']

In [91]:
out_geometry = pd.DataFrame.from_dict(json_normalize(geometry_data['features']), orient='columns')

In [92]:
output_complete = pd.merge(out_properties, out_geometry[['geometry.coordinates','geometry.type','properties.subunit', 'properties.subregion']], left_on='properties.name', right_on='properties.subunit', how='left')
output_complete

Unnamed: 0,properties.name,properties.article,properties.headline,geometry.coordinates,geometry.type,properties.subunit,properties.subregion
0,China,<p>Wong Kar-wai<b> (In the Mood for Love)</b><br>49 votes</br>Douban score is: 8.5. IMDb score is: 8.1</p><p> Edward Yang<b> (Yi Yi: A One and a Two)</b><br>22 votes</br>Douban score is: 8.9. IMDb score is: 8.1</p><p> Hou Hsiao-hsien<b> (The Assassin)</b><br>5 votes</br>Douban score is: 7.2. IMDb score is: 6.3</p><p> Jia Zhangke<b> (A Touch of Sin)</b><br>4 votes</br>Douban score is: 7.9. IMDb score is: 7.1</p><p> Jia Zhangke<b> (Still Life)</b><br>3 votes</br>Douban score is: 8.0. IMDb scor...,10 directors selected.19 movies selected,"[[[[110.88876953125006, 19.99194335937497], [110.93828125, 19.94755859374999], [110.97070312499997, 19.883300781250057], [110.99765625000005, 19.764697265625045], [111.01367187500001, 19.65546875000001], [110.91269531250006, 19.586083984374994], [110.82226562500003, 19.557910156250017], [110.64091796875, 19.291210937499955], [110.603125, 19.20703125000003], [110.57216796875005, 19.171875], [110.5625, 19.13515625000005], [110.56601562500006, 19.09853515625005], [110.51933593750002, 18.9702148...",MultiPolygon,China,Eastern Asia
1,India,<p>Mira Nair<b> (Monsoon Wedding)</b><br>2 votes</br>Douban score is: 7.5. IMDb score is: 7.4</p><p> Vishal Bhardwaj<b> (Maqbool)</b><br>2 votes</br>Douban score is: nan. IMDb score is: 8.2</p><p> Chaitanya Tamhane<b> (Court)</b><br>1 vote</br>Douban score is: 7.0. IMDb score is: 7.7</p><p> Raam Reddy<b> (Thithi)</b><br>1 vote</br>Douban score is: 7.6. IMDb score is: 8.4</p><p> Anurag Kashyap<b> (Dev D)</b><br>1 vote</br>Douban score is: nan. IMDb score is: 8.1</p><p> Anurag Kashyap<b> (Gang...,10 directors selected.12 movies selected,"[[[[93.8900390625, 6.831054687499986], [93.82880859375003, 6.748681640624994], [93.70927734375002, 7.000683593749997], [93.65800781250007, 7.016064453125026], [93.65634765625005, 7.136230468749985], [93.68417968750006, 7.183593750000042], [93.8224609375001, 7.236621093750018], [93.85898437500006, 7.206835937499989], [93.92958984375005, 6.973486328124963], [93.8900390625, 6.831054687499986]]], [[[93.7335937500001, 7.356494140625017], [93.63847656250007, 7.261865234375008], [93.59726562500012,...",MultiPolygon,India,Southern Asia
2,Iran,<p>Asghar Farhadi<b> (A Separation)</b><br>28 votes</br>Douban score is: 8.7. IMDb score is: 8.3</p><p> Abbas Kiarostami<b> (Certified Copy)</b><br>9 votes</br>Douban score is: nan. IMDb score is: 7.3</p><p> Abbas Kiarostami<b> (Ten)</b><br>4 votes</br>Douban score is: 7.6. IMDb score is: 5.7</p><p> Asghar Farhadi<b> (About Elly)</b><br>3 votes</br>Douban score is: 8.1. IMDb score is: 8.1</p><p> Abbas Kiarostami<b> (Like Someone In Love)</b><br>2 votes</br>Douban score is: 7.5. IMDb score is...,6 directors selected.12 movies selected,"[[[[56.18798828125002, 26.92114257812497], [56.0949218750001, 26.80117187500005], [55.95429687500004, 26.70112304687501], [55.894140625, 26.732275390625034], [55.847656249999964, 26.730810546875006], [55.74726562500003, 26.692480468750034], [55.674609375000074, 26.68583984374999], [55.54316406250004, 26.617529296875034], [55.42373046875005, 26.58310546875003], [55.340429687500006, 26.585742187500045], [55.31152343749997, 26.592626953125002], [55.29501953125, 26.639208984375014], [55.29648437...",MultiPolygon,Iran,Southern Asia
3,Israel,<p>Ari Folman<b> (Waltz with Bashir)</b><br>4 votes</br>Douban score is: 8.7. IMDb score is: 8.0</p><p> Hany Abu-Assad<b> (Paradise Now)</b><br>2 votes</br>Douban score is: 7.7. IMDb score is: 7.5</p><p> Ari Folman<b> (The Congress)</b><br>2 votes</br>Douban score is: 7.7. IMDb score is: 6.5</p><p> Elia Suleiman<b> (Divine Intervention)</b><br>1 vote</br>Douban score is: 7.3. IMDb score is: 6.6</p><p> Ronit Elkabetz and Shlomi Elkabetz<b> (Gett: The Trial of Viviane Amselem)</b><br>1 vote</b...,10 directors selected.12 movies selected,"[[[35.78730468750001, 32.734912109374996], [35.734472656250006, 32.72890625], [35.61123046875002, 32.682080078125], [35.59453125000002, 32.668017578124996], [35.572851562500006, 32.640869140625], [35.56904296875001, 32.619873046875], [35.551464843750004, 32.39550781249999], [35.484375, 32.401660156249996], [35.40263671875002, 32.450634765625], [35.38671875, 32.493017578125], [35.36210937500002, 32.507470703124994], [35.303808593750006, 32.512939453125], [35.19326171875002, 32.534423828125], ...",Polygon,Israel,Western Asia
4,Japan,<p>Hayao Miyazaki<b> (Spirited Away)</b><br>34 votes</br>Douban score is: 9.3. IMDb score is: 8.6</p><p> Hirokazu Koreeda<b> (Still Walking)</b><br>3 votes</br>Douban score is: 8.8. IMDb score is: 8.0</p><p> Kiyoshi Kurosawa<b> (Pulse)</b><br>3 votes</br>Douban score is: 6.8. IMDb score is: 4.7</p><p> Hayao Miyazaki<b> (The Wind Rises)</b><br>1 vote</br>Douban score is: 7.8. IMDb score is: 7.8</p><p> Kinji Fukasaku<b> (Battle Royale)</b><br>1 vote</br>Douban score is: nan. IMDb score is: 7.7...,9 directors selected.12 movies selected,"[[[[123.88867187499997, 24.280126953124977], [123.8255859375, 24.266064453124955], [123.74980468750002, 24.283300781250006], [123.68066406250003, 24.288037109375008], [123.67978515625012, 24.317773437500023], [123.75234375, 24.348486328125063], [123.75371093750006, 24.391308593749983], [123.77148437499996, 24.41445312499999], [123.93486328125, 24.362011718749983], [123.928125, 24.323632812499966], [123.88867187499997, 24.280126953124977]]], [[[124.29316406250003, 24.515917968750074], [124.23...",MultiPolygon,Japan,Eastern Asia
5,Lebanon,<p>Jihan El-Tahri<b> (Cuba: An African Odyssey)</b><br>1 vote</br>Douban score is: nan. IMDb score is: 8.3</p><p> Nadine Labaki<b> (Caramel)</b><br>1 vote</br>Douban score is: 7.8. IMDb score is: 7.1</p><p> Nadine Labaki<b> (Where Do We Go Now?)</b><br>1 vote</br>Douban score is: 7.8. IMDb score is: 7.5</p><p> Mohamed Soueid<b> (My Heart Beats Only for Her)</b><br>1 vote</br>Douban score is: nan. IMDb score is: nan</p>,3 directors selected.4 movies selected,"[[[35.869140625, 33.43173828124999], [35.84072265625002, 33.415673828124994], [35.7875, 33.369775390624994], [35.734472656250006, 33.3326171875], [35.62724609375001, 33.275048828124994], [35.60292968750002, 33.240625], [35.579296875, 33.271484375], [35.53251953125002, 33.25048828125], [35.4931640625, 33.119482421875], [35.411230468750006, 33.07568359375], [35.30888671875002, 33.079541015625], [35.22333984375001, 33.0919921875], [35.10859375, 33.08369140625], [35.15507812500002, 33.1600097656...",Polygon,Lebanon,Western Asia
6,Malaysia,"<p>Tsai Ming-liang<b> (Goodbye, Dragon Inn)</b><br>1 vote</br>Douban score is: 7.3. IMDb score is: 7.1</p><p> Tsai Ming-liang<b> (What Time Is It There?)</b><br>1 vote</br>Douban score is: 7.8. IMDb score is: 7.3</p><p> Tsai Ming-liang<b> (Stray Dogs)</b><br>1 vote</br>Douban score is: 7.2. IMDb score is: 6.8</p><p> Tsai Ming-liang<b> (Journey to the West)</b><br>1 vote</br>Douban score is: 6.8. IMDb score is: 6.8</p><p> Tsai Ming-liang<b> (I Don't Want to Sleep Alone)</b><br>1 vote</br>Doub...",1 director selected.5 movies selected,"[[[[111.38925781250006, 2.415332031250031], [111.35869140625007, 2.40219726562502], [111.31152343749997, 2.437597656250034], [111.30039062500006, 2.741162109374969], [111.33349609374997, 2.768310546875], [111.35507812500012, 2.764453125], [111.3783203125, 2.709326171874963], [111.37626953125002, 2.576318359375051], [111.38046875000006, 2.458935546875026], [111.38925781250006, 2.415332031250031]]], [[[104.22158203125, 2.731738281250003], [104.17333984375001, 2.721337890625023], [104.146875, 2...",MultiPolygon,Malaysia,South-Eastern Asia
7,South Korea,"<p>Park Chan-wook<b> (Oldboy)</b><br>11 votes</br>Douban score is: 8.2. IMDb score is: 8.4</p><p> Bong Joon-ho<b> (Mother)</b><br>3 votes</br>Douban score is: 8.3. IMDb score is: 6.7</p><p> Lee Chang-dong<b> (Secret Sunshine)</b><br>3 votes</br>Douban score is: 7.8. IMDb score is: 7.5</p><p> Kim Ki-duk<b> (Spring, Summer, Fall, Winter¨¹and Spring)</b><br>3 votes</br>Douban score is: 8.5. IMDb score is: 8.1</p><p> Bong Joon-ho<b> (Memories of Murder)</b><br>3 votes</br>Douban score is: 8.7. I...",6 directors selected.13 movies selected,"[[[[126.326953125, 33.2236328125], [126.28203125000002, 33.20151367187503], [126.24023437500003, 33.21484375], [126.22900390624996, 33.22524414062499], [126.17871093750001, 33.282568359375034], [126.16562500000012, 33.31201171875], [126.19941406250004, 33.36806640625005], [126.33769531250003, 33.46040039062501], [126.69550781250004, 33.54931640625003], [126.7598632812501, 33.55322265625], [126.90117187500002, 33.51513671874997], [126.93125000000012, 33.44384765625003], [126.9053710937501, 33...",MultiPolygon,South Korea,Eastern Asia
8,Taiwan,"<p>Ang Lee<b> (Brokeback Mountain)</b><br>11 votes</br>Douban score is: 8.7. IMDb score is: 7.7</p><p> Ang Lee<b> (Crouching Tiger, Hidden Dragon)</b><br>8 votes</br>Douban score is: 8.0. IMDb score is: 7.9</p><p> Ang Lee<b> (Lust, Caution)</b><br>1 vote</br>Douban score is: 8.2. IMDb score is: 7.6</p><p> Ang Lee<b> (Life of Pi)</b><br>1 vote</br>Douban score is: 9.0. IMDb score is: 7.9</p>",1 director selected.4 movies selected,"[[[[118.4074218750001, 24.522119140624994], [118.45117187500001, 24.45556640625003], [118.43271484375006, 24.41435546875007], [118.29511718750003, 24.436328125000017], [118.28730468750004, 24.476611328125045], [118.33935546874999, 24.46914062500005], [118.4074218750001, 24.522119140624994]]], [[[121.00878906249996, 22.620361328124968], [120.946875, 22.503076171874966], [120.89736328125, 22.379150390625053], [120.87734375, 22.26220703125003], [120.87841796874997, 22.141552734374955], [120.864...",MultiPolygon,Taiwan,Eastern Asia
9,Thailand,<p>Apichatpong Weerasethakul<b> (Uncle Boonmee Who Can Recall His Past Lives)</b><br>9 votes</br>Douban score is: 6.8. IMDb score is: 6.6</p><p> Apichatpong Weerasethakul<b> (Tropical Malady)</b><br>6 votes</br>Douban score is: 7.8. IMDb score is: 7.3</p><p> Apichatpong Weerasethakul<b> (Syndromes and a Century)</b><br>5 votes</br>Douban score is: 7.7. IMDb score is: 7.5</p><p> Apichatpong Weerasethakul<b> (Blissfully Yours)</b><br>1 vote</br>Douban score is: 7.6. IMDb score is: 7.0</p>,1 director selected.4 movies selected,"[[[[99.66308593749997, 6.521923828124983], [99.64404296875003, 6.516113281250043], [99.60664062500004, 6.596826171874966], [99.65400390625008, 6.714111328124972], [99.70136718750004, 6.570556640625028], [99.66308593749997, 6.521923828124983]]], [[[99.07841796875007, 7.591845703125003], [99.10439453125005, 7.471289062500035], [99.06787109374997, 7.495898437499988], [99.03769531250012, 7.54848632812505], [99.03808593750003, 7.625732421874972], [99.04511718750004, 7.636523437500045], [99.078417...",MultiPolygon,Thailand,South-Eastern Asia


In [93]:
def dis_num(district):
    if district == 'Eastern Asia':
        return '1'
    elif district == 'Southern Asia':
        return'2'
    elif district == 'South-Eastern Asia':
        return'3'
    elif district == 'Western Asia':
        return'4'
    else:
        return district

def dis_name(district):
    if district == 'District of Columbia':
        return district
    else:
        return district
    
def dis_color(district):
    if district == 'Eastern Asia':
        return '#00204a'
    elif district == 'Southern Asia':
        return'#005792'
    elif district == 'South-Eastern Asia':
        return'#d9faff'
    elif district == 'Western Asia':
        return'#00bbf0'
    else:
        return district   

In [94]:
output_complete['properties.group_name'] = output_complete['properties.subregion'].apply(lambda x: dis_name(x))
output_complete['properties.group_id'] = output_complete['properties.subregion'].apply(lambda x: dis_num(x))
output_complete['properties.color'] = output_complete['properties.subregion'].apply(lambda x: dis_color(x))

In [95]:
output_complete

Unnamed: 0,properties.name,properties.article,properties.headline,geometry.coordinates,geometry.type,properties.subunit,properties.subregion,properties.group_name,properties.group_id,properties.color
0,China,<p>Wong Kar-wai<b> (In the Mood for Love)</b><br>49 votes</br>Douban score is: 8.5. IMDb score is: 8.1</p><p> Edward Yang<b> (Yi Yi: A One and a Two)</b><br>22 votes</br>Douban score is: 8.9. IMDb score is: 8.1</p><p> Hou Hsiao-hsien<b> (The Assassin)</b><br>5 votes</br>Douban score is: 7.2. IMDb score is: 6.3</p><p> Jia Zhangke<b> (A Touch of Sin)</b><br>4 votes</br>Douban score is: 7.9. IMDb score is: 7.1</p><p> Jia Zhangke<b> (Still Life)</b><br>3 votes</br>Douban score is: 8.0. IMDb scor...,10 directors selected.19 movies selected,"[[[[110.88876953125006, 19.99194335937497], [110.93828125, 19.94755859374999], [110.97070312499997, 19.883300781250057], [110.99765625000005, 19.764697265625045], [111.01367187500001, 19.65546875000001], [110.91269531250006, 19.586083984374994], [110.82226562500003, 19.557910156250017], [110.64091796875, 19.291210937499955], [110.603125, 19.20703125000003], [110.57216796875005, 19.171875], [110.5625, 19.13515625000005], [110.56601562500006, 19.09853515625005], [110.51933593750002, 18.9702148...",MultiPolygon,China,Eastern Asia,Eastern Asia,1,#00204a
1,India,<p>Mira Nair<b> (Monsoon Wedding)</b><br>2 votes</br>Douban score is: 7.5. IMDb score is: 7.4</p><p> Vishal Bhardwaj<b> (Maqbool)</b><br>2 votes</br>Douban score is: nan. IMDb score is: 8.2</p><p> Chaitanya Tamhane<b> (Court)</b><br>1 vote</br>Douban score is: 7.0. IMDb score is: 7.7</p><p> Raam Reddy<b> (Thithi)</b><br>1 vote</br>Douban score is: 7.6. IMDb score is: 8.4</p><p> Anurag Kashyap<b> (Dev D)</b><br>1 vote</br>Douban score is: nan. IMDb score is: 8.1</p><p> Anurag Kashyap<b> (Gang...,10 directors selected.12 movies selected,"[[[[93.8900390625, 6.831054687499986], [93.82880859375003, 6.748681640624994], [93.70927734375002, 7.000683593749997], [93.65800781250007, 7.016064453125026], [93.65634765625005, 7.136230468749985], [93.68417968750006, 7.183593750000042], [93.8224609375001, 7.236621093750018], [93.85898437500006, 7.206835937499989], [93.92958984375005, 6.973486328124963], [93.8900390625, 6.831054687499986]]], [[[93.7335937500001, 7.356494140625017], [93.63847656250007, 7.261865234375008], [93.59726562500012,...",MultiPolygon,India,Southern Asia,Southern Asia,2,#005792
2,Iran,<p>Asghar Farhadi<b> (A Separation)</b><br>28 votes</br>Douban score is: 8.7. IMDb score is: 8.3</p><p> Abbas Kiarostami<b> (Certified Copy)</b><br>9 votes</br>Douban score is: nan. IMDb score is: 7.3</p><p> Abbas Kiarostami<b> (Ten)</b><br>4 votes</br>Douban score is: 7.6. IMDb score is: 5.7</p><p> Asghar Farhadi<b> (About Elly)</b><br>3 votes</br>Douban score is: 8.1. IMDb score is: 8.1</p><p> Abbas Kiarostami<b> (Like Someone In Love)</b><br>2 votes</br>Douban score is: 7.5. IMDb score is...,6 directors selected.12 movies selected,"[[[[56.18798828125002, 26.92114257812497], [56.0949218750001, 26.80117187500005], [55.95429687500004, 26.70112304687501], [55.894140625, 26.732275390625034], [55.847656249999964, 26.730810546875006], [55.74726562500003, 26.692480468750034], [55.674609375000074, 26.68583984374999], [55.54316406250004, 26.617529296875034], [55.42373046875005, 26.58310546875003], [55.340429687500006, 26.585742187500045], [55.31152343749997, 26.592626953125002], [55.29501953125, 26.639208984375014], [55.29648437...",MultiPolygon,Iran,Southern Asia,Southern Asia,2,#005792
3,Israel,<p>Ari Folman<b> (Waltz with Bashir)</b><br>4 votes</br>Douban score is: 8.7. IMDb score is: 8.0</p><p> Hany Abu-Assad<b> (Paradise Now)</b><br>2 votes</br>Douban score is: 7.7. IMDb score is: 7.5</p><p> Ari Folman<b> (The Congress)</b><br>2 votes</br>Douban score is: 7.7. IMDb score is: 6.5</p><p> Elia Suleiman<b> (Divine Intervention)</b><br>1 vote</br>Douban score is: 7.3. IMDb score is: 6.6</p><p> Ronit Elkabetz and Shlomi Elkabetz<b> (Gett: The Trial of Viviane Amselem)</b><br>1 vote</b...,10 directors selected.12 movies selected,"[[[35.78730468750001, 32.734912109374996], [35.734472656250006, 32.72890625], [35.61123046875002, 32.682080078125], [35.59453125000002, 32.668017578124996], [35.572851562500006, 32.640869140625], [35.56904296875001, 32.619873046875], [35.551464843750004, 32.39550781249999], [35.484375, 32.401660156249996], [35.40263671875002, 32.450634765625], [35.38671875, 32.493017578125], [35.36210937500002, 32.507470703124994], [35.303808593750006, 32.512939453125], [35.19326171875002, 32.534423828125], ...",Polygon,Israel,Western Asia,Western Asia,4,#00bbf0
4,Japan,<p>Hayao Miyazaki<b> (Spirited Away)</b><br>34 votes</br>Douban score is: 9.3. IMDb score is: 8.6</p><p> Hirokazu Koreeda<b> (Still Walking)</b><br>3 votes</br>Douban score is: 8.8. IMDb score is: 8.0</p><p> Kiyoshi Kurosawa<b> (Pulse)</b><br>3 votes</br>Douban score is: 6.8. IMDb score is: 4.7</p><p> Hayao Miyazaki<b> (The Wind Rises)</b><br>1 vote</br>Douban score is: 7.8. IMDb score is: 7.8</p><p> Kinji Fukasaku<b> (Battle Royale)</b><br>1 vote</br>Douban score is: nan. IMDb score is: 7.7...,9 directors selected.12 movies selected,"[[[[123.88867187499997, 24.280126953124977], [123.8255859375, 24.266064453124955], [123.74980468750002, 24.283300781250006], [123.68066406250003, 24.288037109375008], [123.67978515625012, 24.317773437500023], [123.75234375, 24.348486328125063], [123.75371093750006, 24.391308593749983], [123.77148437499996, 24.41445312499999], [123.93486328125, 24.362011718749983], [123.928125, 24.323632812499966], [123.88867187499997, 24.280126953124977]]], [[[124.29316406250003, 24.515917968750074], [124.23...",MultiPolygon,Japan,Eastern Asia,Eastern Asia,1,#00204a
5,Lebanon,<p>Jihan El-Tahri<b> (Cuba: An African Odyssey)</b><br>1 vote</br>Douban score is: nan. IMDb score is: 8.3</p><p> Nadine Labaki<b> (Caramel)</b><br>1 vote</br>Douban score is: 7.8. IMDb score is: 7.1</p><p> Nadine Labaki<b> (Where Do We Go Now?)</b><br>1 vote</br>Douban score is: 7.8. IMDb score is: 7.5</p><p> Mohamed Soueid<b> (My Heart Beats Only for Her)</b><br>1 vote</br>Douban score is: nan. IMDb score is: nan</p>,3 directors selected.4 movies selected,"[[[35.869140625, 33.43173828124999], [35.84072265625002, 33.415673828124994], [35.7875, 33.369775390624994], [35.734472656250006, 33.3326171875], [35.62724609375001, 33.275048828124994], [35.60292968750002, 33.240625], [35.579296875, 33.271484375], [35.53251953125002, 33.25048828125], [35.4931640625, 33.119482421875], [35.411230468750006, 33.07568359375], [35.30888671875002, 33.079541015625], [35.22333984375001, 33.0919921875], [35.10859375, 33.08369140625], [35.15507812500002, 33.1600097656...",Polygon,Lebanon,Western Asia,Western Asia,4,#00bbf0
6,Malaysia,"<p>Tsai Ming-liang<b> (Goodbye, Dragon Inn)</b><br>1 vote</br>Douban score is: 7.3. IMDb score is: 7.1</p><p> Tsai Ming-liang<b> (What Time Is It There?)</b><br>1 vote</br>Douban score is: 7.8. IMDb score is: 7.3</p><p> Tsai Ming-liang<b> (Stray Dogs)</b><br>1 vote</br>Douban score is: 7.2. IMDb score is: 6.8</p><p> Tsai Ming-liang<b> (Journey to the West)</b><br>1 vote</br>Douban score is: 6.8. IMDb score is: 6.8</p><p> Tsai Ming-liang<b> (I Don't Want to Sleep Alone)</b><br>1 vote</br>Doub...",1 director selected.5 movies selected,"[[[[111.38925781250006, 2.415332031250031], [111.35869140625007, 2.40219726562502], [111.31152343749997, 2.437597656250034], [111.30039062500006, 2.741162109374969], [111.33349609374997, 2.768310546875], [111.35507812500012, 2.764453125], [111.3783203125, 2.709326171874963], [111.37626953125002, 2.576318359375051], [111.38046875000006, 2.458935546875026], [111.38925781250006, 2.415332031250031]]], [[[104.22158203125, 2.731738281250003], [104.17333984375001, 2.721337890625023], [104.146875, 2...",MultiPolygon,Malaysia,South-Eastern Asia,South-Eastern Asia,3,#d9faff
7,South Korea,"<p>Park Chan-wook<b> (Oldboy)</b><br>11 votes</br>Douban score is: 8.2. IMDb score is: 8.4</p><p> Bong Joon-ho<b> (Mother)</b><br>3 votes</br>Douban score is: 8.3. IMDb score is: 6.7</p><p> Lee Chang-dong<b> (Secret Sunshine)</b><br>3 votes</br>Douban score is: 7.8. IMDb score is: 7.5</p><p> Kim Ki-duk<b> (Spring, Summer, Fall, Winter¨¹and Spring)</b><br>3 votes</br>Douban score is: 8.5. IMDb score is: 8.1</p><p> Bong Joon-ho<b> (Memories of Murder)</b><br>3 votes</br>Douban score is: 8.7. I...",6 directors selected.13 movies selected,"[[[[126.326953125, 33.2236328125], [126.28203125000002, 33.20151367187503], [126.24023437500003, 33.21484375], [126.22900390624996, 33.22524414062499], [126.17871093750001, 33.282568359375034], [126.16562500000012, 33.31201171875], [126.19941406250004, 33.36806640625005], [126.33769531250003, 33.46040039062501], [126.69550781250004, 33.54931640625003], [126.7598632812501, 33.55322265625], [126.90117187500002, 33.51513671874997], [126.93125000000012, 33.44384765625003], [126.9053710937501, 33...",MultiPolygon,South Korea,Eastern Asia,Eastern Asia,1,#00204a
8,Taiwan,"<p>Ang Lee<b> (Brokeback Mountain)</b><br>11 votes</br>Douban score is: 8.7. IMDb score is: 7.7</p><p> Ang Lee<b> (Crouching Tiger, Hidden Dragon)</b><br>8 votes</br>Douban score is: 8.0. IMDb score is: 7.9</p><p> Ang Lee<b> (Lust, Caution)</b><br>1 vote</br>Douban score is: 8.2. IMDb score is: 7.6</p><p> Ang Lee<b> (Life of Pi)</b><br>1 vote</br>Douban score is: 9.0. IMDb score is: 7.9</p>",1 director selected.4 movies selected,"[[[[118.4074218750001, 24.522119140624994], [118.45117187500001, 24.45556640625003], [118.43271484375006, 24.41435546875007], [118.29511718750003, 24.436328125000017], [118.28730468750004, 24.476611328125045], [118.33935546874999, 24.46914062500005], [118.4074218750001, 24.522119140624994]]], [[[121.00878906249996, 22.620361328124968], [120.946875, 22.503076171874966], [120.89736328125, 22.379150390625053], [120.87734375, 22.26220703125003], [120.87841796874997, 22.141552734374955], [120.864...",MultiPolygon,Taiwan,Eastern Asia,Eastern Asia,1,#00204a
9,Thailand,<p>Apichatpong Weerasethakul<b> (Uncle Boonmee Who Can Recall His Past Lives)</b><br>9 votes</br>Douban score is: 6.8. IMDb score is: 6.6</p><p> Apichatpong Weerasethakul<b> (Tropical Malady)</b><br>6 votes</br>Douban score is: 7.8. IMDb score is: 7.3</p><p> Apichatpong Weerasethakul<b> (Syndromes and a Century)</b><br>5 votes</br>Douban score is: 7.7. IMDb score is: 7.5</p><p> Apichatpong Weerasethakul<b> (Blissfully Yours)</b><br>1 vote</br>Douban score is: 7.6. IMDb score is: 7.0</p>,1 director selected.4 movies selected,"[[[[99.66308593749997, 6.521923828124983], [99.64404296875003, 6.516113281250043], [99.60664062500004, 6.596826171874966], [99.65400390625008, 6.714111328124972], [99.70136718750004, 6.570556640625028], [99.66308593749997, 6.521923828124983]]], [[[99.07841796875007, 7.591845703125003], [99.10439453125005, 7.471289062500035], [99.06787109374997, 7.495898437499988], [99.03769531250012, 7.54848632812505], [99.03808593750003, 7.625732421874972], [99.04511718750004, 7.636523437500045], [99.078417...",MultiPolygon,Thailand,South-Eastern Asia,South-Eastern Asia,3,#d9faff


In [96]:
ok_json = json.loads(output_complete.to_json(orient='records'))

In [97]:
def process_to_geojson(file):
    geo_data = {"type": "FeatureCollection", "features":[]}
    for row in file:
        this_dict = {"type": "Feature", "properties":{}, "geometry": {}}
        for key, value in row.items():
            key_names = key.split('.')
            if key_names[0] == 'geometry':
                this_dict['geometry'][key_names[1]] = value
            if str(key_names[0]) == 'properties':
                this_dict['properties'][key_names[1]] = value
        geo_data['features'].append(this_dict)
    return geo_data

In [98]:
geo_format = process_to_geojson(ok_json)

In [99]:
with open('geo-data.js', 'w') as outfile:
    outfile.write("var infoData = ")
with open('geo-data.js', 'a') as outfile:
    json.dump(geo_format, outfile)