In [79]:
import json 
import pandas as pd
import re
import nltk
from nltk.util import ngrams
from collections import Counter
from pprint import pprint
from sklearn.feature_extraction.text import TfidfVectorizer

df = pd.read_json("./gg2013.json")

In [80]:
#Analyze common phrase occurences
'''
Input: A list of strings after parsing
Output: The most common grams containing 'best'
Remarks:
    1. Longest award phrase is 12 words
'''
def common_phrases(keywords):
    bigrams = []
    trigrams = []
    quadgrams = []
    multigrams = []
    megagrams = []
    for item in keywords:
        clean_string = re.sub('[^A-Za-z0-9]+', ' ', item)
        tokens = nltk.word_tokenize(clean_string)
        grams2 = ngrams(tokens, 2)
        grams3 = ngrams(tokens, 3)
        grams4 = ngrams(tokens, 4)
        grams9 = ngrams(tokens, 9)
        grams12 = ngrams(tokens, 12)
        for gram in grams2:
            bigrams.append(gram)
        for gram in grams3:
            trigrams.append(gram)
        for gram in grams4:
            quadgrams.append(gram)
        for gram in grams9:
            multigrams.append(gram)
        for gram in grams12:
            megagrams.append(gram)

    common2 = Counter(bigrams).most_common()
    common3 = Counter(trigrams).most_common()
    common4 = Counter(quadgrams).most_common()
    common9 = Counter(multigrams).most_common()
    common12 = Counter(megagrams).most_common()

    common2 = filter(lambda x: x[0][0] == "best", common2)
    common3 = filter(lambda x: x[0][0] == "best", common3)
    common4 = filter(lambda x: x[0][0] == "best", common4)
    common9 = filter(lambda x: x[0][0] == "best", common9)
    common12 = filter(lambda x: x[0][0] == "best", common12)

    common2 = list(common2)
    common3 = list(common3)
    common4 = list(common4)
    common9 = list(common9)
    common12 = list(common12)

    combined = common2 + common3 + common4 + common9 + common12
    return combined

In [81]:
'''
Input: List of strings
Output: Strings with its frequency
Remarks:
    1. Can also count common strings in strings for strategy2
'''
def most_common_beststring(strings):
    best_string = filter(lambda x: "best" in x, strings)
    best_string = list(best_string)
    return Counter(best_string).most_common()

In [82]:
#Strategy 1 scan after the word won:
df2 = df[df['text'].str.contains(" won ") | df['text'].str.contains(" Won ")]
tweets = df2['text'].tolist()
keywords = []
#regexp = re.compile(r'[!?.;#]+(?=$|\s)')
regexp = re.compile(r'[!?.;#,@:]')

stop_words = ['at','and','on','because','but','tonight','before','lol','since','i','I']

#Parse Keywords. All phrase after 'won best' will be captured.
for tweet in tweets:
    words = tweet.lower().split()
    index_won = words.index("won")
    if words[index_won + 1] == "best":
        keyword = "best "
        curr_index = index_won + 2
        while curr_index != len(words):
            if regexp.search(words[curr_index]) or words[curr_index] in stop_words:
                #keyword += words[curr_index]
                keyword = keyword.strip()
                break
            keyword += words[curr_index] + " "
            curr_index += 1
        keywords.append(keyword)

strategy1 = common_phrases(keywords)
print(strategy1)

[(('best', 'director'), 41), (('best', 'supporting'), 37), (('best', 'original'), 37), (('best', 'actress'), 28), (('best', 'actor'), 22), (('best', 'picture'), 14), (('best', 'screenplay'), 12), (('best', 'motion'), 11), (('best', 'tv'), 9), (('best', 'performance'), 8), (('best', 'animated'), 8), (('best', 'song'), 7), (('best', 'foreign'), 7), (('best', 'dressed'), 5), (('best', 'movie'), 5), (('best', 'drama'), 4), (('best', 'comedy'), 4), (('best', 'skin'), 1), (('best', 'blurry'), 1), (('best', 'life'), 1), (('best', 'anal'), 1), (('best', 'husband'), 1), (('best', 'film'), 1), (('best', 'pic'), 1), (('best', 'original', 'song'), 23), (('best', 'director', 'for'), 13), (('best', 'supporting', 'actor'), 11), (('best', 'supporting', 'actress'), 11), (('best', 'actor', 'in'), 11), (('best', 'motion', 'picture'), 10), (('best', 'actress', 'for'), 9), (('best', 'performance', 'by'), 8), (('best', 'actress', 'in'), 6), (('best', 'actor', 'for'), 5), (('best', 'screenplay', 'for'), 5), 

In [83]:
#Strategy 2 scan before the word won:
df3 = df[df['text'].str.contains(" goes to ")]
tweets = df3['text'].tolist()
keywords2 = []
#Parse Backwards, stop at the word best
for tweet in tweets:
    words = nltk.word_tokenize(tweet.lower())
    index = words.index("to")
    if words[index - 1] == "goes":
        keyword = ""
        curr_index = index - 2
        while curr_index >= 0:
            if curr_index == 0 or words[curr_index] == "best":
                keyword = words[curr_index] + keyword
                break
            keyword = " " + words[curr_index] + keyword
            curr_index -= 1
        keywords2.append(keyword)

#print(keywords2)
strategy2 = common_phrases(keywords2)
print(strategy2)

most_common_beststring(keywords)

[(('best', 'actress'), 148), (('best', 'supporting'), 142), (('best', 'motion'), 115), (('best', 'actor'), 96), (('best', 'original'), 39), (('best', 'screenplay'), 36), (('best', 'director'), 36), (('best', 'tv'), 33), (('best', 'animated'), 17), (('best', 'foreign'), 15), (('best', 'speech'), 13), (('best', 'comedy'), 9), (('best', 'drama'), 8), (('best', 'performance'), 8), (('best', 'picture'), 8), (('best', 'miniseries'), 7), (('best', 'mini'), 5), (('best', 'movie'), 4), (('best', 'television'), 4), (('best', 'dressed'), 3), (('best', 'goldenglobes'), 3), (('best', 'editing'), 3), (('best', 'sound'), 2), (('best', 'use'), 2), (('best', 'musical'), 2), (('best', 'gay'), 2), (('best', 'presenters'), 2), (('best', 'tan'), 2), (('best', 'hair'), 2), (('best', 'humans'), 1), (('best', 'imitation'), 1), (('best', 'political'), 1), (('best', 'awkward'), 1), (('best', 'originalscore'), 1), (('best', 'painted'), 1), (('best', 'song'), 1), (('best', 'swallowed'), 1), (('best', 'acceptance'

[('best', 90),
 ('best director', 26),
 ('best supporting', 14),
 ('best actress', 12),
 ('best original', 12),
 ('best picture', 10),
 ("best original song for 'ben'", 9),
 ('best director for', 8),
 ('best original song for', 7),
 ('best performance by an actress in a supporting', 6),
 ('best actor', 5),
 ('best original song', 5),
 ('best screenplay', 5),
 ('best animated feature', 5),
 ('best supporting actor for django', 4),
 ('best drama', 4),
 ('best tv', 4),
 ('best song', 4),
 ('best movie', 4),
 ('best motion picture', 4),
 ('best supporting actor', 3),
 ('best supporting actress', 3),
 ('best foreign film', 3),
 ('best foreign', 3),
 ('best actor in a comedy or', 3),
 ('best actress for silver linings', 2),
 ('best supporting actress for les', 2),
 ('best screenplay for django', 2),
 ('best screenplay for', 2),
 ('best actress for keeping up with the', 2),
 ('best actress in a tv series drama (and probably many best dressed', 2),
 ('best comedy or musical', 2),
 ('best actor

In [84]:
common_strings = most_common_beststring(keywords2)

In [85]:
'''
Input: gram sets
Output: phrases with frequency
'''
def accumulate_votes(grams1, grams2, common_strings):
    awards = {}
    '''
    for gram in grams1:
        untokenize = ' '.join(gram[0])
        awards[untokenize] = len(gram[0]) * gram[1]
    for gram in grams2:
        untokenize = ' '.join(gram[0])
        if untokenize in awards:
            awards[untokenize] = awards[untokenize] * 2.5
        else:
            awards[untokenize] = len(gram[0]) * gram[1]
    '''
    for string in common_strings:
        first_two = ' '.join(nltk.word_tokenize(string[0])[:2])
        if string[0] in awards:
            awards[string[0]] = awards[string[0]] * 2.5
        elif first_two in awards:
            awards[string[0]] = awards[first_two] * string[1]
        else:
            awards[string[0]] = len(string[0]) * string[1]
    return sorted(awards.items(), key = lambda x: x[1], reverse = True)
votes = accumulate_votes(strategy1, strategy2, common_strings)
#Need a way to combine similar categories
print(len(votes))

259


In [86]:
collocation_words = {
    #"tv":"television",
    "pic":"picture",
    "for":"-",
    "in":"-",
    'or':'/',
    'of':'-'
}

skip_words = ['a']

paraphrase = [',','@','(',')','#']

# Find a good format for award names.
def gram_cleaning(grams):
    new_grams = []
    for gram in grams:
        word_list = []
        for word in gram[0]:
            if word in collocation_words:
                word = collocation_words[word]
            if word in skip_words:
                break
            if word not in paraphrase:
                word_list.append(word)
        word_tuple = tuple(word_list)
        gram_tuple = (word_tuple,gram[1])
        new_grams.append(gram_tuple)
    return new_grams

# Separate 'A/B' type of words into 'A / B' to add more information to resolute.
def sticky_word_string(phrase):
    phrase_list = nltk.word_tokenize(phrase)
    token_list = []
    for word in phrase_list:
        flag = 0
        for i,character in enumerate(word):
            if character == '/' and i != 0:
                token_list.append(word[0:i])
                token_list.append(word[i])
                flag = i
                if i != len(word)-1:
                    token_list.append(word[i+1:len(word)])
                break
        if flag == 0 and word not in skip_words:
            token_list.append(word)
    clean_string = ' '.join(token_list)
    return clean_string

# Replacing some collocation words.
def string_cleaning(grams):
    new_grams = []
    for gram in grams:
        word_list = []
        clean_gram = sticky_word_string(gram[0])
        temp_list = nltk.word_tokenize(clean_gram)
        for word in temp_list:
            if word in collocation_words:
                word = collocation_words[word]
            if word not in paraphrase:
                word = word.strip()
                word_list.append(word)
        phrase = ' '.join(word_list)
        gram_tuple = (phrase,gram[1])
        new_grams.append(gram_tuple)
    return new_grams

#for vote in votes:
#print(strategy1)
new1 = gram_cleaning(strategy1)
new2 = gram_cleaning(strategy2)
new3 = string_cleaning(common_strings)
votes = accumulate_votes(new1, new2, new3)
#Need a way to combine similar categories
print(len(votes))
votes

227


[('best director - motion picture', 30468.75),
 ('best actress - motion picture drama', 6240.0),
 ('best actress - tv comedy / musical', 6162.5),
 ('best motion picture drama', 5875.0),
 ('best screenplay - motion picture', 5062.5),
 ('best actress - mini-series / tv movie', 2496),
 ('best actress - motion picture comedy / musical', 2400.0),
 ('best actress drama', 2400.0),
 ('best screenplay motion picture', 2025.0),
 ('best actress - comedy / musical', 1920.0),
 ('best supporting actor motion picture', 1764),
 ('best actress - tv series drama', 1344),
 ('best supporting actress tv series miniseries / tv movie', 1210),
 ('best supporting actress - motion picture', 1200.0),
 ('best actor - motion picture drama', 990.0),
 ('best actress tv drama', 960),
 ('best actress tv series comedy', 768),
 ('best motion picture comedy / musical', 675.0),
 ('best actor - motion picture comedy / musical', 660.0),
 ('best actor - miniseries / tv movie', 578),
 ('best actress - miniseries / tv movie', 

In [87]:
# Ignore paraphrases and merge the similar results. Get new votes.
words_pattern = '[a-zA-Z]+'
resolution_list = []
awards_dict = {}
for i,vote in enumerate(votes):
    words = re.findall(words_pattern, vote[0], flags=re.IGNORECASE)
    words = ' '.join(words)
    if words in awards_dict:
        awards_dict[words].append(i)
    else:
        awards_dict[words] = [i]
#pprint(awards_dict)

new_list = []
for key, val in awards_dict.items():
    if len(val) == 1:
        new_list.append(list(votes[val[0]]))
    else:
        sum = 0
        for i in val:
            sum+=votes[i][1]
        new_list.append([votes[val[0]][0],sum])
new_list

[['best director - motion picture', 30468.75],
 ['best actress - motion picture drama', 7872.0],
 ['best actress - tv comedy / musical', 6162.5],
 ['best motion picture drama', 6106.5],
 ['best screenplay - motion picture', 7492.5],
 ['best actress - mini-series / tv movie', 3072],
 ['best actress - motion picture comedy / musical', 2400.0],
 ['best actress drama', 2880.0],
 ['best actress - comedy / musical', 2112.0],
 ['best supporting actor motion picture', 1954],
 ['best actress - tv series drama', 1536],
 ['best supporting actress tv series miniseries / tv movie', 1210],
 ['best supporting actress - motion picture', 1276.0],
 ['best actor - motion picture drama', 1050.0],
 ['best actress tv drama', 1536],
 ['best actress tv series comedy', 768],
 ['best motion picture comedy / musical', 807.0],
 ['best actor - motion picture comedy / musical', 660.0],
 ['best actor - miniseries / tv movie', 578],
 ['best actress - miniseries / tv movie', 576],
 ['best actor tv series - comedy / mu

In [88]:
'''
TF-IDF computation. Get a weighted word vector representation.
'''
pd. set_option('display.max_columns', None)
pd. set_option('display.max_rows', None)

corpus = []
for item in new_list:
    corpus.append(item[0][1:])

vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform(corpus)
feature_names = vectorizer.get_feature_names()
dense = vectors.todense()
denselist = dense.tolist()
tfidf = pd.DataFrame(denselist, columns=feature_names)

# Add more importance to some key words (can get from user inputs) which most distinguish different award names.
keyword_list = ['actress','supporting','actor','director','drama','musical','television','comedy','tv']
for keyword in keyword_list:
    tfidf[keyword] = tfidf[keyword]*2
#tfidf.iloc[0]

In [89]:
# Compute pair similarity between each word vectors.
from sklearn.metrics import pairwise_distances
from sklearn.metrics.pairwise import cosine_similarity

# Larger threshold means merge more strictly.
threshold = 0.9
final_list = []
cs = cosine_similarity(tfidf,tfidf)
#ans = pairwise_distances(tfidf,tfidf,'chebyshev')
index_list = []
for v,score_row in enumerate(cs):
    similarity = []
    merge_index = [v]
    for i, score in enumerate(score_row):
        if score > threshold and v!=i:
            similarity.append([new_list[v],score,new_list[i],v,i])
            merge_index.append(i)
    final_list.append(similarity)
    index_list.append(merge_index)
print('The merging result:')
pprint(final_list)
index_list

The merging result:
[[[['best director - motion picture', 30468.75],
   0.9201264731192209,
   ['best director', 195],
   0,
   35]],
 [],
 [[['best actress - tv comedy / musical', 6162.5],
   0.969292715357781,
   ['best actress - tv series comedy / musical', 192],
   2,
   41],
  [['best actress - tv comedy / musical', 6162.5],
   0.969292715357781,
   ['best tv series actress - comedy / musical', 102.5],
   2,
   56]],
 [],
 [[['best screenplay - motion picture', 7492.5],
   1.0000000000000002,
   ['best motion picture screenplay', 30],
   4,
   108]],
 [],
 [[['best actress - motion picture comedy / musical', 2400.0],
   0.901516705559158,
   ['best actress - comedy / musical', 2112.0],
   6,
   8],
  [['best actress - motion picture comedy / musical', 2400.0],
   0.901516705559158,
   ['best actress - musical / comedy', 192],
   6,
   39],
  [['best actress - motion picture comedy / musical', 2400.0],
   1.0,
   ['best actress - motion picture musical / comedy', 192],
   6,
   40]

[[0, 35],
 [1],
 [2, 41, 56],
 [3],
 [4, 108],
 [5],
 [6, 8, 39, 40],
 [7],
 [8, 6, 26, 39, 40],
 [9],
 [10, 14, 87, 148],
 [11, 55, 74, 75],
 [12],
 [13],
 [14, 10, 87, 148],
 [15, 24, 142],
 [16, 61, 100, 182],
 [17, 30, 52],
 [18, 66, 105],
 [19],
 [20],
 [21, 72],
 [22],
 [23],
 [24, 15, 142],
 [25],
 [26, 8, 39],
 [27, 73],
 [28],
 [29, 64, 65, 160],
 [30, 17, 52, 69],
 [31],
 [32],
 [33],
 [34, 61, 100, 111, 150],
 [35, 0],
 [36],
 [37],
 [38],
 [39, 6, 8, 26, 40],
 [40, 6, 8, 39],
 [41, 2, 56],
 [42],
 [43],
 [44],
 [45],
 [46, 110, 118],
 [47],
 [48, 91],
 [49],
 [50],
 [51],
 [52, 17, 30, 69],
 [53],
 [54],
 [55, 11],
 [56, 2, 41],
 [57],
 [58, 122, 153, 155],
 [59],
 [60],
 [61, 16, 34, 100, 150, 182],
 [62],
 [63],
 [64, 29, 160],
 [65, 29, 160],
 [66, 18, 105],
 [67],
 [68, 93],
 [69, 30, 52],
 [70],
 [71],
 [72, 21],
 [73, 27],
 [74, 11],
 [75, 11],
 [76],
 [77, 145, 159],
 [78],
 [79],
 [80],
 [81],
 [82],
 [83],
 [84],
 [85],
 [86],
 [87, 10, 14, 148],
 [88],
 [89],
 [90

In [90]:
'''
Similarity Graph merging
Find the similar clusters by recursively merging the similar sentences.
Use the highest score sentence as its cluster name.
'''


def merge_index(curr_index,original_list,curr_cluster):
    for index in original_list[curr_index]:
        if index not in curr_cluster:
            curr_cluster.append(index)
            merge_index(index,original_list,curr_cluster)
    return curr_cluster

cluster_list = []
access = []
for v,il in enumerate(index_list):
    if v not in access:
        cl = merge_index(v,index_list,[])
        for i in cl:
            access.append(i)
        cluster_list.append(cl)

final_result = []
for cluster in cluster_list:
    sum_s = 0
    for item in cluster:
        sum_s += new_list[item][1]
    item_list = [new_list[cluster[0]][0],sum_s]
    final_result.append(item_list)


def sort_score(e):
    return e[1]
final_result.sort(reverse=True,key=sort_score)

pprint(final_result)
print(len(final_result))




[['best director - motion picture', 30663.75],
 ['best actress - motion picture drama', 7872.0],
 ['best screenplay - motion picture', 7522.5],
 ['best actress - tv comedy / musical', 6457.0],
 ['best motion picture drama', 6106.5],
 ['best actress - motion picture comedy / musical', 5280.0],
 ['best actress - tv series drama', 3135],
 ['best actress - mini-series / tv movie', 3072],
 ['best actress drama', 2880.0],
 ['best supporting actor motion picture', 1954],
 ['best supporting actress tv series miniseries / tv movie', 1431],
 ['best supporting actress - motion picture', 1276.0],
 ['best motion picture comedy / musical', 1222.0],
 ['best actress tv series comedy', 1174],
 ['best actor - motion picture comedy / musical', 1170.0],
 ['best actor - motion picture drama', 1050.0],
 ['best actor - tv drama', 694.0],
 ['best actor - miniseries / tv movie', 670],
 ['best actor tv series - comedy / musical', 666],
 ['best actress - miniseries / tv movie', 576],
 ['best actress - comedy', 5

In [91]:
# Currently use 40 as its possible awards number. Can let user input awards number and double it.
final_awards_name = final_result[:40]
pprint(final_awards_name)

[['best director - motion picture', 30663.75],
 ['best actress - motion picture drama', 7872.0],
 ['best screenplay - motion picture', 7522.5],
 ['best actress - tv comedy / musical', 6457.0],
 ['best motion picture drama', 6106.5],
 ['best actress - motion picture comedy / musical', 5280.0],
 ['best actress - tv series drama', 3135],
 ['best actress - mini-series / tv movie', 3072],
 ['best actress drama', 2880.0],
 ['best supporting actor motion picture', 1954],
 ['best supporting actress tv series miniseries / tv movie', 1431],
 ['best supporting actress - motion picture', 1276.0],
 ['best motion picture comedy / musical', 1222.0],
 ['best actress tv series comedy', 1174],
 ['best actor - motion picture comedy / musical', 1170.0],
 ['best actor - motion picture drama', 1050.0],
 ['best actor - tv drama', 694.0],
 ['best actor - miniseries / tv movie', 670],
 ['best actor tv series - comedy / musical', 666],
 ['best actress - miniseries / tv movie', 576],
 ['best actress - comedy', 5

In [92]:
store = dict()
df = pd.read_json("./gg2013.json")
awards_token_dict = {}
for i, awards_name in enumerate(final_awards_name):
    clean_string = re.sub('[^A-Za-z0-9]+', ' ', awards_name[0])
    tokens = nltk.word_tokenize(clean_string)
    awards_token_dict[awards_name[0]] = tokens
print(awards_token_dict)

{'best director - motion picture': ['best', 'director', 'motion', 'picture'], 'best actress - motion picture drama': ['best', 'actress', 'motion', 'picture', 'drama'], 'best screenplay - motion picture': ['best', 'screenplay', 'motion', 'picture'], 'best actress - tv comedy / musical': ['best', 'actress', 'tv', 'comedy', 'musical'], 'best motion picture drama': ['best', 'motion', 'picture', 'drama'], 'best actress - motion picture comedy / musical': ['best', 'actress', 'motion', 'picture', 'comedy', 'musical'], 'best actress - tv series drama': ['best', 'actress', 'tv', 'series', 'drama'], 'best actress - mini-series / tv movie': ['best', 'actress', 'mini', 'series', 'tv', 'movie'], 'best actress drama': ['best', 'actress', 'drama'], 'best supporting actor motion picture': ['best', 'supporting', 'actor', 'motion', 'picture'], 'best supporting actress tv series miniseries / tv movie': ['best', 'supporting', 'actress', 'tv', 'series', 'miniseries', 'tv', 'movie'], 'best supporting actres

In [93]:
for tweet in df['text']:
    for awards_name, awards_token in awards_token_dict.items():
        if all(token in tweet for token in awards_token):
            if awards_name in store:
                store[awards_name].append(tweet)
            else:
                store[awards_name] = [tweet]
    #print(store)
pprint(store)



{'best actor - motion picture comedy / musical': ['Hugh Jackman wins best '
                                                  'actor in a motion picture '
                                                  'musical or comedy for Les '
                                                  'Mis #GoldenGlobes',
                                                  'RT @CNNshowbiz: Hugh '
                                                  'Jackman wins best actor in '
                                                  'a motion picture, comedy or '
                                                  'musical for "Les '
                                                  'Miserables" #GoldenGlobes',
                                                  'RT @CNNshowbiz: Hugh '
                                                  'Jackman wins best actor in '
                                                  'a motion picture, comedy or '
                                                  'musical for "Les '
      

In [94]:
from nltk.tag.stanford import StanfordNERTagger
import string

st = StanfordNERTagger('stanford-ner/classifiers/english.all.3class.distsim.crf.ser.gz','stanford-ner/stanford-ner.jar')
punc = string.punctuation
names = nltk.corpus.names.words()

results = {}
for awards_name,awards_tweets in store.items():
    result = []
    for awards_tweet in awards_tweets:
        tokens = nltk.word_tokenize(awards_tweet)
        tl = len(tokens)
        for i in range(0,tl-1):
            if tokens[i] in names and tokens[i+1]:
                result.append(tokens[i]+' '+tokens[i+1])
    results[awards_name] = (Counter(result).most_common()[:3])
    #print(result)
pprint(results)


{'best actor - motion picture comedy / musical': [('Hugh Jackman', 65),
                                                  ('Les Miserables', 63),
                                                  ('Les Mis', 2)],
 'best actor - motion picture drama': [('Daniel Day-Lewis', 4),
                                       ('Daniel Day', 2),
                                       ('Lewis wins', 2)],
 'best actor - tv drama': [('Love ALL', 1)],
 'best actor drama': [('Daniel Day-Lewis', 46),
                      ("Lincoln ''", 28),
                      ('Lewis wins', 18)],
 'best actor tv series - comedy / musical': [('Don Cheadle', 1)],
 'best actress': [('Anne Hathaway', 112),
                  ('Jennifer Lawrence', 91),
                  ('Lawrence wins', 75)],
 'best actress - comedy': [('Jennifer Lawrence', 42),
                           ('Lawrence wins', 38),
                           ('Lena Dunham', 35)],
 'best actress - mini-series': [('Julianne Moore', 24),
                        