In [1]:
import pandas as pd

In [2]:
full = pd.read_fwf("full.txt")
full.head()

Unnamed: 0,1,"A series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story .",Unnamed: 2
0,4,"This quiet , introspective and entertaining in...",
1,1,"Even fans of Ismail Merchant 's work , I suspe...",
2,3,A positively thrilling combination of ethnogra...,
3,1,Aggressive self-glorification and a manipulati...,
4,4,A comedy-drama of nearly epic proportions root...,


In [3]:
tiny = pd.read_fwf("tiny.txt")
tiny.head()

Unnamed: 0,0,Brando,seems uninspired by the heist script and is phoning,it,in
0,4,Marlon,Brando is incredible as the patriarch of the f...,,
1,1,Brando,"is Brando , but for this one it 's not enough",,


In [4]:
most_common = pd.read_fwf("most_common_english_words.txt")
most_common.head()

Unnamed: 0,the
0,be
1,and
2,of
3,a
4,in


In [5]:
small = pd.read_fwf("small.txt")
small.head()

Unnamed: 0,4,"It 's the kind of movie that , aside from Robert Altman , Spike Lee , the Coen Brothers and a few others , our moviemakers do n't make often enough .",Unnamed: 2
0,4,"Williams creates a stunning , Taxi Driver-esqu...",
1,3,Campbell Scott finds the ideal outlet for his ...,
2,3,"Has a certain ghoulish fascination , and gener...",
3,1,With its parade of almost perpetually wasted c...,
4,3,Bolstered by an astonishing voice cast excepti...,.


In [6]:
medium = pd.read_fwf("medium.txt")
medium.head()

Unnamed: 0,4,"A must for fans of British cinema , if only because so many titans of the industry are along for the ride .",Unnamed: 2
0,4,"It 's a fine , focused piece of work that reop...",
1,4,Romantic comedy and Dogme 95 filmmaking may se...,
2,1,At a time when we 've learned the hard way jus...,
3,4,A fascinating documentary about the long and e...,
4,3,Rodriguez has the chops of a smart-aleck film ...,


## PART I: File I/O, strings, lists

In [7]:
from typing import TextIO, List, Union, Dict, Tuple

# PART I: File I/O, strings, lists

def is_word(token: str) -> bool:
    '''Return True IFF token is an alphabetic word optionally containing
    forward slashes or dashes.
    
    >>> is_word('Amazing')
    True
    >>> is_word('writer/director')
    True
    >>> is_word('true-to-life')
    True
    >>> is_word("'re")
    False
    >>> is_word("1960s")
    False
    '''
    #need condition that inclues words containing dashes or slashes
    #need condition that ignores consistent punctuation
    #need condition to exclude apostrophe between letters
    clean_token=token.strip('\n')
    if clean_token.isalpha():
        return True
    elif "'" in clean_token:
        return False
    else:
        return False



In [8]:
is_word('Amazing')

True

In [9]:
def get_word_list(statement: str) -> List[str]:
    '''Return a list of words contained in statement, converted to lowercase. 
    Use is_word to determine whether each token in statement is a word.
    
    >>> get_word_list('A terrible , 1970s mess of true-crime nonsense from writer/director Shyamalan .')
    ['a', 'terrible', 'mess', 'of', 'true-crime', 'nonsense', 'from', 'writer/director', 'shyamalan']
    '''
    word_list=[]
    statement2=statement.split(' ')
    for item in statement2:
        if is_word(item):
            low_item=item.lower()
            word_list.append(low_item)
    return word_list


In [10]:
get_word_list('A terrible , 1970s mess of true-crime nonsense from writer/director Shyamalan .')

['a', 'terrible', 'mess', 'of', 'nonsense', 'from', 'shyamalan']

In [11]:
def judge(score: float) -> str:
    '''Return 'negative' if score is 1.5 or less.
    Return 'positive' if score is 2.5 or more.
    Return 'neutral' otherwise.
    >>> judge(1.3)
    'negative'
    >>> judge(1.8)
    'neutral'
    >>> judge(3.4)
    'positive'
    '''
    if score <= 1.5:
        return 'negative'
    elif score >= 2.5:
        return 'positive'
    else:
        return 'neutral'


In [12]:
judge(1.8)

'neutral'

In [13]:
def word_kss_scan(word: str, file: TextIO) -> Union[None, float]:
    '''Given file composed of rated movie reviews, return the average score
    of all occurrences of word in file. If word does not occur in file, return None.
    [examples not required]
    '''
    #convert string score into integer score
    #how many times did i see the word
    seen=0
    score=0
    for line in file:
        rate=int(line[0])
        word_list=get_word_list(line)
        for item in word_list:
            #assign score of review to word, if word in review
            if item==word:
                score+=rate
                seen+=1
    if seen!=0:
        return score/seen
    else:
        return None
    

## Part 2: Dictionnaries

In [14]:
def extract_kss(file: TextIO) -> Dict[str, List[int]]:
    '''Given file composed of rated movie reviews, return a dictionary
    containing all words in file as keys. For each key, store a list
    containing the total sum of review scores and the number of times
    the key has occurred as a value, e.g., { 'a' : [12, 4] }
    [examples not required]
    
    '''
    # accumulate per line, per word
    # rate needs to be added
    # create extracted_dict={}
    extracted_dict={}
    
    for line in file.readlines(): 
        rate = int(line.split(" ", 1)[0]) # separate each line into two parts, first part is the integer rate
        word_list = get_word_list(line.split(" ", 1)[1].strip("\n")) 
        # second part is the list of words in lowercase
        for word in word_list:
            if word in extracted_dict:
                extracted_dict[word_list][0] += rate
                extracted_dict[word_list][1] += 1
            else:
                extracted_dict[word] = [rate, 1]
                
    return extracted_dict
    #add items to extracted_dict: {extracted_dict[word][0] = rate, extracted_dict[word][1] = word}

In [15]:
def word_kss(word: str, kss: Dict[str, List[int]]) -> Union[float, None]:
    '''Return the Known Sentiment Score of word if it appears in kss. 
    If word does not appear in kss, return None.
    [examples not required]
    '''
    word = word.lower()
    if word in kss:
        return float(kss[word][0] / kss[word][1])
    return None
             

In [16]:
def statement_pss(statement: str, kss: Dict[str, List[int]]) -> Union[float, None]:
    '''Return the Predicted Sentiment Score of statement based on
    word Known Sentiment Scores from kss.
    Return None if statement contains no words from kss.'''
    #uses get_word_list
    word_list = get_word_list(statement)
    word_rate = 0.0
    counter = 0
    for item in word_list:
        if item in kss:
            word_rate += (kss[item][0] / kss[item][1])
            counter += 1
    if counter == 0:
        return None
    return word_rate / counter

## PART III: Word Frequencies

In [21]:
def score(item: Tuple[str, List[int]]) -> float:
    '''Given item as a (key, value) tuple, return the
    ratio of the first and second integer in value
    '''
    
    return item[1][0] / item[1][1]


In [22]:
def most_extreme_words(count, min_occ, kss, pos):
    '''Return a list of lists containing the count most extreme words
    that occur at least min_occ times in kss.
    Each item in the list is formatted as follows:
    [word, average score, number of occurrences]
    If pos is True, return the most positive words.
    If pos is False, return the most negative words.
    [examples not required]
    '''
    res = []
    temp_dict = {}
    
    for key, value in list(kss.items()):
        if value[1] >= min_occ:
            temp_dict[key] = value[:]
            
    sorted_list = sorted(temp_dict.items(), key=score, reverse=pos)[:count]
    
    for item in sorted_list:
        res.append([item[0], score(item), item[1][1]])
    
    
    return res
    

In [19]:
def most_negative_words(count, min_occ, kss):
    '''Return a list of the count most negative words that occur at least min_occ times in kss.
    '''
    
    return []

In [20]:
if __name__ == "__main__":

# Pick a dataset    
    dataset = 'tiny.txt'
    #dataset = 'small.txt'
    #dataset = 'medium.txt'
    #dataset = 'full.txt'
    
    with open('tiny.txt','r') as file:
        print(extract_kss(file))
        

TypeError: unhashable type: 'list'

In [None]:
d = {}
d[2] = 'coffee'
d[15] = 'juice'
d[7] = 'coffee'
d[5] = 'donuts'
d[9] = 'cake'
d[10] = 'balloons'

In [None]:
d

In [None]:
def reverse_lookup_lists(phone_num, phone_numbers, names):
    """ (str, list of str, list of str) -> str

    Precondition: len(phone_numbers) == len(names)

    This function receives a phone number phone_num, and two lists: a list of 
    phone numbers phone_numbers and a list of names names.  These lists are
    parallel lists, so the name in position 0 of the names list is 
    associated with the phone number in position 0 of the phone_numbers 
    list, and so on.

    Return the name associated with phone_num according to phone_numbers
    and names, or an empty string if there is no match.
    
    >>> reverse_lookup_lists('416-555-6543', ['416-555-3498', \
        '647-555-9812', '416-555-6543', '905-555-6681'], ['John A. Macdonald', \
        'Louis Riel', 'Canoe Head', 'Tim Horton'])        
    'Canoe Head'
    """
    res_str = ""
    for i in range(len(phone_numbers)):
        if phone_num == phone_numbers[i]:
            res_str = names[i]
    return res_str

In [None]:
reverse_lookup_lists('416-555-6543', ['416-555-3498', \
        '647-555-9812', '416-555-6543', '905-555-6681'], ['John A. Macdonald', \
        'Louis Riel', 'Canoe Head', 'Tim Horton'])   

In [None]:
def reverse_lookup_dictionary(phone_num, phone_to_name):
    """ (str, dict of {str: str}) -> str

    This function receives a phone number phone_num, and a dictionary
    phone_to_name in which each key is a phone number and each value
    is the name associated with that phone number.
	
    Return the name associated with phone_num in phone_to_name, or
    an empty string if there is no match.
    
    >>> reverse_lookup_dictionary("416-555-3498", {"416-555-3498": \
        "John A. Macdonald", "647-555-9812": "Louis Riel", "416-555-6543": \
        "Canoe Head", "905-555-6681":"Tim Horton"})
    'John A. Macdonald'        
    """
    if phone_num in phone_to_name:
        return phone_to_name[phone_num]
    return ""

In [None]:
reverse_lookup_dictionary("416-555-3498", {"416-555-3498": \
        "John A. Macdonald", "647-555-9812": "Louis Riel", "416-555-6543": \
        "Canoe Head", "905-555-6681":"Tim Horton"})

In [None]:
def get_quantities(table_to_foods):
    """ (dict of {str: list of str}) -> dict of {str: int}
	
    The table_to_foods dict has table names as keys (e.g., 't1', 't2', and so on) and each value
    is a list of foods ordered for that table.

    Return a dictionary where each key is a food from table_to_foods and each
    value is the quantity of that food that was ordered.
	
    >>> get_quantities({'t1': ['Vegetarian stew', 'Poutine', 'Vegetarian stew'], 't3': ['Steak pie', 'Poutine', 'Vegetarian stew'], 't4': ['Steak pie', 'Steak pie']})
    {'Vegetarian stew': 3, 'Poutine': 2, 'Steak pie': 3}	
    """

    food_to_quantity = {}
    # Accumulate the food information here.
    for table_order in table_to_foods.values():
        for menu_item in table_order:
            if menu_item in food_to_quantity:
                food_to_quantity[menu_item] += 1 
            else:
                food_to_quantity[menu_item] = 1

    return food_to_quantity

get_quantities({'t1': ['Vegetarian stew', 'Poutine', 'Vegetarian stew'], 't3': ['Steak pie', 'Poutine', 'Vegetarian stew'], 't4': ['Steak pie', 'Steak pie']})


In [None]:
get_quantities({'t1': ['Vegetarian stew', 'Poutine', 'Vegetarian stew'], 't3': ['Steak pie', 'Poutine', 'Vegetarian stew'], 't4': ['Steak pie', 'Steak pie']})


In [None]:
def Average(lst): 
    return sum(lst) / len(lst)

In [None]:
lst = [1,2,3] 
average = Average(lst) 

In [None]:
print("Average of the list =", round(average, 2))

In [None]:
dir(tuple)

In [23]:
for x in range(2,4):
    y=2
    while y < 4:
        print(x**y)
        y+=1
    

4
8
9
27
